From e28c86f002d66e88f12042659b667f5cc9d67b93 Mon Sep 17 00:00:00 2001 From: Andrew Oakley Date: Mon, 12 Apr 2021 09:52:50 +0100 Subject: [PATCH 001/198] git-p4: avoid decoding more data from perforce Perforce does not validate or store the encoding of user submitted data by default (although this can be enabled). In large repositories it is therefore very likely that some data will not be valid UTF-8. Historically (with python2) git-p4 did not attempt to decode the data from the perforce server - it just passed bytes from perforce to git, preserving whatever was stored in perforce. This seems like a sensible approach - it avoids any loss of data, and there is no way to determine the intended encoding for any invalid data from perforce. This change updates git-p4 to avoid decoding changelist descriptions, user and time information. The time data is almost certainly valid unicode, but as they are processed with the user information it is more convenient for them to be handled as bytes. Signed-off-by: Andrew Oakley Signed-off-by: Junio C Hamano --- git-p4.py | 57 +++++++++++++++--------------- t/t9835-git-p4-message-encoding.sh | 48 +++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 28 deletions(-) create mode 100755 t/t9835-git-p4-message-encoding.sh diff --git a/git-p4.py b/git-p4.py index 09c9e93ac401cc..8407ec5c7ae216 100755 --- a/git-p4.py +++ b/git-p4.py @@ -764,13 +764,15 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, while True: entry = marshal.load(p4.stdout) if bytes is not str: - # Decode unmarshalled dict to use str keys and values, except for: - # - `data` which may contain arbitrary binary data - # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text + # Decode unmarshalled dict to use str keys and values, except + # for cases where the values may not be valid UTF-8. 
+ binary_keys = ('data', 'path', 'clientFile', 'Description', + 'desc', 'Email', 'FullName', 'Owner', 'time', + 'user', 'User') decoded_entry = {} for key, value in entry.items(): key = key.decode() - if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')): + if isinstance(value, bytes) and not (key in binary_keys or key.startswith('depotFile')): value = value.decode() decoded_entry[key] = value # Parse out data if it's an error response @@ -949,11 +951,11 @@ def gitConfigInt(key): _gitConfig[key] = None return _gitConfig[key] -def gitConfigList(key): +def gitConfigList(key, raw=False): if key not in _gitConfig: - s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) + s = read_pipe(["git", "config", "--get-all", key], ignore_error=True, raw=raw) _gitConfig[key] = s.strip().splitlines() - if _gitConfig[key] == ['']: + if _gitConfig[key] == [''] or _gitConfig[key] == [b'']: _gitConfig[key] = [] return _gitConfig[key] @@ -1499,35 +1501,35 @@ def getUserMapFromPerforceServer(self): for output in p4CmdList("users"): if "User" not in output: continue - self.users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">" + self.users[output["User"]] = output["FullName"] + b" <" + output["Email"] + b">" self.emails[output["Email"]] = output["User"] - mapUserConfigRegex = re.compile(r"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$", re.VERBOSE) - for mapUserConfig in gitConfigList("git-p4.mapUser"): + mapUserConfigRegex = re.compile(br"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$", re.VERBOSE) + for mapUserConfig in gitConfigList("git-p4.mapUser", raw=True): mapUser = mapUserConfigRegex.findall(mapUserConfig) if mapUser and len(mapUser[0]) == 3: user = mapUser[0][0] fullname = mapUser[0][1] email = mapUser[0][2] - self.users[user] = fullname + " <" + email + ">" + self.users[user] = fullname + b" <" + email + b">" self.emails[email] = user - s = '' + s = b'' for (key, val) in self.users.items(): - s += "%s\t%s\n" % 
(key.expandtabs(1), val.expandtabs(1)) + s += b"%s\t%s\n" % (key.expandtabs(1), val.expandtabs(1)) - open(self.getUserCacheFilename(), 'w').write(s) + open(self.getUserCacheFilename(), 'wb').write(s) self.userMapFromPerforceServer = True def loadUserMapFromCache(self): self.users = {} self.userMapFromPerforceServer = False try: - cache = open(self.getUserCacheFilename(), 'r') + cache = open(self.getUserCacheFilename(), 'rb') lines = cache.readlines() cache.close() for line in lines: - entry = line.strip().split("\t") + entry = line.strip().split(b"\t") self.users[entry[0]] = entry[1] except IOError: self.getUserMapFromPerforceServer() @@ -1780,7 +1782,7 @@ def p4UserForCommit(self,id): # Return the tuple (perforce user,git email) for a given git commit id self.getUserMapFromPerforceServer() gitEmail = read_pipe(["git", "log", "--max-count=1", - "--format=%ae", id]) + "--format=%ae", id], raw=True) gitEmail = gitEmail.strip() if gitEmail not in self.emails: return (None,gitEmail) @@ -1911,7 +1913,7 @@ def prepareSubmitTemplate(self, changelist=None): template += key + ':' if key == 'Description': template += '\n' - for field_line in change_entry[key].splitlines(): + for field_line in decode_text_stream(change_entry[key]).splitlines(): template += '\t'+field_line+'\n' if len(files_list) > 0: template += '\n' @@ -2163,7 +2165,7 @@ def applyCommit(self, id): submitTemplate += "\n######## Actual user %s, modified after commit\n" % p4User if self.checkAuthorship and not self.p4UserIsMe(p4User): - submitTemplate += "######## git author %s does not match your p4 account.\n" % gitEmail + submitTemplate += "######## git author %s does not match your p4 account.\n" % decode_text_stream(gitEmail) submitTemplate += "######## Use option --preserve-user to modify authorship.\n" submitTemplate += "######## Variable git-p4.skipUserNameCheck hides this message.\n" @@ -2802,7 +2804,7 @@ def __init__(self): self.knownBranches = {} self.initialParents = {} - self.tz = "%+03d%02d" % (- 
time.timezone / 3600, ((- time.timezone % 3600) / 60)) + self.tz = b"%+03d%02d" % (- time.timezone / 3600, ((- time.timezone % 3600) / 60)) self.labels = {} # Force a checkpoint in fast-import and wait for it to finish @@ -3161,7 +3163,7 @@ def make_email(self, userid): if userid in self.users: return self.users[userid] else: - return "%s " % userid + return b"%s " % userid def streamTag(self, gitStream, labelName, labelDetails, commit, epoch): """ Stream a p4 tag. @@ -3184,9 +3186,9 @@ def streamTag(self, gitStream, labelName, labelDetails, commit, epoch): email = self.make_email(owner) else: email = self.make_email(self.p4UserId()) - tagger = "%s %s %s" % (email, epoch, self.tz) + tagger = b"%s %s %s" % (email, epoch, self.tz) - gitStream.write("tagger %s\n" % tagger) + gitStream.write(b"tagger %s\n" % tagger) print("labelDetails=",labelDetails) if 'Description' in labelDetails: @@ -3279,12 +3281,11 @@ def commit(self, details, files, branch, parent = "", allow_empty=False): self.gitStream.write("commit %s\n" % branch) self.gitStream.write("mark :%s\n" % details["change"]) self.committedChanges.add(int(details["change"])) - committer = "" if author not in self.users: self.getUserMapFromPerforceServer() - committer = "%s %s %s" % (self.make_email(author), epoch, self.tz) + committer = b"%s %s %s" % (self.make_email(author), epoch, self.tz) - self.gitStream.write("committer %s\n" % committer) + self.gitStream.write(b"committer %s\n" % committer) self.gitStream.write("data <dummy-file1 && + P4USER="${UTF8}" p4 add dummy-file1 && + P4USER="${UTF8}" p4 submit -d "message ${UTF8}" && + + >dummy-file2 && + P4USER="${ISO8859}" p4 add dummy-file2 && + P4USER="${ISO8859}" p4 submit -d "message ${ISO8859}" + ) +' + +test_expect_success 'check UTF-8 commit' ' + ( + git p4 clone --destination="$git/1" //depot@1,1 && + git -C "$git/1" cat-file commit HEAD | grep -q "^message ${UTF8}$" && + git -C "$git/1" cat-file commit HEAD | grep -q "^author Dr. 
${UTF8} <${UTF8}@example.com>" + ) +' + +test_expect_success 'check ISO-8859 commit' ' + ( + git p4 clone --destination="$git/2" //depot@2,2 && + git -C "$git/2" cat-file commit HEAD > /tmp/dump.txt && + git -C "$git/2" cat-file commit HEAD | grep -q "^message ${ISO8859}$" && + git -C "$git/2" cat-file commit HEAD | grep -q "^author Dr. ${ISO8859} <${ISO8859}@example.com>" + ) +' + +test_done From ac9cdb5fd175869aef35ca126a41976818599f4f Mon Sep 17 00:00:00 2001 From: Andrew Oakley Date: Mon, 12 Apr 2021 09:52:51 +0100 Subject: [PATCH 002/198] git-p4: do not decode data from perforce by default This commit is not intended to change behaviour, and we still attempt to decode values that might not be valid unicode. It's not clear that all of these values are safe to decode, but it's now more obvious which data is decoded. Signed-off-by: Junio C Hamano --- git-p4.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/git-p4.py b/git-p4.py index 8407ec5c7ae216..8a97ff3dd2d633 100755 --- a/git-p4.py +++ b/git-p4.py @@ -764,15 +764,19 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, while True: entry = marshal.load(p4.stdout) if bytes is not str: - # Decode unmarshalled dict to use str keys and values, except - # for cases where the values may not be valid UTF-8. 
+ text_keys = ('action', 'change', 'Change', 'Client', 'code', + 'fileSize', 'headAction', 'headRev', 'headType', + 'Jobs', 'label', 'options', 'perm', 'rev', 'Root', + 'Status', 'type', 'Update') + text_key_prefixes = ('action', 'File', 'job', 'rev', 'type', + 'View') decoded_entry = {} for key, value in entry.items(): key = key.decode() - if isinstance(value, bytes) and not (key in binary_keys or key.startswith('depotFile')): + if isinstance(value, bytes) and (key in text_keys or + any(filter(key.startswith, text_key_prefixes))): value = value.decode() decoded_entry[key] = value # Parse out data if it's an error response From 51a0478d2c65df251d69dda959f15260a5fbe50b Mon Sep 17 00:00:00 2001 From: Tzadik Vanderhoof Date: Thu, 29 Apr 2021 00:39:05 -0700 Subject: [PATCH 003/198] git-p4: git-p4.fallbackEncoding to specify non UTF-8 charset Add git-p4.fallbackEncoding config variable, to prevent git-p4 from crashing on non UTF-8 changeset descriptions. When git-p4 reads the output from a p4 command, it assumes it will be 100% UTF-8. If even one character in the output of one p4 command is not UTF-8, git-p4 crashes with: File "C:/Program Files/Git/bin/git-p4.py", line 774, in p4CmdList value = value.decode() UnicodeDecodeError: 'utf-8' codec can't decode byte Ox93 in position 42: invalid start byte This is especially a problem for the "git p4 clone ... @all" command, where git-p4 needs to read thousands of changeset descriptions, one of which may have a stray smart quote, causing the whole clone operation to fail. Add a new config setting, allowing git-p4 to try a fallback encoding (for example, "cp1252") and/or use the Unicode replacement character, to prevent the whole program from crashing on such a minor problem. 
Signed-off-by: Tzadik Vanderhoof Signed-off-by: Junio C Hamano --- Documentation/git-p4.txt | 9 ++ git-p4.py | 11 ++- t/t9836-git-p4-config-fallback-encoding.sh | 98 ++++++++++++++++++++++ 3 files changed, 117 insertions(+), 1 deletion(-) create mode 100755 t/t9836-git-p4-config-fallback-encoding.sh diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index f89e68b424c3b2..86d3ffa644a49f 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -638,6 +638,15 @@ git-p4.pathEncoding:: to transcode the paths to UTF-8. As an example, Perforce on Windows often uses "cp1252" to encode path names. +git-p4.fallbackEncoding:: + Perforce changeset descriptions can be stored in any encoding. + Git-p4 first tries to interpret each description as UTF-8. If that + fails, this config allows another encoding to be tried. You can specify, + for example, "cp1252". If git-p4.fallbackEncoding is "replace", UTF-8 will + be used, with invalid UTF-8 characters replaced by the Unicode replacement + character. The default is "none": there is no fallback, and any non UTF-8 + character will cause git-p4 to immediately fail. + git-p4.largeFileSystem:: Specify the system that is used for large (binary) files. Please note that large file systems do not support the 'git p4 submit' command. diff --git a/git-p4.py b/git-p4.py index 09c9e93ac401cc..202fb01bdf0491 100755 --- a/git-p4.py +++ b/git-p4.py @@ -771,7 +771,16 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, for key, value in entry.items(): key = key.decode() if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')): - value = value.decode() + try: + value = value.decode() + except UnicodeDecodeError: + fallbackEncoding = gitConfig("git-p4.fallbackEncoding").lower() or 'none' + if fallbackEncoding == 'none': + raise Exception("UTF-8 decoding failed. 
Consider using git config git-p4.fallbackEncoding") + elif fallbackEncoding == 'replace': + value = value.decode(errors='replace') + else: + value = value.decode(encoding=fallbackEncoding) decoded_entry[key] = value # Parse out data if it's an error response if decoded_entry.get('code') == 'error' and 'data' in decoded_entry: diff --git a/t/t9836-git-p4-config-fallback-encoding.sh b/t/t9836-git-p4-config-fallback-encoding.sh new file mode 100755 index 00000000000000..901bb3759df508 --- /dev/null +++ b/t/t9836-git-p4-config-fallback-encoding.sh @@ -0,0 +1,98 @@ +#!/bin/sh + +test_description='test git-p4.fallbackEncoding config' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./lib-git-p4.sh + +test_expect_success 'start p4d' ' + start_p4d +' + +test_expect_success 'add Unicode description' ' + cd "$cli" && + echo file1 >file1 && + p4 add file1 && + p4 submit -d documentación +' + +# Unicode descriptions cause "git p4 clone" to crash with a UnicodeDecodeError in some +# environments. This test determines if that is the case in our environment. If so, +# we create a file called "clone_fails". In subsequent tests, we check whether that +# file exists to determine what behavior to expect. + +clone_fails="$TRASH_DIRECTORY/clone_fails" + +# If clone fails with git-p4.fallbackEncoding set to "none", create the "clone_fails" file, +# and make sure the error message is correct + +test_expect_success 'clone with git-p4.fallbackEncoding set to "none"' ' + git config --global git-p4.fallbackEncoding none && + test_when_finished cleanup_git && { + git p4 clone --dest="$git" //depot@all 2>error || ( + >"$clone_fails" && + grep "UTF-8 decoding failed. Consider using git config git-p4.fallbackEncoding" error + ) + } +' + +# If clone fails with git-p4.fallbackEncoding set to "none", it should also fail when it's unset, +# also with the correct error message. Otherwise the clone should succeed. 
+ +test_expect_success 'clone with git-p4.fallbackEncoding unset' ' + git config --global --unset git-p4.fallbackEncoding && + test_when_finished cleanup_git && { + ( + test -f "$clone_fails" && + test_must_fail git p4 clone --dest="$git" //depot@all 2>error && + grep "UTF-8 decoding failed. Consider using git config git-p4.fallbackEncoding" error + ) || + ( + ! test -f "$clone_fails" && + git p4 clone --dest="$git" //depot@all 2>error + ) + } +' + +# Whether or not "clone_fails" exists, setting git-p4.fallbackEncoding +# to "cp1252" should cause clone to succeed and get the right description + +test_expect_success 'clone with git-p4.fallbackEncoding set to "cp1252"' ' + git config --global git-p4.fallbackEncoding cp1252 && + test_when_finished cleanup_git && + ( + git p4 clone --dest="$git" //depot@all && + cd "$git" && + git log --oneline >log && + desc=$(head -1 log | cut -d" " -f2) && + test "$desc" = "documentación" + ) +' + +# Setting git-p4.fallbackEncoding to "replace" should always cause clone to succeed. +# If "clone_fails" exists, the description should contain the Unicode replacement +# character, otherwise the description should be correct (since we're on a system that +# doesn't have the Unicode issue) + +test_expect_success 'clone with git-p4.fallbackEncoding set to "replace"' ' + git config --global git-p4.fallbackEncoding replace && + test_when_finished cleanup_git && + ( + git p4 clone --dest="$git" //depot@all && + cd "$git" && + git log --oneline >log && + desc=$(head -1 log | cut -d" " -f2) && + { + (test -f "$clone_fails" && + test "$desc" = "documentaci�n" + ) || + (! 
test -f "$clone_fails" && + test "$desc" = "documentación" + ) + } + ) +' + +test_done From 7b1a3fb529fa7a0395b9e1fa6d89ec1b5aa888c2 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 6 Jun 2021 13:47:23 -0500 Subject: [PATCH 004/198] completion: bash: fix prefix detection in branch.* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we are completely ignoring the --cur argument. The issue can be tested with: git clone --config=branch. Reviewed-by: SZEDER Gábor Signed-off-by: Felipe Contreras Tested-by: David Aguilar Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index b50c5d0ea3867b..47b48fbab6614e 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2649,8 +2649,8 @@ __git_complete_config_variable_name () return ;; branch.*) - local pfx="${cur%.*}." - cur_="${cur#*.}" + local pfx="${cur_%.*}." + cur_="${cur_#*.}" __gitcomp_direct "$(__git_heads "$pfx" "$cur_" ".")" __gitcomp_nl_append $'autoSetupMerge\nautoSetupRebase\n' "$pfx" "$cur_" "$sfx" return From 772a6e72a8b04e6faa2b86414d643ca29bd062d2 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 6 Jun 2021 13:47:24 -0500 Subject: [PATCH 005/198] completion: bash: fix for suboptions with value We need to ignore options that don't start with -- as well. Depending on the value of COMP_WORDBREAKS the last word could be duplicated otherwise. 
Can be tested with: git merge -X diff-algorithm= Signed-off-by: Felipe Contreras Tested-by: David Aguilar Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- t/t9902-completion.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 47b48fbab6614e..05606609f97bd6 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -356,7 +356,7 @@ __gitcomp () local cur_="${3-$cur}" case "$cur_" in - --*=) + *=) ;; --no-*) local c i=0 IFS=$' \t\n' diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index cb057ef16134f7..6b56e54fc37c67 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -540,6 +540,15 @@ test_expect_success '__gitcomp - expand/narrow all negative options' ' EOF ' +test_expect_success '__gitcomp - equal skip' ' + test_gitcomp "--option=" "--option=" <<-\EOF && + + EOF + test_gitcomp "option=" "option=" <<-\EOF + + EOF +' + test_expect_success '__gitcomp - doesnt fail because of invalid variable name' ' __gitcomp "$invalid_variable_name" ' @@ -2380,6 +2389,12 @@ test_expect_success 'git clone --config= - value' ' EOF ' +test_expect_success 'options with value' ' + test_completion "git merge -X diff-algorithm=" <<-\EOF + + EOF +' + test_expect_success 'sourcing the completion script clears cached commands' ' __git_compute_all_commands && verbose test -n "$__git_all_commands" && From 345d5425a1102a08f6e6ba375152793c5a2cdd7f Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 6 Jun 2021 13:47:25 -0500 Subject: [PATCH 006/198] completion: bash: fix for multiple dash commands Otherwise commands like 'for-each-ref' are not completed. 
Signed-off-by: Felipe Contreras Tested-by: David Aguilar Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 05606609f97bd6..1feb2ee1081a4e 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -421,7 +421,7 @@ __gitcomp_builtin () local incl="${2-}" local excl="${3-}" - local var=__gitcomp_builtin_"${cmd/-/_}" + local var=__gitcomp_builtin_"${cmd//-/_}" local options eval "options=\${$var-}" From e3ee1a316302caaeeb176ad4c05a977b08c379b2 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Sun, 6 Jun 2021 13:47:26 -0500 Subject: [PATCH 007/198] completion: bash: add correct suffix in variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __gitcomp automatically adds a suffix, but __gitcomp_nl and others don't, we need to specify a space by default. Can be tested with: git config branch.autoSetupMe This fix only works for versions of bash greater than 4.0, before that "local sfx" creates an empty string, therefore the unset expansion doesn't work. The same happens in zsh. Therefore we don't add the test for that for now. The correct fix for all shells requires semantic changes in __gitcomp, but that can be done later. Cc: SZEDER Gábor Signed-off-by: Felipe Contreras Tested-by: David Aguilar Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 1feb2ee1081a4e..c72b5465f9ba14 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2652,7 +2652,7 @@ __git_complete_config_variable_name () local pfx="${cur_%.*}." 
cur_="${cur_#*.}" __gitcomp_direct "$(__git_heads "$pfx" "$cur_" ".")" - __gitcomp_nl_append $'autoSetupMerge\nautoSetupRebase\n' "$pfx" "$cur_" "$sfx" + __gitcomp_nl_append $'autoSetupMerge\nautoSetupRebase\n' "$pfx" "$cur_" "${sfx- }" return ;; guitool.*.*) @@ -2686,7 +2686,7 @@ __git_complete_config_variable_name () local pfx="${cur_%.*}." cur_="${cur_#*.}" __git_compute_all_commands - __gitcomp_nl "$__git_all_commands" "$pfx" "$cur_" "$sfx" + __gitcomp_nl "$__git_all_commands" "$pfx" "$cur_" "${sfx- }" return ;; remote.*.*) @@ -2702,7 +2702,7 @@ __git_complete_config_variable_name () local pfx="${cur_%.*}." cur_="${cur_#*.}" __gitcomp_nl "$(__git_remotes)" "$pfx" "$cur_" "." - __gitcomp_nl_append "pushDefault" "$pfx" "$cur_" "$sfx" + __gitcomp_nl_append "pushDefault" "$pfx" "$cur_" "${sfx- }" return ;; url.*.*) From e681b728e7db88445f2c69be7b2f07053c577d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 7 Jun 2021 13:58:24 +0200 Subject: [PATCH 008/198] test-tool: split up test-tool read-cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the "test-tool read-cache" was originally added back in 1ecb5ff141 (read-cache: add simple performance test, 2013-06-09) it's been growing all sorts of bells and whistles that aren't very conducive to performance testing the index, e.g. it learned how to read config. Then in recent changes in e2df6c3972 (test-read-cache: print cache entries with --table, 2021-03-30) and 2782db3eed (test-tool: don't force full index, 2021-03-30) we gained even more logic to deal with sparse index testing. I think that having one test tool do so many different things makes it harder to read its code. Let's instead split up the "again" and "perf" uses for it into their own tools. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Makefile | 2 ++ t/helper/test-read-cache-again.c | 31 ++++++++++++++++++ t/helper/test-read-cache-perf.c | 21 ++++++++++++ t/helper/test-read-cache.c | 56 +++++++++++++------------------- t/helper/test-tool.c | 2 ++ t/helper/test-tool.h | 2 ++ t/perf/p0002-read-cache.sh | 2 +- t/t7519-status-fsmonitor.sh | 2 +- 8 files changed, 83 insertions(+), 35 deletions(-) create mode 100644 t/helper/test-read-cache-again.c create mode 100644 t/helper/test-read-cache-perf.c diff --git a/Makefile b/Makefile index c3565fc0f8f7df..142303bd17a3a2 100644 --- a/Makefile +++ b/Makefile @@ -732,6 +732,8 @@ TEST_BUILTINS_OBJS += test-prio-queue.o TEST_BUILTINS_OBJS += test-proc-receive.o TEST_BUILTINS_OBJS += test-progress.o TEST_BUILTINS_OBJS += test-reach.o +TEST_BUILTINS_OBJS += test-read-cache-again.o +TEST_BUILTINS_OBJS += test-read-cache-perf.o TEST_BUILTINS_OBJS += test-read-cache.o TEST_BUILTINS_OBJS += test-read-graph.o TEST_BUILTINS_OBJS += test-read-midx.o diff --git a/t/helper/test-read-cache-again.c b/t/helper/test-read-cache-again.c new file mode 100644 index 00000000000000..707db036cb1ade --- /dev/null +++ b/t/helper/test-read-cache-again.c @@ -0,0 +1,31 @@ +#include "test-tool.h" +#include "cache.h" + +int cmd__read_cache_again(int argc, const char **argv) +{ + struct repository *r = the_repository; + int i, cnt; + const char *name; + + if (argc != 2) + die("usage: test-tool read-cache-again "); + + cnt = strtol(argv[0], NULL, 0); + name = argv[2]; + + setup_git_directory(); + for (i = 0; i < cnt; i++) { + int pos; + repo_read_index(r); + refresh_index(r->index, REFRESH_QUIET, + NULL, NULL, NULL); + pos = index_name_pos(r->index, name, strlen(name)); + if (pos < 0) + die("%s not in index", name); + printf("%s is%s up to date\n", name, + ce_uptodate(r->index->cache[pos]) ? 
"" : " not"); + write_file(name, "%d\n", cnt); + discard_index(r->index); + } + return 0; +} diff --git a/t/helper/test-read-cache-perf.c b/t/helper/test-read-cache-perf.c new file mode 100644 index 00000000000000..90176c010a10ff --- /dev/null +++ b/t/helper/test-read-cache-perf.c @@ -0,0 +1,21 @@ +#include "test-tool.h" +#include "cache.h" + +int cmd__read_cache_perf(int argc, const char **argv) +{ + struct repository *r = the_repository; + int i, cnt = 1; + + if (argc == 2) + cnt = strtol(argv[1], NULL, 0); + else + die("usage: test-tool read-cache-perf []"); + + setup_git_directory(); + for (i = 0; i < cnt; i++) { + repo_read_index(r); + discard_index(r->index); + } + + return 0; +} diff --git a/t/helper/test-read-cache.c b/t/helper/test-read-cache.c index b52c174acc7a1f..ae4b9b70ad187d 100644 --- a/t/helper/test-read-cache.c +++ b/t/helper/test-read-cache.c @@ -5,6 +5,12 @@ #include "commit.h" #include "tree.h" #include "sparse-index.h" +#include "parse-options.h" + +static const char *read_cache_usage[] = { + "test-tool read-cache [...]", + NULL +}; static void print_cache_entry(struct cache_entry *ce) { @@ -34,49 +40,33 @@ static void print_cache(struct index_state *istate) int cmd__read_cache(int argc, const char **argv) { struct repository *r = the_repository; - int i, cnt = 1; - const char *name = NULL; int table = 0, expand = 0; + struct option options[] = { + OPT_BOOL(0, "table", &table, + "print a dump of the cache"), + OPT_BOOL(0, "expand", &expand, + "call ensure_full_index()"), + OPT_END() + }; + + argc = parse_options(argc, argv, "test-tools", options, read_cache_usage, 0); + if (argc > 0) + usage_msg_opt("Too many arguments.", read_cache_usage, options); initialize_the_repository(); prepare_repo_settings(r); r->settings.command_requires_full_index = 0; - for (++argv, --argc; *argv && starts_with(*argv, "--"); ++argv, --argc) { - if (skip_prefix(*argv, "--print-and-refresh=", &name)) - continue; - if (!strcmp(*argv, "--table")) - table = 1; - else 
if (!strcmp(*argv, "--expand")) - expand = 1; - } - - if (argc == 1) - cnt = strtol(argv[0], NULL, 0); setup_git_directory(); git_config(git_default_config, NULL); + repo_read_index(r); - for (i = 0; i < cnt; i++) { - repo_read_index(r); - - if (expand) - ensure_full_index(r->index); + if (expand) + ensure_full_index(r->index); - if (name) { - int pos; + if (table) + print_cache(r->index); + discard_index(r->index); - refresh_index(r->index, REFRESH_QUIET, - NULL, NULL, NULL); - pos = index_name_pos(r->index, name, strlen(name)); - if (pos < 0) - die("%s not in index", name); - printf("%s is%s up to date\n", name, - ce_uptodate(r->index->cache[pos]) ? "" : " not"); - write_file(name, "%d\n", i); - } - if (table) - print_cache(r->index); - discard_index(r->index); - } return 0; } diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index c5bd0c6d4c7abc..b0300f70c7964a 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -54,6 +54,8 @@ static struct test_cmd cmds[] = { { "progress", cmd__progress }, { "reach", cmd__reach }, { "read-cache", cmd__read_cache }, + { "read-cache-again", cmd__read_cache_again }, + { "read-cache-perf", cmd__read_cache_perf }, { "read-graph", cmd__read_graph }, { "read-midx", cmd__read_midx }, { "ref-store", cmd__ref_store }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index e8069a3b2222b9..7f451a1eb5d42d 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -43,6 +43,8 @@ int cmd__proc_receive(int argc, const char **argv); int cmd__progress(int argc, const char **argv); int cmd__reach(int argc, const char **argv); int cmd__read_cache(int argc, const char **argv); +int cmd__read_cache_again(int argc, const char **argv); +int cmd__read_cache_perf(int argc, const char **argv); int cmd__read_graph(int argc, const char **argv); int cmd__read_midx(int argc, const char **argv); int cmd__ref_store(int argc, const char **argv); diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh index 
cdd105a5945239..d0ba5173fb161a 100755 --- a/t/perf/p0002-read-cache.sh +++ b/t/perf/p0002-read-cache.sh @@ -8,7 +8,7 @@ test_perf_default_repo count=1000 test_perf "read_cache/discard_cache $count times" " - test-tool read-cache $count + test-tool read-cache-perf $count " test_done diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index 637391c6ce4608..4c199c16d4de11 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -359,7 +359,7 @@ test_expect_success UNTRACKED_CACHE 'ignore .git changes when invalidating UNTR' test_expect_success 'discard_index() also discards fsmonitor info' ' test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" && test_might_fail git update-index --refresh && - test-tool read-cache --print-and-refresh=tracked 2 >actual && + test-tool read-cache-again 2 tracked >actual && printf "tracked is%s up to date\n" "" " not" >expect && test_cmp expect actual ' From 1eb71fb0e962493a4e46c74949975a6831494992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 7 Jun 2021 13:58:25 +0200 Subject: [PATCH 009/198] test-tool: migrate read-cache-perf to parse_options() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the newly added (but then mostly copy/pasted) read-cache-perf to use the parse_options() API. This will make things easier as we add new options. Since we check the "cnt = < 1" case now via more idiomatic post-parse_options() assertions we can move from the for-loop to a while-loop and ditch the "i" variable. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/helper/test-read-cache-perf.c | 26 ++++++++++++++++++++------ t/perf/p0002-read-cache.sh | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/t/helper/test-read-cache-perf.c b/t/helper/test-read-cache-perf.c index 90176c010a10ff..54ad0c3135e2a0 100644 --- a/t/helper/test-read-cache-perf.c +++ b/t/helper/test-read-cache-perf.c @@ -1,18 +1,32 @@ #include "test-tool.h" #include "cache.h" +#include "parse-options.h" + +static const char *read_cache_perf_usage[] = { + "test-tool read-cache-perf [...]", + NULL +}; int cmd__read_cache_perf(int argc, const char **argv) { struct repository *r = the_repository; - int i, cnt = 1; + int cnt = -1; + struct option options[] = { + OPT_INTEGER(0, "count", &cnt, "number of passes"), + OPT_END() + }; - if (argc == 2) - cnt = strtol(argv[1], NULL, 0); - else - die("usage: test-tool read-cache-perf []"); + argc = parse_options(argc, argv, "test-tools", options, + read_cache_perf_usage, 0); + if (argc > 0) + usage_msg_opt("Too many arguments.", read_cache_perf_usage, + options); + if (cnt < 1) + usage_msg_opt("Need at least one pass.", read_cache_perf_usage, + options); setup_git_directory(); - for (i = 0; i < cnt; i++) { + while (cnt--) { repo_read_index(r); discard_index(r->index); } diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh index d0ba5173fb161a..1762b648654ef2 100755 --- a/t/perf/p0002-read-cache.sh +++ b/t/perf/p0002-read-cache.sh @@ -8,7 +8,7 @@ test_perf_default_repo count=1000 test_perf "read_cache/discard_cache $count times" " - test-tool read-cache-perf $count + test-tool read-cache-perf --count=$count " test_done From 32ace34988f3221c0ca4e8754e0b4372e7e24ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 7 Jun 2021 13:58:26 +0200 Subject: [PATCH 010/198] test-tool: migrate read-cache-again to parse_options() MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Change the newly added (but then mostly copy/pasted) read-cache-again to use the parse_options() API. I have no plans to further modify read-cache-again, but making these commands consistent has a value in and of itself. Since we check the "cnt = < 1" case now via more idiomatic post-parse_options() assertions we can move from the for-loop to a while-loop and ditch the "i" variable. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/helper/test-read-cache-again.c | 28 ++++++++++++++++++++++------ t/t7519-status-fsmonitor.sh | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/t/helper/test-read-cache-again.c b/t/helper/test-read-cache-again.c index 707db036cb1ade..8487f79d048e8b 100644 --- a/t/helper/test-read-cache-again.c +++ b/t/helper/test-read-cache-again.c @@ -1,20 +1,36 @@ #include "test-tool.h" #include "cache.h" +#include "parse-options.h" + +static const char *read_cache_again_usage[] = { + "test-tool read-cache-again [...] 
", + NULL +}; int cmd__read_cache_again(int argc, const char **argv) { struct repository *r = the_repository; - int i, cnt; + int cnt = -1; const char *name; + struct option options[] = { + OPT_INTEGER(0, "count", &cnt, "number of passes"), + OPT_END() + }; - if (argc != 2) - die("usage: test-tool read-cache-again "); - - cnt = strtol(argv[0], NULL, 0); + argc = parse_options(argc, argv, "test-tools", options, + read_cache_again_usage, 0); + if (argc != 1) + usage_msg_opt("Too many arguments.", read_cache_again_usage, + options); + if (cnt == -1) + cnt = 2; + else if (cnt < 1) + usage_msg_opt("Need at least one pass.", read_cache_again_usage, + options); name = argv[2]; setup_git_directory(); - for (i = 0; i < cnt; i++) { + while (cnt--) { int pos; repo_read_index(r); refresh_index(r->index, REFRESH_QUIET, diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index 4c199c16d4de11..fd0815f6b7a8de 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -359,7 +359,7 @@ test_expect_success UNTRACKED_CACHE 'ignore .git changes when invalidating UNTR' test_expect_success 'discard_index() also discards fsmonitor info' ' test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" && test_might_fail git update-index --refresh && - test-tool read-cache-again 2 tracked >actual && + test-tool read-cache-again --count=2 tracked >actual && printf "tracked is%s up to date\n" "" " not" >expect && test_cmp expect actual ' From bbf9e8f1381a33f2c2955412ed91749c0c0fe75e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 7 Jun 2021 13:58:27 +0200 Subject: [PATCH 011/198] read-cache perf: add a perf test for refresh_index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a perf test for the refresh_index() function to compliment the existing read()/discard() in a loop perf test added in 1ecb5ff141f (read-cache: add simple performance test, 2013-06-09). 
Since this test is much slower (around 10x) than the previous read()/discard() test let's run it 100 times instead of the 1000 times the first one runs. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/helper/test-read-cache-perf.c | 12 ++++++++++++ t/perf/p0002-read-cache.sh | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/t/helper/test-read-cache-perf.c b/t/helper/test-read-cache-perf.c index 54ad0c3135e2a0..a2802559a5cd25 100644 --- a/t/helper/test-read-cache-perf.c +++ b/t/helper/test-read-cache-perf.c @@ -11,8 +11,11 @@ int cmd__read_cache_perf(int argc, const char **argv) { struct repository *r = the_repository; int cnt = -1; + int refresh = 0; struct option options[] = { OPT_INTEGER(0, "count", &cnt, "number of passes"), + OPT_BOOL(0, "refresh", &refresh, + "call refresh_index() in a loop, not read()/discard()"), OPT_END() }; @@ -26,10 +29,19 @@ int cmd__read_cache_perf(int argc, const char **argv) options); setup_git_directory(); + if (refresh) + repo_read_index(r); while (cnt--) { + if (refresh) { + unsigned int flags = REFRESH_QUIET|REFRESH_PROGRESS; + refresh_index(r->index, flags, NULL, NULL, NULL); + continue; + } repo_read_index(r); discard_index(r->index); } + if (refresh) + discard_index(r->index); return 0; } diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh index 1762b648654ef2..cbccc5ace95d07 100755 --- a/t/perf/p0002-read-cache.sh +++ b/t/perf/p0002-read-cache.sh @@ -11,4 +11,9 @@ test_perf "read_cache/discard_cache $count times" " test-tool read-cache-perf --count=$count " +count=100 +test_perf "refresh_index() $count times" " + test-tool read-cache-perf --count=$count --refresh +" + test_done From 7fd34207440af057c372477fc1fc51fdc169ec89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Tue, 8 Jun 2021 14:16:27 +0200 Subject: [PATCH 012/198] upload-pack: run is_repository_shallow() before setup_revisions() MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Move the is_repository_shallow() added in b790e0f67cd (upload-pack: send shallow info over stdin to pack-objects, 2014-03-11) to above setup_revisions(). Running is_repository_shallow() before setup_revisions() doesn't matter now, but in subsequent commits we'll make the code that followed setup_revisions() happen inside a callback in that function. This isolated change documents that re-arranging this part of the code is OK in isolation. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de00adbb9e0d51..1fbaa34f91b264 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3757,11 +3757,12 @@ static void get_object_list(int ac, const char **av) repo_init_revisions(the_repository, &revs, NULL); save_commit_buffer = 0; - setup_revisions(ac, av, &revs, &s_r_opt); /* make sure shallows are read */ is_repository_shallow(the_repository); + setup_revisions(ac, av, &revs, &s_r_opt); + save_warning = warn_on_object_refname_ambiguity; warn_on_object_refname_ambiguity = 0; From 2521047754c785190aed85be08b3f2867e3b6195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= Date: Sat, 12 Jun 2021 18:50:41 +0200 Subject: [PATCH 013/198] =?UTF-8?q?cache.h:=20Introduce=20a=20generic=20"x?= =?UTF-8?q?dg=5Fconfig=5Fhome=5Ffor(=E2=80=A6)"=20function?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current implementation of `xdg_config_home(filename)` returns `$XDG_CONFIG_HOME/git/$filename`, with the `git` subdirectory inserted between the `XDG_CONFIG_HOME` environment variable and the parameter. This patch introduces a `xdg_config_home_for(subdir, filename)` function which is more generic. 
It only concatenates "$XDG_CONFIG_HOME", or "$HOME/.config" if the former isn’t defined, with the parameters, without adding `git` in between. `xdg_config_home(filename)` is now implemented by calling `xdg_config_home_for("git", filename)` but this new generic function can be used to compute the configuration directory of other programs. Signed-off-by: Lénaïc Huard Signed-off-by: Junio C Hamano --- cache.h | 7 +++++++ path.c | 13 ++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cache.h b/cache.h index ba04ff8bd36b36..2a0fb3e4ba16f0 100644 --- a/cache.h +++ b/cache.h @@ -1286,6 +1286,13 @@ int is_ntfs_dotmailmap(const char *name); */ int looks_like_command_line_option(const char *str); +/** + * Return a newly allocated string with the evaluation of + * "$XDG_CONFIG_HOME/$subdir/$filename" if $XDG_CONFIG_HOME is non-empty, otherwise + * "$HOME/.config/$subdir/$filename". Return NULL upon error. + */ +char *xdg_config_home_for(const char *subdir, const char *filename); + /** * Return a newly allocated string with the evaluation of * "$XDG_CONFIG_HOME/git/$filename" if $XDG_CONFIG_HOME is non-empty, otherwise diff --git a/path.c b/path.c index 7bccd830e95890..1b1de3be0932d8 100644 --- a/path.c +++ b/path.c @@ -1503,21 +1503,28 @@ int looks_like_command_line_option(const char *str) return str && str[0] == '-'; } -char *xdg_config_home(const char *filename) +char *xdg_config_home_for(const char *subdir, const char *filename) { const char *home, *config_home; + assert(subdir); assert(filename); config_home = getenv("XDG_CONFIG_HOME"); if (config_home && *config_home) - return mkpathdup("%s/git/%s", config_home, filename); + return mkpathdup("%s/%s/%s", config_home, subdir, filename); home = getenv("HOME"); if (home) - return mkpathdup("%s/.config/git/%s", home, filename); + return mkpathdup("%s/.config/%s/%s", home, subdir, filename); + return NULL; } +char *xdg_config_home(const char *filename) +{ + return xdg_config_home_for("git", filename); 
+} + char *xdg_cache_home(const char *filename) { const char *home, *cache_home; From 9c0d17a61d2cd952b182eb2e2d095cbed712d445 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Fri, 11 Jun 2021 15:54:25 -0700 Subject: [PATCH 014/198] t7400-submodule-basic: modernize inspect() helper Since the inspect() helper in the submodule-basic test suite was written, 'git -C ' was added. By using -C, we no longer need a reference to the base directory for the test. This simplifies callsites, and will make the addition of other arguments in later patches more readable. Signed-off-by: Emily Shaffer Signed-off-by: Junio C Hamano --- t/t7400-submodule-basic.sh | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index a924fdb7a6c9aa..f5dc051a6e4f18 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -107,25 +107,18 @@ test_expect_success 'setup - repository to add submodules to' ' # generates, which will expand symbolic links. submodurl=$(pwd -P) -listbranches() { - git for-each-ref --format='%(refname)' 'refs/heads/*' -} - inspect() { dir=$1 && - dotdot="${2:-..}" && - ( - cd "$dir" && - listbranches >"$dotdot/heads" && - { git symbolic-ref HEAD || :; } >"$dotdot/head" && - git rev-parse HEAD >"$dotdot/head-sha1" && - git update-index --refresh && - git diff-files --exit-code && - git clean -n -d -x >"$dotdot/untracked" - ) + git -C "$dir" for-each-ref --format='%(refname)' 'refs/heads/*' >heads && + { git -C "$dir" symbolic-ref HEAD || :; } >head && + git -C "$dir" rev-parse HEAD >head-sha1 && + git -C "$dir" update-index --refresh && + git -C "$dir" diff-files --exit-code && + git -C "$dir" clean -n -d -x >untracked } + test_expect_success 'submodule add' ' echo "refs/heads/main" >expect && @@ -146,7 +139,7 @@ test_expect_success 'submodule add' ' ) && rm -f heads head untracked && - inspect addtest/submod ../.. 
&& + inspect addtest/submod && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -237,7 +230,7 @@ test_expect_success 'submodule add --branch' ' ) && rm -f heads head untracked && - inspect addtest/submod-branch ../.. && + inspect addtest/submod-branch && test_cmp expect-heads heads && test_cmp expect-head head && test_must_be_empty untracked @@ -253,7 +246,7 @@ test_expect_success 'submodule add with ./ in path' ' ) && rm -f heads head untracked && - inspect addtest/dotsubmod/frotz ../../.. && + inspect addtest/dotsubmod/frotz && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -269,7 +262,7 @@ test_expect_success 'submodule add with /././ in path' ' ) && rm -f heads head untracked && - inspect addtest/dotslashdotsubmod/frotz ../../.. && + inspect addtest/dotslashdotsubmod/frotz && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -285,7 +278,7 @@ test_expect_success 'submodule add with // in path' ' ) && rm -f heads head untracked && - inspect addtest/slashslashsubmod/frotz ../../.. && + inspect addtest/slashslashsubmod/frotz && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -301,7 +294,7 @@ test_expect_success 'submodule add with /.. in path' ' ) && rm -f heads head untracked && - inspect addtest/realsubmod ../.. && + inspect addtest/realsubmod && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -317,7 +310,7 @@ test_expect_success 'submodule add with ./, /.. and // in path' ' ) && rm -f heads head untracked && - inspect addtest/realsubmod2 ../.. && + inspect addtest/realsubmod2 && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -348,7 +341,7 @@ test_expect_success 'submodule add in subdirectory' ' ) && rm -f heads head untracked && - inspect addtest/realsubmod3 ../.. 
&& + inspect addtest/realsubmod3 && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked From cb3a2ac603528856e64137ef45b171f77ffaaa81 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Jun 2021 05:16:09 +0000 Subject: [PATCH 015/198] xdiff: implement a zealous diff3, or "zdiff3" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "zdiff3" is identical to ordinary diff3 except that it allows compaction of common lines on the two sides of history at the beginning or end of the conflict hunk. For example, the following diff3 conflict: 1 2 3 4 <<<<<< A B C D E |||||| 5 6 ====== A X C Y E >>>>>> 7 8 9 has common lines 'A', 'C', and 'E' on the two sides. With zdiff3, one would instead get the following conflict: 1 2 3 4 A <<<<<< B C D |||||| 5 6 ====== X C Y >>>>>> E 7 8 9 Note that the common lines, 'A', and 'E' were moved outside the conflict. Unlike with the two-way conflicts from the 'merge' conflictStyle, the zdiff3 conflict is NOT split into multiple conflict regions to allow the common 'C' lines to be shown outside a conflict, because zdiff3 shows the base version too and the base version cannot be reasonably split. 
Initial-patch-by: Uwe Kleine-König Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/merge-file.c | 2 + contrib/completion/git-completion.bash | 2 +- xdiff-interface.c | 2 + xdiff/xdiff.h | 1 + xdiff/xmerge.c | 52 ++++++++++++++++++++++++-- 5 files changed, 55 insertions(+), 4 deletions(-) diff --git a/builtin/merge-file.c b/builtin/merge-file.c index 06a2f90c4875f2..e695867ee54894 100644 --- a/builtin/merge-file.c +++ b/builtin/merge-file.c @@ -34,6 +34,8 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix) struct option options[] = { OPT_BOOL('p', "stdout", &to_stdout, N_("send results to standard output")), OPT_SET_INT(0, "diff3", &xmp.style, N_("use a diff3 based merge"), XDL_MERGE_DIFF3), + OPT_SET_INT(0, "zdiff3", &xmp.style, N_("use a zealous diff3 based merge"), + XDL_MERGE_ZEALOUS_DIFF3), OPT_SET_INT(0, "ours", &xmp.favor, N_("for conflicts, use our version"), XDL_MERGE_FAVOR_OURS), OPT_SET_INT(0, "theirs", &xmp.favor, N_("for conflicts, use their version"), diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index b50c5d0ea3867b..8594559298140f 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1566,7 +1566,7 @@ _git_checkout () case "$cur" in --conflict=*) - __gitcomp "diff3 merge" "" "${cur##--conflict=}" + __gitcomp "diff3 merge zdiff3" "" "${cur##--conflict=}" ;; --*) __gitcomp_builtin checkout diff --git a/xdiff-interface.c b/xdiff-interface.c index 609615db2cd68f..9977813a9d37d4 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -308,6 +308,8 @@ int git_xmerge_config(const char *var, const char *value, void *cb) die("'%s' is not a boolean", var); if (!strcmp(value, "diff3")) git_xmerge_style = XDL_MERGE_DIFF3; + else if (!strcmp(value, "zdiff3")) + git_xmerge_style = XDL_MERGE_ZEALOUS_DIFF3; else if (!strcmp(value, "merge")) git_xmerge_style = 0; /* diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 
7a046051468f9e..8629ae287c79c3 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -65,6 +65,7 @@ extern "C" { /* merge output styles */ #define XDL_MERGE_DIFF3 1 +#define XDL_MERGE_ZEALOUS_DIFF3 2 typedef struct s_mmfile { char *ptr; diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index 1659edb45393a6..b1dc9df7ea0ac9 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -230,7 +230,7 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, 1, dest ? dest + size : NULL); - if (style == XDL_MERGE_DIFF3) { + if (style == XDL_MERGE_DIFF3 || style == XDL_MERGE_ZEALOUS_DIFF3) { /* Shared preimage */ if (!dest) { size += marker_size + 1 + needs_cr + marker3_size; @@ -327,7 +327,7 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, * lines. Try hard to show only these few lines as conflicting. */ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, - xpparam_t const *xpp) + xpparam_t const *xpp, int style) { for (; m; m = m->next) { mmfile_t t1, t2; @@ -368,6 +368,42 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, continue; } x = xscr; + if (style == XDL_MERGE_ZEALOUS_DIFF3) { + int advance1 = xscr->i1, advance2 = xscr->i2; + + /* + * Advance m->i1 and m->i2 so that conflict for sides + * 1 and 2 start after common region. Decrement + * m->chg[12] since there are now fewer conflict lines + * for those sides. + */ + m->i1 += advance1; + m->i2 += advance2; + m->chg1 -= advance1; + m->chg2 -= advance2; + + /* + * Splitting conflicts due to internal common regions + * on the two sides would be inappropriate since we + * are also showing the merge base and have no + * reasonable way to split the merge base text. + */ + while (xscr->next) + xscr = xscr->next; + + /* + * Lower the number of conflict lines to not include + * the final common lines, if any. 
Do this by setting + * number of conflict lines to + * (line offset for start of conflict in xscr) + + * (number of lines in the conflict in xscr) + */ + m->chg1 = (xscr->i1 - advance1) + (xscr->chg1); + m->chg2 = (xscr->i2 - advance2) + (xscr->chg2); + xdl_free_env(&xe); + xdl_free_script(x); + continue; + } m->i1 = xscr->i1 + i1; m->chg1 = xscr->chg1; m->i2 = xscr->i2 + i2; @@ -482,6 +518,16 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, int style = xmp->style; int favor = xmp->favor; + /* + * XDL_MERGE_DIFF3 does not attempt to refine conflicts by looking + * at common areas of sides 1 & 2, because the base (side 0) does + * not match and is being shown. + * + * XDL_MERGE_ZEALOUS_DIFF3 will attempt to refine conflicts + * looking for common areas of sides 1 & 2, despite the base + * not matching and being shown, but will only look for common + * areas at the beginning or ending of the conflict block. + */ if (style == XDL_MERGE_DIFF3) { /* * "diff3 -m" output does not make sense for anything @@ -604,7 +650,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, changes = c; /* refine conflicts */ if (XDL_MERGE_ZEALOUS <= level && - (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 || + (xdl_refine_conflicts(xe1, xe2, changes, xpp, style) < 0 || xdl_simplify_non_conflicts(xe1, changes, XDL_MERGE_ZEALOUS < level) < 0)) { xdl_cleanup_merge(changes); From cf9d93e547cedd172f2180dab49fef27b0d2208d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Jun 2021 05:16:10 +0000 Subject: [PATCH 016/198] update documentation for new zdiff3 conflictStyle Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/config/merge.txt | 9 +++++++- Documentation/git-checkout.txt | 3 +-- Documentation/git-merge-file.txt | 3 +++ Documentation/git-merge.txt | 32 ++++++++++++++++++++++---- Documentation/git-rebase.txt | 6 ++--- Documentation/git-restore.txt | 3 +-- Documentation/git-switch.txt | 3 +-- Documentation/technical/rerere.txt | 10 
++++---- builtin/checkout.c | 2 +- contrib/completion/git-completion.bash | 4 ++-- 10 files changed, 52 insertions(+), 23 deletions(-) diff --git a/Documentation/config/merge.txt b/Documentation/config/merge.txt index cb2ed589075baa..7ab289f35c7f14 100644 --- a/Documentation/config/merge.txt +++ b/Documentation/config/merge.txt @@ -4,7 +4,14 @@ merge.conflictStyle:: shows a `<<<<<<<` conflict marker, changes made by one side, a `=======` marker, changes made by the other side, and then a `>>>>>>>` marker. An alternate style, "diff3", adds a `|||||||` - marker and the original text before the `=======` marker. + marker and the original text before the `=======` marker. The + "merge" style tends to produce smaller conflict regions than diff3, + both because of the exclusion of the original text, and because + when a subset of lines match on the two sides they are just pulled + out of the conflict region. Another alternate style, "zdiff3", is + similar to diff3 but removes matching lines on the two sides from + the conflict region when those matching lines appear near the + beginning or ending of a conflict region. merge.defaultToUpstream:: If merge is called without any commit argument, merge the upstream diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt index b1a6fe44997306..85c3d3513f74c2 100644 --- a/Documentation/git-checkout.txt +++ b/Documentation/git-checkout.txt @@ -265,8 +265,7 @@ When switching branches with `--merge`, staged changes may be lost. The same as `--merge` option above, but changes the way the conflicting hunks are presented, overriding the `merge.conflictStyle` configuration variable. Possible values are - "merge" (default) and "diff3" (in addition to what is shown by - "merge" style, shows the original contents). + "merge" (default), "diff3", and "zdiff3". 
-p:: --patch:: diff --git a/Documentation/git-merge-file.txt b/Documentation/git-merge-file.txt index f85603261325f6..7e9093fab60d26 100644 --- a/Documentation/git-merge-file.txt +++ b/Documentation/git-merge-file.txt @@ -70,6 +70,9 @@ OPTIONS --diff3:: Show conflicts in "diff3" style. +--zdiff3:: + Show conflicts in "zdiff3" style. + --ours:: --theirs:: --union:: diff --git a/Documentation/git-merge.txt b/Documentation/git-merge.txt index 3819fadac1f1e4..259e1ac2cf0c97 100644 --- a/Documentation/git-merge.txt +++ b/Documentation/git-merge.txt @@ -238,7 +238,8 @@ from the RCS suite to present such a conflicted hunk, like this: ------------ Here are lines that are either unchanged from the common -ancestor, or cleanly resolved because only one side changed. +ancestor, or cleanly resolved because only one side changed, +or cleanly resolved because both sides changed the same way. <<<<<<< yours:sample.txt Conflict resolution is hard; let's go shopping. @@ -259,16 +260,37 @@ side wants to say it is hard and you'd prefer to go shopping, while the other side wants to claim it is easy. An alternative style can be used by setting the "merge.conflictStyle" -configuration variable to "diff3". In "diff3" style, the above conflict -may look like this: +configuration variable to either "diff3" or "zdiff3". In "diff3" +style, the above conflict may look like this: ------------ Here are lines that are either unchanged from the common -ancestor, or cleanly resolved because only one side changed. +ancestor, or cleanly resolved because only one side changed, <<<<<<< yours:sample.txt +or cleanly resolved because both sides changed the same way. Conflict resolution is hard; let's go shopping. -||||||| +||||||| base:sample.txt +or cleanly resolved because both sides changed identically. +Conflict resolution is hard. +======= +or cleanly resolved because both sides changed the same way. +Git makes conflict resolution easy. 
+>>>>>>> theirs:sample.txt +And here is another line that is cleanly resolved or unmodified. +------------ + +while in "zdiff3" style, it may look like this: + +------------ +Here are lines that are either unchanged from the common +ancestor, or cleanly resolved because only one side changed, +or cleanly resolved because both sides changed the same way. +<<<<<<< yours:sample.txt +Conflict resolution is hard; +let's go shopping. +||||||| base:sample.txt +or cleanly resolved because both sides changed identically. Conflict resolution is hard. ======= Git makes conflict resolution easy. diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index 55af6fd24e27cd..a61742c8f98f43 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -740,9 +740,9 @@ information about the rebased commits and their parents (and instead generates new fake commits based off limited information in the generated patches), those commits cannot be identified; instead it has to fall back to a commit summary. Also, when merge.conflictStyle is -set to diff3, the apply backend will use "constructed merge base" to -label the content from the merge base, and thus provide no information -about the merge base commit whatsoever. +set to diff3 or zdiff3, the apply backend will use "constructed merge +base" to label the content from the merge base, and thus provide no +information about the merge base commit whatsoever. The merge backend works with the full commits on both sides of history and thus has no such limitations. diff --git a/Documentation/git-restore.txt b/Documentation/git-restore.txt index 55bde91ef9e54b..5964810caa4153 100644 --- a/Documentation/git-restore.txt +++ b/Documentation/git-restore.txt @@ -92,8 +92,7 @@ in linkgit:git-checkout[1] for details. The same as `--merge` option above, but changes the way the conflicting hunks are presented, overriding the `merge.conflictStyle` configuration variable. 
Possible values - are "merge" (default) and "diff3" (in addition to what is - shown by "merge" style, shows the original contents). + are "merge" (default), "diff3", and "zdiff3". --ignore-unmerged:: When restoring files on the working tree from the index, do diff --git a/Documentation/git-switch.txt b/Documentation/git-switch.txt index 5c438cd5058758..5c90f76fbe3511 100644 --- a/Documentation/git-switch.txt +++ b/Documentation/git-switch.txt @@ -137,8 +137,7 @@ should result in deletion of the path). The same as `--merge` option above, but changes the way the conflicting hunks are presented, overriding the `merge.conflictStyle` configuration variable. Possible values are - "merge" (default) and "diff3" (in addition to what is shown by - "merge" style, shows the original contents). + "merge" (default), "diff3", and "zdiff3". -q:: --quiet:: diff --git a/Documentation/technical/rerere.txt b/Documentation/technical/rerere.txt index af5f9fc24f9343..35d454143399e0 100644 --- a/Documentation/technical/rerere.txt +++ b/Documentation/technical/rerere.txt @@ -14,9 +14,9 @@ conflicts before writing them to the rerere database. Different conflict styles and branch names are normalized by stripping the labels from the conflict markers, and removing the common ancestor -version from the `diff3` conflict style. Branches that are merged -in different order are normalized by sorting the conflict hunks. More -on each of those steps in the following sections. +version from the `diff3` or `zdiff3` conflict styles. Branches that +are merged in different order are normalized by sorting the conflict +hunks. More on each of those steps in the following sections. 
Once these two normalization operations are applied, a conflict ID is calculated based on the normalized conflict, which is later used by @@ -42,8 +42,8 @@ get a conflict like the following: >>>>>>> AC Doing the analogous with AC2 (forking a branch ABAC2 off of branch AB -and then merging branch AC2 into it), using the diff3 conflict style, -we get a conflict like the following: +and then merging branch AC2 into it), using the diff3 or zdiff3 +conflict style, we get a conflict like the following: <<<<<<< HEAD B diff --git a/builtin/checkout.c b/builtin/checkout.c index f4cd7747d35dd1..45606936c328bf 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1524,7 +1524,7 @@ static struct option *add_common_options(struct checkout_opts *opts, OPT_BOOL(0, "progress", &opts->show_progress, N_("force progress reporting")), OPT_BOOL('m', "merge", &opts->merge, N_("perform a 3-way merge with the new branch")), OPT_STRING(0, "conflict", &opts->conflict_style, N_("style"), - N_("conflict style (merge or diff3)")), + N_("conflict style (merge, diff3, or zdiff3)")), OPT_END() }; struct option *newopts = parse_options_concat(prevopts, options); diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 8594559298140f..8489ca39497095 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2445,7 +2445,7 @@ _git_switch () case "$cur" in --conflict=*) - __gitcomp "diff3 merge" "" "${cur##--conflict=}" + __gitcomp "diff3 merge zdiff3" "" "${cur##--conflict=}" ;; --*) __gitcomp_builtin switch @@ -2886,7 +2886,7 @@ _git_restore () case "$cur" in --conflict=*) - __gitcomp "diff3 merge" "" "${cur##--conflict=}" + __gitcomp "diff3 merge zdiff3" "" "${cur##--conflict=}" ;; --source=*) __git_complete_refs --cur="${cur##--source=}" From 9dcf24461440f9acff960d6b5d3592334248bf8e Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Tue, 15 Jun 2021 17:45:06 -0700 Subject: [PATCH 017/198] introduce 
submodule.superprojectGitDir cache Teach submodules a reference to their superproject's gitdir. This allows us to A) know that we're running from a submodule, and B) have a shortcut to the superproject's vitals, for example, configs. By using a relative path instead of an absolute path, we can move the superproject directory around on the filesystem without breaking the submodule's cache. Since this cached value is only introduced during new submodule creation via `git submodule add`, though, there is more work to do to allow the cache to be created at other times. Signed-off-by: Emily Shaffer Signed-off-by: Junio C Hamano --- Documentation/config/submodule.txt | 12 +++++++++ builtin/submodule--helper.c | 4 +++ t/t7400-submodule-basic.sh | 40 ++++++++++++++++-------------- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/Documentation/config/submodule.txt b/Documentation/config/submodule.txt index d7a63c8c12bbc2..7c459cc19e4a3b 100644 --- a/Documentation/config/submodule.txt +++ b/Documentation/config/submodule.txt @@ -90,3 +90,15 @@ submodule.alternateErrorStrategy:: `ignore`, `info`, `die`. Default is `die`. Note that if set to `ignore` or `info`, and if there is an error with the computed alternate, the clone proceeds as if no alternate was specified. + +submodule.superprojectGitDir:: + The relative path from the submodule's worktree to the superproject's + gitdir. This config should only be present in projects which are + submodules, but is not guaranteed to be present in every submodule. It + is set automatically during submodule creation. ++ + In situations where more than one superproject references the same + submodule worktree, the value of this config and the behavior of + operations which use it are undefined. To reference a single project + from multiple superprojects, it is better to create a worktree of the + submodule for each superproject. 
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index ae6174ab05bd83..7024a696625141 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1910,6 +1910,10 @@ static int module_clone(int argc, const char **argv, const char *prefix) git_config_set_in_file(p, "submodule.alternateErrorStrategy", error_strategy); + git_config_set_in_file(p, "submodule.superprojectGitdir", + relative_path(absolute_path(get_git_dir()), + path, &sb)); + free(sm_alternate); free(error_strategy); diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index f5dc051a6e4f18..e45f42588feb36 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -108,14 +108,18 @@ test_expect_success 'setup - repository to add submodules to' ' submodurl=$(pwd -P) inspect() { - dir=$1 && - - git -C "$dir" for-each-ref --format='%(refname)' 'refs/heads/*' >heads && - { git -C "$dir" symbolic-ref HEAD || :; } >head && - git -C "$dir" rev-parse HEAD >head-sha1 && - git -C "$dir" update-index --refresh && - git -C "$dir" diff-files --exit-code && - git -C "$dir" clean -n -d -x >untracked + sub_dir=$1 && + super_dir=$2 && + + git -C "$sub_dir" for-each-ref --format='%(refname)' 'refs/heads/*' >heads && + { git -C "$sub_dir" symbolic-ref HEAD || :; } >head && + git -C "$sub_dir" rev-parse HEAD >head-sha1 && + git -C "$sub_dir" update-index --refresh && + git -C "$sub_dir" diff-files --exit-code && + cached_super_dir="$(git -C "$sub_dir" config --get submodule.superprojectGitDir)" && + [ "$(git -C "$super_dir" rev-parse --absolute-git-dir)" \ + -ef "$sub_dir/$cached_super_dir" ] && + git -C "$sub_dir" clean -n -d -x >untracked } @@ -139,7 +143,7 @@ test_expect_success 'submodule add' ' ) && rm -f heads head untracked && - inspect addtest/submod && + inspect addtest/submod addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -230,7 +234,7 @@ test_expect_success 'submodule add --branch' ' ) && rm -f 
heads head untracked && - inspect addtest/submod-branch && + inspect addtest/submod-branch addtest && test_cmp expect-heads heads && test_cmp expect-head head && test_must_be_empty untracked @@ -246,7 +250,7 @@ test_expect_success 'submodule add with ./ in path' ' ) && rm -f heads head untracked && - inspect addtest/dotsubmod/frotz && + inspect addtest/dotsubmod/frotz addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -262,7 +266,7 @@ test_expect_success 'submodule add with /././ in path' ' ) && rm -f heads head untracked && - inspect addtest/dotslashdotsubmod/frotz && + inspect addtest/dotslashdotsubmod/frotz addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -278,7 +282,7 @@ test_expect_success 'submodule add with // in path' ' ) && rm -f heads head untracked && - inspect addtest/slashslashsubmod/frotz && + inspect addtest/slashslashsubmod/frotz addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -294,7 +298,7 @@ test_expect_success 'submodule add with /.. in path' ' ) && rm -f heads head untracked && - inspect addtest/realsubmod && + inspect addtest/realsubmod addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -310,7 +314,7 @@ test_expect_success 'submodule add with ./, /.. 
and // in path' ' ) && rm -f heads head untracked && - inspect addtest/realsubmod2 && + inspect addtest/realsubmod2 addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -341,7 +345,7 @@ test_expect_success 'submodule add in subdirectory' ' ) && rm -f heads head untracked && - inspect addtest/realsubmod3 && + inspect addtest/realsubmod3 addtest && test_cmp expect heads && test_cmp expect head && test_must_be_empty untracked @@ -482,7 +486,7 @@ test_expect_success 'update should work when path is an empty dir' ' git submodule update -q >update.out && test_must_be_empty update.out && - inspect init && + inspect init . && test_cmp expect head-sha1 ' @@ -541,7 +545,7 @@ test_expect_success 'update should checkout rev1' ' echo "$rev1" >expect && git submodule update init && - inspect init && + inspect init . && test_cmp expect head-sha1 ' From 74e46ce157d1a0ddf630f06ac7df782c30b9e457 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Tue, 15 Jun 2021 17:45:07 -0700 Subject: [PATCH 018/198] submodule: cache superproject gitdir during absorbgitdirs Already during 'git submodule add' we cache a pointer to the superproject's gitdir. However, this doesn't help brand-new submodules created with 'git init' and later absorbed with 'git submodule absorbgitdirs'. Let's start adding that pointer during 'git submodule absorbgitdirs' too.
Signed-off-by: Emily Shaffer Signed-off-by: Junio C Hamano --- submodule.c | 10 ++++++++++ t/t7412-submodule-absorbgitdirs.sh | 9 ++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/submodule.c b/submodule.c index 0b1d9c1dde5a8f..4b314bf09c72c3 100644 --- a/submodule.c +++ b/submodule.c @@ -2065,6 +2065,7 @@ static void relocate_single_git_dir_into_superproject(const char *path) char *old_git_dir = NULL, *real_old_git_dir = NULL, *real_new_git_dir = NULL; char *new_git_dir; const struct submodule *sub; + struct strbuf config_path = STRBUF_INIT, sb = STRBUF_INIT; if (submodule_uses_worktrees(path)) die(_("relocate_gitdir for submodule '%s' with " @@ -2096,6 +2097,15 @@ static void relocate_single_git_dir_into_superproject(const char *path) relocate_gitdir(path, real_old_git_dir, real_new_git_dir); + /* cache pointer to superproject's gitdir */ + /* NEEDSWORK: this may differ if experimental.worktreeConfig is enabled */ + strbuf_addf(&config_path, "%s/config", real_new_git_dir); + git_config_set_in_file(config_path.buf, "submodule.superprojectGitdir", + relative_path(get_super_prefix_or_empty(), + path, &sb)); + + strbuf_release(&config_path); + strbuf_release(&sb); free(old_git_dir); free(real_old_git_dir); free(real_new_git_dir); diff --git a/t/t7412-submodule-absorbgitdirs.sh b/t/t7412-submodule-absorbgitdirs.sh index 1cfa150768d7f8..e2d78e01df3e5f 100755 --- a/t/t7412-submodule-absorbgitdirs.sh +++ b/t/t7412-submodule-absorbgitdirs.sh @@ -30,7 +30,14 @@ test_expect_success 'absorb the git dir' ' git status >actual.1 && git -C sub1 rev-parse HEAD >actual.2 && test_cmp expect.1 actual.1 && - test_cmp expect.2 actual.2 + test_cmp expect.2 actual.2 && + + # make sure the submodule cached the superproject gitdir correctly + test-tool path-utils real_path . 
>expect && + test-tool path-utils real_path \ + "$(git -C sub1 config submodule.superprojectGitDir)" >actual && + + test_cmp expect actual ' test_expect_success 'absorbing does not fail for deinitialized submodules' ' From d041338fb2f84881e3fd45ba35c60b803c5709a3 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Tue, 15 Jun 2021 17:45:08 -0700 Subject: [PATCH 019/198] submodule: cache superproject gitdir during 'update' A cached path to the superproject's gitdir might be added during 'git submodule add', but in some cases - like submodules which were created before 'git submodule add' learned to cache that info - it might be useful to update the cache. Let's do it during 'git submodule update', when we already have a handle to the superproject while calling operations on the submodules. Signed-off-by: Emily Shaffer Signed-off-by: Junio C Hamano --- git-submodule.sh | 10 ++++++++++ t/t7406-submodule-update.sh | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/git-submodule.sh b/git-submodule.sh index 46783784240668..f98dcc16aecd31 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -648,6 +648,16 @@ cmd_update() fi fi + # Cache a pointer to the superproject's gitdir. This may have + # changed, so rewrite it unconditionally. Writes it to worktree + # if applicable, otherwise to local. 
+ relative_gitdir="$(git rev-parse --path-format=relative \ + --prefix "${sm_path}" \ + --git-dir)" + + git -C "$sm_path" config --worktree \ + submodule.superprojectgitdir "$relative_gitdir" + if test -n "$recursive" then ( diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index f4f61fe5544cdb..c39821ba8e9df8 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -1061,4 +1061,14 @@ test_expect_success 'submodule update --quiet passes quietness to fetch with a s ) ' +test_expect_success 'submodule update adds superproject gitdir to older repos' ' + (cd super && + git -C submodule config --unset submodule.superprojectGitdir && + git submodule update && + echo "../.git" >expect && + git -C submodule config submodule.superprojectGitdir >actual && + test_cmp expect actual + ) +' + test_done From 3fa6c577dcf8a578a7241d2210139419750b3740 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 16 Jun 2021 13:43:34 +0900 Subject: [PATCH 020/198] SQUASH??? --- Documentation/config/submodule.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/config/submodule.txt b/Documentation/config/submodule.txt index 7c459cc19e4a3b..58ba63a75e91ad 100644 --- a/Documentation/config/submodule.txt +++ b/Documentation/config/submodule.txt @@ -97,8 +97,5 @@ submodule.superprojectGitDir:: submodules, but is not guaranteed to be present in every submodule. It is set automatically during submodule creation. + - In situations where more than one superproject references the same - submodule worktree, the value of this config and the behavior of - operations which use it are undefined. To reference a single project - from multiple superprojects, it is better to create a worktree of the - submodule for each superproject. +Because of this configuration variable, it is forbidden to use the +same submodule worktree shared by multiple superprojects. 
From 74e34878155fc8658110279f036ceaaa80f92931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= Date: Fri, 2 Jul 2021 16:25:55 +0200 Subject: [PATCH 021/198] maintenance: `git maintenance run` learned `--scheduler=<scheduler>` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depending on the system, different schedulers can be used to schedule the hourly, daily and weekly executions of `git maintenance run`: * `launchctl` for macOS, * `schtasks` for Windows and * `crontab` for everything else. `git maintenance start` now has an option to let the end-user explicitly choose which scheduler they want to use: `--scheduler=auto|crontab|launchctl|schtasks`. When `git maintenance start --scheduler=XXX` is run, it not only registers `git maintenance run` tasks in the scheduler XXX, it also removes the `git maintenance run` tasks from all the other schedulers to ensure we cannot have two schedulers launching concurrent identical tasks. The default value is `auto` which chooses a suitable scheduler for the system. `git maintenance stop` doesn't have any `--scheduler` parameter because this command will try to remove the `git maintenance run` tasks from all the available schedulers. Signed-off-by: Lénaïc Huard Signed-off-by: Junio C Hamano --- Documentation/git-maintenance.txt | 9 + builtin/gc.c | 370 ++++++++++++++++++++++++------ t/t7900-maintenance.sh | 55 ++++- 3 files changed, 359 insertions(+), 75 deletions(-) diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 1e738ad398320a..576290b5c61175 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -179,6 +179,15 @@ OPTIONS `maintenance.<task>.enabled` configured as `true` are considered. See the 'TASKS' section for the list of accepted `<task>` values.
+--scheduler=auto|crontab|launchctl|schtasks:: + When combined with the `start` subcommand, specify the scheduler + for running the hourly, daily and weekly executions of + `git maintenance run`. + Possible values for `<scheduler>` are `auto`, `crontab` (POSIX), + `launchctl` (macOS), and `schtasks` (Windows). + When `auto` is specified, the appropriate platform-specific + scheduler is used. Default is `auto`. + TROUBLESHOOTING --------------- diff --git a/builtin/gc.c b/builtin/gc.c index f05d2f0a1ac9cd..96a43f99b47eed 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1529,6 +1529,98 @@ static const char *get_frequency(enum schedule_priority schedule) } } +/* + * `get_schedule_cmd` reads the GIT_TEST_MAINT_SCHEDULER environment variable + * to mock the schedulers that `git maintenance start` relies on. + * + * For test purposes, GIT_TEST_MAINT_SCHEDULER can be set to a comma-separated + * list of colon-separated key/value pairs where each pair contains a scheduler + * and its corresponding mock. + * + * * If $GIT_TEST_MAINT_SCHEDULER is not set, return false and leave the + * arguments unmodified. + * + * * If $GIT_TEST_MAINT_SCHEDULER is set, return true. + * In this case, the *cmd value is read as input. + * + * * if the input value *cmd is the key of one of the comma-separated list + * items, then *is_available is set to true and *cmd is modified and becomes + * the mock command. + * + * * if the input value *cmd isn’t the key of any of the comma-separated list + * items, then *is_available is set to false.
+ * + * Ex.: + * GIT_TEST_MAINT_SCHEDULER not set + * ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + * ┃ Input ┃ Output ┃ + * ┃ *cmd ┃ return code │ *cmd │ *is_available ┃ + * ┣━━━━━━━╋━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┫ + * ┃ "foo" ┃ false │ "foo" (unchanged) │ (unchanged) ┃ + * ┗━━━━━━━┻━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ + * + * GIT_TEST_MAINT_SCHEDULER set to “foo:./mock_foo.sh,bar:./mock_bar.sh” + * ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + * ┃ Input ┃ Output ┃ + * ┃ *cmd ┃ return code │ *cmd │ *is_available ┃ + * ┣━━━━━━━╋━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┫ + * ┃ "foo" ┃ true │ "./mock.foo.sh" │ true ┃ + * ┃ "qux" ┃ true │ "qux" (unchanged) │ false ┃ + * ┗━━━━━━━┻━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ + */ +static int get_schedule_cmd(const char **cmd, int *is_available) +{ + char *item; + char *testing = xstrdup_or_null(getenv("GIT_TEST_MAINT_SCHEDULER")); + + if (!testing) + return 0; + + if (is_available) + *is_available = 0; + + for (item = testing;;) { + char *sep; + char *end_item = strchr(item, ','); + if (end_item) + *end_item = '\0'; + + sep = strchr(item, ':'); + if (!sep) + die("GIT_TEST_MAINT_SCHEDULER unparseable: %s", testing); + *sep = '\0'; + + if (!strcmp(*cmd, item)) { + *cmd = sep + 1; + if (is_available) + *is_available = 1; + UNLEAK(testing); + return 1; + } + + if (!end_item) + break; + item = end_item + 1; + } + + free(testing); + return 1; +} + +static int is_launchctl_available(void) +{ + const char *cmd = "launchctl"; + int is_available; + if (get_schedule_cmd(&cmd, &is_available)) + return is_available; + +#ifdef __APPLE__ + return 1; +#else + return 0; +#endif +} + static char *launchctl_service_name(const char *frequency) { struct strbuf label = STRBUF_INIT; @@ -1555,19 +1647,17 @@ static char *launchctl_get_uid(void) return xstrfmt("gui/%d", getuid()); } -static int launchctl_boot_plist(int enable, const char *filename, const char *cmd) 
+static int launchctl_boot_plist(int enable, const char *filename) { + const char *cmd = "launchctl"; int result; struct child_process child = CHILD_PROCESS_INIT; char *uid = launchctl_get_uid(); + get_schedule_cmd(&cmd, NULL); strvec_split(&child.args, cmd); - if (enable) - strvec_push(&child.args, "bootstrap"); - else - strvec_push(&child.args, "bootout"); - strvec_push(&child.args, uid); - strvec_push(&child.args, filename); + strvec_pushl(&child.args, enable ? "bootstrap" : "bootout", uid, + filename, NULL); child.no_stderr = 1; child.no_stdout = 1; @@ -1581,26 +1671,26 @@ static int launchctl_boot_plist(int enable, const char *filename, const char *cm return result; } -static int launchctl_remove_plist(enum schedule_priority schedule, const char *cmd) +static int launchctl_remove_plist(enum schedule_priority schedule) { const char *frequency = get_frequency(schedule); char *name = launchctl_service_name(frequency); char *filename = launchctl_service_filename(name); - int result = launchctl_boot_plist(0, filename, cmd); + int result = launchctl_boot_plist(0, filename); unlink(filename); free(filename); free(name); return result; } -static int launchctl_remove_plists(const char *cmd) +static int launchctl_remove_plists(void) { - return launchctl_remove_plist(SCHEDULE_HOURLY, cmd) || - launchctl_remove_plist(SCHEDULE_DAILY, cmd) || - launchctl_remove_plist(SCHEDULE_WEEKLY, cmd); + return launchctl_remove_plist(SCHEDULE_HOURLY) || + launchctl_remove_plist(SCHEDULE_DAILY) || + launchctl_remove_plist(SCHEDULE_WEEKLY); } -static int launchctl_schedule_plist(const char *exec_path, enum schedule_priority schedule, const char *cmd) +static int launchctl_schedule_plist(const char *exec_path, enum schedule_priority schedule) { FILE *plist; int i; @@ -1669,8 +1759,8 @@ static int launchctl_schedule_plist(const char *exec_path, enum schedule_priorit fclose(plist); /* bootout might fail if not already running, so ignore */ - launchctl_boot_plist(0, filename, cmd); - if 
(launchctl_boot_plist(1, filename, cmd)) + launchctl_boot_plist(0, filename); + if (launchctl_boot_plist(1, filename)) die(_("failed to bootstrap service %s"), filename); free(filename); @@ -1678,21 +1768,35 @@ static int launchctl_schedule_plist(const char *exec_path, enum schedule_priorit return 0; } -static int launchctl_add_plists(const char *cmd) +static int launchctl_add_plists(void) { const char *exec_path = git_exec_path(); - return launchctl_schedule_plist(exec_path, SCHEDULE_HOURLY, cmd) || - launchctl_schedule_plist(exec_path, SCHEDULE_DAILY, cmd) || - launchctl_schedule_plist(exec_path, SCHEDULE_WEEKLY, cmd); + return launchctl_schedule_plist(exec_path, SCHEDULE_HOURLY) || + launchctl_schedule_plist(exec_path, SCHEDULE_DAILY) || + launchctl_schedule_plist(exec_path, SCHEDULE_WEEKLY); } -static int launchctl_update_schedule(int run_maintenance, int fd, const char *cmd) +static int launchctl_update_schedule(int run_maintenance, int fd) { if (run_maintenance) - return launchctl_add_plists(cmd); + return launchctl_add_plists(); else - return launchctl_remove_plists(cmd); + return launchctl_remove_plists(); +} + +static int is_schtasks_available(void) +{ + const char *cmd = "schtasks"; + int is_available; + if (get_schedule_cmd(&cmd, &is_available)) + return is_available; + +#ifdef GIT_WINDOWS_NATIVE + return 1; +#else + return 0; +#endif } static char *schtasks_task_name(const char *frequency) @@ -1702,13 +1806,15 @@ static char *schtasks_task_name(const char *frequency) return strbuf_detach(&label, NULL); } -static int schtasks_remove_task(enum schedule_priority schedule, const char *cmd) +static int schtasks_remove_task(enum schedule_priority schedule) { + const char *cmd = "schtasks"; int result; struct strvec args = STRVEC_INIT; const char *frequency = get_frequency(schedule); char *name = schtasks_task_name(frequency); + get_schedule_cmd(&cmd, NULL); strvec_split(&args, cmd); strvec_pushl(&args, "/delete", "/tn", name, "/f", NULL); @@ -1719,15 +1825,16 
@@ static int schtasks_remove_task(enum schedule_priority schedule, const char *cmd return result; } -static int schtasks_remove_tasks(const char *cmd) +static int schtasks_remove_tasks(void) { - return schtasks_remove_task(SCHEDULE_HOURLY, cmd) || - schtasks_remove_task(SCHEDULE_DAILY, cmd) || - schtasks_remove_task(SCHEDULE_WEEKLY, cmd); + return schtasks_remove_task(SCHEDULE_HOURLY) || + schtasks_remove_task(SCHEDULE_DAILY) || + schtasks_remove_task(SCHEDULE_WEEKLY); } -static int schtasks_schedule_task(const char *exec_path, enum schedule_priority schedule, const char *cmd) +static int schtasks_schedule_task(const char *exec_path, enum schedule_priority schedule) { + const char *cmd = "schtasks"; int result; struct child_process child = CHILD_PROCESS_INIT; const char *xml; @@ -1736,6 +1843,8 @@ static int schtasks_schedule_task(const char *exec_path, enum schedule_priority char *name = schtasks_task_name(frequency); struct strbuf tfilename = STRBUF_INIT; + get_schedule_cmd(&cmd, NULL); + strbuf_addf(&tfilename, "%s/schedule_%s_XXXXXX", get_git_common_dir(), frequency); tfile = xmks_tempfile(tfilename.buf); @@ -1840,28 +1949,52 @@ static int schtasks_schedule_task(const char *exec_path, enum schedule_priority return result; } -static int schtasks_schedule_tasks(const char *cmd) +static int schtasks_schedule_tasks(void) { const char *exec_path = git_exec_path(); - return schtasks_schedule_task(exec_path, SCHEDULE_HOURLY, cmd) || - schtasks_schedule_task(exec_path, SCHEDULE_DAILY, cmd) || - schtasks_schedule_task(exec_path, SCHEDULE_WEEKLY, cmd); + return schtasks_schedule_task(exec_path, SCHEDULE_HOURLY) || + schtasks_schedule_task(exec_path, SCHEDULE_DAILY) || + schtasks_schedule_task(exec_path, SCHEDULE_WEEKLY); } -static int schtasks_update_schedule(int run_maintenance, int fd, const char *cmd) +static int schtasks_update_schedule(int run_maintenance, int fd) { if (run_maintenance) - return schtasks_schedule_tasks(cmd); + return schtasks_schedule_tasks(); else 
- return schtasks_remove_tasks(cmd); + return schtasks_remove_tasks(); +} + +static int is_crontab_available(void) +{ + const char *cmd = "crontab"; + int is_available; + struct child_process child = CHILD_PROCESS_INIT; + + if (get_schedule_cmd(&cmd, &is_available)) + return is_available; + + strvec_split(&child.args, cmd); + strvec_push(&child.args, "-l"); + child.no_stdin = 1; + child.no_stdout = 1; + child.no_stderr = 1; + child.silent_exec_failure = 1; + + if (start_command(&child)) + return 0; + /* Ignore exit code, as an empty crontab will return error. */ + finish_command(&child); + return 1; } #define BEGIN_LINE "# BEGIN GIT MAINTENANCE SCHEDULE" #define END_LINE "# END GIT MAINTENANCE SCHEDULE" -static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd) +static int crontab_update_schedule(int run_maintenance, int fd) { + const char *cmd = "crontab"; int result = 0; int in_old_region = 0; struct child_process crontab_list = CHILD_PROCESS_INIT; @@ -1869,6 +2002,7 @@ static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd) FILE *cron_list, *cron_in; struct strbuf line = STRBUF_INIT; + get_schedule_cmd(&cmd, NULL); strvec_split(&crontab_list.args, cmd); strvec_push(&crontab_list.args, "-l"); crontab_list.in = -1; @@ -1945,66 +2079,160 @@ static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd) return result; } +enum scheduler { + SCHEDULER_INVALID = -1, + SCHEDULER_AUTO, + SCHEDULER_CRON, + SCHEDULER_LAUNCHCTL, + SCHEDULER_SCHTASKS, +}; + +static const struct { + const char *name; + int (*is_available)(void); + int (*update_schedule)(int run_maintenance, int fd); +} scheduler_fn[] = { + [SCHEDULER_CRON] = { + .name = "crontab", + .is_available = is_crontab_available, + .update_schedule = crontab_update_schedule, + }, + [SCHEDULER_LAUNCHCTL] = { + .name = "launchctl", + .is_available = is_launchctl_available, + .update_schedule = launchctl_update_schedule, + }, + [SCHEDULER_SCHTASKS] = { + 
.name = "schtasks", + .is_available = is_schtasks_available, + .update_schedule = schtasks_update_schedule, + }, +}; + +static enum scheduler parse_scheduler(const char *value) +{ + if (!value) + return SCHEDULER_INVALID; + else if (!strcasecmp(value, "auto")) + return SCHEDULER_AUTO; + else if (!strcasecmp(value, "cron") || !strcasecmp(value, "crontab")) + return SCHEDULER_CRON; + else if (!strcasecmp(value, "launchctl")) + return SCHEDULER_LAUNCHCTL; + else if (!strcasecmp(value, "schtasks")) + return SCHEDULER_SCHTASKS; + else + return SCHEDULER_INVALID; +} + +static int maintenance_opt_scheduler(const struct option *opt, const char *arg, + int unset) +{ + enum scheduler *scheduler = opt->value; + + BUG_ON_OPT_NEG(unset); + + *scheduler = parse_scheduler(arg); + if (*scheduler == SCHEDULER_INVALID) + return error(_("unrecognized --scheduler argument '%s'"), arg); + return 0; +} + +struct maintenance_start_opts { + enum scheduler scheduler; +}; + +static enum scheduler resolve_scheduler(enum scheduler scheduler) +{ + if (scheduler != SCHEDULER_AUTO) + return scheduler; + #if defined(__APPLE__) -static const char platform_scheduler[] = "launchctl"; + return SCHEDULER_LAUNCHCTL; + #elif defined(GIT_WINDOWS_NATIVE) -static const char platform_scheduler[] = "schtasks"; + return SCHEDULER_SCHTASKS; + #else -static const char platform_scheduler[] = "crontab"; + return SCHEDULER_CRON; #endif +} -static int update_background_schedule(int enable) +static void validate_scheduler(enum scheduler scheduler) { - int result; - const char *scheduler = platform_scheduler; - const char *cmd = scheduler; - char *testing; + if (scheduler == SCHEDULER_INVALID) + BUG("invalid scheduler"); + if (scheduler == SCHEDULER_AUTO) + BUG("resolve_scheduler should have been called before"); + + if (!scheduler_fn[scheduler].is_available()) + die(_("%s scheduler is not available"), + scheduler_fn[scheduler].name); +} + +static int update_background_schedule(const struct maintenance_start_opts 
*opts, + int enable) +{ + unsigned int i; + int result = 0; struct lock_file lk; char *lock_path = xstrfmt("%s/schedule", the_repository->objects->odb->path); - testing = xstrdup_or_null(getenv("GIT_TEST_MAINT_SCHEDULER")); - if (testing) { - char *sep = strchr(testing, ':'); - if (!sep) - die("GIT_TEST_MAINT_SCHEDULER unparseable: %s", testing); - *sep = '\0'; - scheduler = testing; - cmd = sep + 1; + if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) { + free(lock_path); + return error(_("another process is scheduling background maintenance")); } - if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) { - result = error(_("another process is scheduling background maintenance")); - goto cleanup; + for (i = 1; i < ARRAY_SIZE(scheduler_fn); i++) { + if (enable && opts->scheduler == i) + continue; + if (!scheduler_fn[i].is_available()) + continue; + scheduler_fn[i].update_schedule(0, get_lock_file_fd(&lk)); } - if (!strcmp(scheduler, "launchctl")) - result = launchctl_update_schedule(enable, get_lock_file_fd(&lk), cmd); - else if (!strcmp(scheduler, "schtasks")) - result = schtasks_update_schedule(enable, get_lock_file_fd(&lk), cmd); - else if (!strcmp(scheduler, "crontab")) - result = crontab_update_schedule(enable, get_lock_file_fd(&lk), cmd); - else - die("unknown background scheduler: %s", scheduler); + if (enable) + result = scheduler_fn[opts->scheduler].update_schedule( + 1, get_lock_file_fd(&lk)); rollback_lock_file(&lk); -cleanup: free(lock_path); - free(testing); return result; } -static int maintenance_start(void) +static const char *const builtin_maintenance_start_usage[] = { + N_("git maintenance start [--scheduler=]"), + NULL +}; + +static int maintenance_start(int argc, const char **argv, const char *prefix) { + struct maintenance_start_opts opts = { 0 }; + struct option options[] = { + OPT_CALLBACK_F( + 0, "scheduler", &opts.scheduler, N_("scheduler"), + N_("scheduler to trigger git maintenance run"), + PARSE_OPT_NONEG, 
maintenance_opt_scheduler), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, options, + builtin_maintenance_start_usage, 0); + if (argc) + usage_with_options(builtin_maintenance_start_usage, options); + + opts.scheduler = resolve_scheduler(opts.scheduler); + validate_scheduler(opts.scheduler); + if (maintenance_register()) warning(_("failed to add repo to global config")); - - return update_background_schedule(1); + return update_background_schedule(&opts, 1); } static int maintenance_stop(void) { - return update_background_schedule(0); + return update_background_schedule(NULL, 0); } static const char builtin_maintenance_usage[] = N_("git maintenance []"); @@ -2018,7 +2246,7 @@ int cmd_maintenance(int argc, const char **argv, const char *prefix) if (!strcmp(argv[1], "run")) return maintenance_run(argc - 1, argv + 1, prefix); if (!strcmp(argv[1], "start")) - return maintenance_start(); + return maintenance_start(argc - 1, argv + 1, prefix); if (!strcmp(argv[1], "stop")) return maintenance_stop(); if (!strcmp(argv[1], "register")) diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index b93ae014ee58f9..b36b7f5fb0f21e 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -494,8 +494,21 @@ test_expect_success !MINGW 'register and unregister with regex metacharacters' ' maintenance.repo "$(pwd)/$META" ' +test_expect_success 'start --scheduler=' ' + test_expect_code 129 git maintenance start --scheduler=foo 2>err && + test_i18ngrep "unrecognized --scheduler argument" err && + + test_expect_code 129 git maintenance start --no-scheduler 2>err && + test_i18ngrep "unknown option" err && + + test_expect_code 128 \ + env GIT_TEST_MAINT_SCHEDULER="launchctl:true,schtasks:true" \ + git maintenance start --scheduler=crontab 2>err && + test_i18ngrep "fatal: crontab scheduler is not available" err +' + test_expect_success 'start from empty cron table' ' - GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start && + 
GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start --scheduler=crontab && # start registers the repo git config --get --global --fixed-value maintenance.repo "$(pwd)" && @@ -518,7 +531,7 @@ test_expect_success 'stop from existing schedule' ' test_expect_success 'start preserves existing schedule' ' echo "Important information!" >cron.txt && - GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start && + GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt" git maintenance start --scheduler=crontab && grep "Important information!" cron.txt ' @@ -547,7 +560,7 @@ test_expect_success 'start and stop macOS maintenance' ' EOF rm -f args && - GIT_TEST_MAINT_SCHEDULER=launchctl:./print-args git maintenance start && + GIT_TEST_MAINT_SCHEDULER=launchctl:./print-args git maintenance start --scheduler=launchctl && # start registers the repo git config --get --global --fixed-value maintenance.repo "$(pwd)" && @@ -598,7 +611,7 @@ test_expect_success 'start and stop Windows maintenance' ' EOF rm -f args && - GIT_TEST_MAINT_SCHEDULER="schtasks:./print-args" git maintenance start && + GIT_TEST_MAINT_SCHEDULER="schtasks:./print-args" git maintenance start --scheduler=schtasks && # start registers the repo git config --get --global --fixed-value maintenance.repo "$(pwd)" && @@ -621,6 +634,40 @@ test_expect_success 'start and stop Windows maintenance' ' test_cmp expect args ' +test_expect_success 'start and stop when several schedulers are available' ' + write_script print-args <<-\EOF && + printf "%s\n" "$*" | sed "s:gui/[0-9][0-9]*:gui/[UID]:; s:\(schtasks /create .* /xml\).*:\1:;" >>args + EOF + + rm -f args && + GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=launchctl && + printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \ + hourly daily weekly >expect && + for frequency in hourly daily weekly + do + 
PLIST="$pfx/Library/LaunchAgents/org.git-scm.git.$frequency.plist" && + echo "launchctl bootout gui/[UID] $PLIST" >>expect && + echo "launchctl bootstrap gui/[UID] $PLIST" >>expect || return 1 + done && + test_cmp expect args && + + rm -f args && + GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=schtasks && + printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \ + hourly daily weekly >expect && + printf "schtasks /create /tn Git Maintenance (%s) /f /xml\n" \ + hourly daily weekly >>expect && + test_cmp expect args && + + rm -f args && + GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance stop && + printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \ + hourly daily weekly >expect && + printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \ + hourly daily weekly >>expect && + test_cmp expect args +' + test_expect_success 'register preserves existing strategy' ' git config maintenance.strategy none && git maintenance register && From 54aef5b267747683f5fce7bd4cfa6ea13a7be0d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= Date: Fri, 2 Jul 2021 16:25:56 +0200 Subject: [PATCH 022/198] maintenance: add support for systemd timers on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing mechanism for scheduling background maintenance is done through cron. On Linux systems managed by systemd, systemd provides an alternative to schedule recurring tasks: systemd timers. The main motivations to implement systemd timers in addition to cron are: * cron is optional and Linux systems running systemd might not have it installed. * The execution of `crontab -l` can tell us if cron is installed but not if the daemon is actually running. 
* With systemd, each service is run in its own cgroup and its logs are tagged by the service inside journald. With cron, all scheduled tasks run in the cron daemon cgroup and all the logs of the user-scheduled tasks appear to belong to the system cron service. Concretely, a user that doesn’t have access to the system logs won’t have access to the log of their own tasks scheduled by cron whereas they will have access to the log of their own tasks scheduled by systemd timer. Although `cron` attempts to send email, that email may go unseen by the user because these days, local mailboxes are not heavily used anymore. In order to schedule git maintenance, we need two unit template files: * ~/.config/systemd/user/git-maintenance@.service to define the command to be started by systemd and * ~/.config/systemd/user/git-maintenance@.timer to define the schedule at which the command should be run. Those units are templates that are parameterized by the frequency. Based on those templates, 3 timers are started: * git-maintenance@hourly.timer * git-maintenance@daily.timer * git-maintenance@weekly.timer The command launched by those three timers is the same as with the other scheduling methods: /path/to/git for-each-repo --exec-path=/path/to --config=maintenance.repo maintenance run --schedule=%i with the full path for git to ensure that the version of git launched for the scheduled maintenance is the same as the one used to run `maintenance start`. The timer unit contains `Persistent=true` so that, if the computer is powered down when a maintenance task should run, the task will be run when the computer is powered back on.
Signed-off-by: Lénaïc Huard Signed-off-by: Junio C Hamano --- Documentation/git-maintenance.txt | 58 +++++++- builtin/gc.c | 227 ++++++++++++++++++++++++++++++ t/t7900-maintenance.sh | 67 ++++++++- 3 files changed, 341 insertions(+), 11 deletions(-) diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 576290b5c61175..e2cfb68ab57907 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -179,14 +179,16 @@ OPTIONS `maintenance..enabled` configured as `true` are considered. See the 'TASKS' section for the list of accepted `` values. ---scheduler=auto|crontab|launchctl|schtasks:: +--scheduler=auto|crontab|systemd-timer|launchctl|schtasks:: When combined with the `start` subcommand, specify the scheduler for running the hourly, daily and weekly executions of `git maintenance run`. - Possible values for `` are `auto`, `crontab` (POSIX), - `launchctl` (macOS), and `schtasks` (Windows). - When `auto` is specified, the appropriate platform-specific - scheduler is used. Default is `auto`. + Possible values for `` are `auto`, `crontab` + (POSIX), `systemd-timer` (Linux), `launchctl` (macOS), and + `schtasks` (Windows). When `auto` is specified, the + appropriate platform-specific scheduler is used; on Linux, + `systemd-timer` is used if available, otherwise + `crontab`. Default is `auto`. TROUBLESHOOTING @@ -286,6 +288,52 @@ schedule to ensure you are executing the correct binaries in your schedule. +BACKGROUND MAINTENANCE ON LINUX SYSTEMD SYSTEMS +----------------------------------------------- + +While Linux supports `cron`, depending on the distribution, `cron` may +be an optional package not necessarily installed. On modern Linux +distributions, systemd timers are superseding it. + +If user systemd timers are available, they will be used as a replacement +of `cron`. + +In this case, `git maintenance start` will create user systemd timer units +and start the timers. 
The current list of user-scheduled tasks can be found +by running `systemctl --user list-timers`. The timers written by `git +maintenance start` are similar to this: + +----------------------------------------------------------------------- +$ systemctl --user list-timers +NEXT LEFT LAST PASSED UNIT ACTIVATES +Thu 2021-04-29 19:00:00 CEST 42min left Thu 2021-04-29 18:00:11 CEST 17min ago git-maintenance@hourly.timer git-maintenance@hourly.service +Fri 2021-04-30 00:00:00 CEST 5h 42min left Thu 2021-04-29 00:00:11 CEST 18h ago git-maintenance@daily.timer git-maintenance@daily.service +Mon 2021-05-03 00:00:00 CEST 3 days left Mon 2021-04-26 00:00:11 CEST 3 days ago git-maintenance@weekly.timer git-maintenance@weekly.service +----------------------------------------------------------------------- + +One timer is registered for each `--schedule=` option. + +The definition of the systemd units can be inspected in the following files: + +----------------------------------------------------------------------- +~/.config/systemd/user/git-maintenance@.timer +~/.config/systemd/user/git-maintenance@.service +~/.config/systemd/user/timers.target.wants/git-maintenance@hourly.timer +~/.config/systemd/user/timers.target.wants/git-maintenance@daily.timer +~/.config/systemd/user/timers.target.wants/git-maintenance@weekly.timer +----------------------------------------------------------------------- + +`git maintenance start` will overwrite these files and start the timer +again with `systemctl --user`, so any customization should be done by +creating a drop-in file, i.e. a `.conf` suffixed file in the +`~/.config/systemd/user/git-maintenance@.service.d` directory. + +`git maintenance stop` will stop the user systemd timers and delete +the above mentioned files. + +For more details, see `systemd.timer(5)`. 
+ + BACKGROUND MAINTENANCE ON MACOS SYSTEMS --------------------------------------- diff --git a/builtin/gc.c b/builtin/gc.c index 96a43f99b47eed..6698f885df7234 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -2079,10 +2079,221 @@ static int crontab_update_schedule(int run_maintenance, int fd) return result; } +#ifdef __linux__ + +static int real_is_systemd_timer_available(void) +{ + struct child_process child = CHILD_PROCESS_INIT; + + strvec_pushl(&child.args, "systemctl", "--user", "list-timers", NULL); + child.no_stdin = 1; + child.no_stdout = 1; + child.no_stderr = 1; + child.silent_exec_failure = 1; + + if (start_command(&child)) + return 0; + if (finish_command(&child)) + return 0; + return 1; +} + +#else + +static int real_is_systemd_timer_available(void) +{ + return 0; +} + +#endif + +static int is_systemd_timer_available(void) +{ + const char *cmd = "systemctl"; + int is_available; + + if (get_schedule_cmd(&cmd, &is_available)) + return is_available; + + return real_is_systemd_timer_available(); +} + +static char *xdg_config_home_systemd(const char *filename) +{ + return xdg_config_home_for("systemd/user", filename); +} + +static int systemd_timer_enable_unit(int enable, + enum schedule_priority schedule) +{ + const char *cmd = "systemctl"; + struct child_process child = CHILD_PROCESS_INIT; + const char *frequency = get_frequency(schedule); + + /* + * Disabling the systemd unit while it is already disabled makes + * systemctl print an error. + * Let's ignore it since it means we already are in the expected state: + * the unit is disabled. + * + * On the other hand, enabling a systemd unit which is already enabled + * produces no error. + */ + if (!enable) + child.no_stderr = 1; + + get_schedule_cmd(&cmd, NULL); + strvec_split(&child.args, cmd); + strvec_pushl(&child.args, "--user", enable ? 
"enable" : "disable", + "--now", NULL); + strvec_pushf(&child.args, "git-maintenance@%s.timer", frequency); + + if (start_command(&child)) + return error(_("failed to start systemctl")); + if (finish_command(&child)) + /* + * Disabling an already disabled systemd unit makes + * systemctl fail. + * Let's ignore this failure. + * + * Enabling an enabled systemd unit doesn't fail. + */ + if (enable) + return error(_("failed to run systemctl")); + return 0; +} + +static int systemd_timer_delete_unit_templates(void) +{ + int ret = 0; + char *filename = xdg_config_home_systemd("git-maintenance@.timer"); + if (unlink(filename) && !is_missing_file_error(errno)) + ret = error_errno(_("failed to delete '%s'"), filename); + FREE_AND_NULL(filename); + + filename = xdg_config_home_systemd("git-maintenance@.service"); + if (unlink(filename) && !is_missing_file_error(errno)) + ret = error_errno(_("failed to delete '%s'"), filename); + + free(filename); + return ret; +} + +static int systemd_timer_delete_units(void) +{ + return systemd_timer_enable_unit(0, SCHEDULE_HOURLY) || + systemd_timer_enable_unit(0, SCHEDULE_DAILY) || + systemd_timer_enable_unit(0, SCHEDULE_WEEKLY) || + systemd_timer_delete_unit_templates(); +} + +static int systemd_timer_write_unit_templates(const char *exec_path) +{ + char *filename; + FILE *file; + const char *unit; + + filename = xdg_config_home_systemd("git-maintenance@.timer"); + if (safe_create_leading_directories(filename)) { + error(_("failed to create directories for '%s'"), filename); + goto error; + } + file = fopen_or_warn(filename, "w"); + if (file == NULL) + goto error; + + unit = "# This file was created and is maintained by Git.\n" + "# Any edits made in this file might be replaced in the future\n" + "# by a Git command.\n" + "\n" + "[Unit]\n" + "Description=Optimize Git repositories data\n" + "\n" + "[Timer]\n" + "OnCalendar=%i\n" + "Persistent=true\n" + "\n" + "[Install]\n" + "WantedBy=timers.target\n"; + if (fputs(unit, file) == EOF) { + 
error(_("failed to write to '%s'"), filename); + fclose(file); + goto error; + } + if (fclose(file) == EOF) { + error_errno(_("failed to flush '%s'"), filename); + goto error; + } + free(filename); + + filename = xdg_config_home_systemd("git-maintenance@.service"); + file = fopen_or_warn(filename, "w"); + if (file == NULL) + goto error; + + unit = "# This file was created and is maintained by Git.\n" + "# Any edits made in this file might be replaced in the future\n" + "# by a Git command.\n" + "\n" + "[Unit]\n" + "Description=Optimize Git repositories data\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "ExecStart=\"%s/git\" --exec-path=\"%s\" for-each-repo --config=maintenance.repo maintenance run --schedule=%%i\n" + "LockPersonality=yes\n" + "MemoryDenyWriteExecute=yes\n" + "NoNewPrivileges=yes\n" + "RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6\n" + "RestrictNamespaces=yes\n" + "RestrictRealtime=yes\n" + "RestrictSUIDSGID=yes\n" + "SystemCallArchitectures=native\n" + "SystemCallFilter=@system-service\n"; + if (fprintf(file, unit, exec_path, exec_path) < 0) { + error(_("failed to write to '%s'"), filename); + fclose(file); + goto error; + } + if (fclose(file) == EOF) { + error_errno(_("failed to flush '%s'"), filename); + goto error; + } + free(filename); + return 0; + +error: + free(filename); + systemd_timer_delete_unit_templates(); + return -1; +} + +static int systemd_timer_setup_units(void) +{ + const char *exec_path = git_exec_path(); + + int ret = systemd_timer_write_unit_templates(exec_path) || + systemd_timer_enable_unit(1, SCHEDULE_HOURLY) || + systemd_timer_enable_unit(1, SCHEDULE_DAILY) || + systemd_timer_enable_unit(1, SCHEDULE_WEEKLY); + if (ret) + systemd_timer_delete_units(); + return ret; +} + +static int systemd_timer_update_schedule(int run_maintenance, int fd) +{ + if (run_maintenance) + return systemd_timer_setup_units(); + else + return systemd_timer_delete_units(); +} + enum scheduler { SCHEDULER_INVALID = -1, SCHEDULER_AUTO, 
SCHEDULER_CRON, + SCHEDULER_SYSTEMD, SCHEDULER_LAUNCHCTL, SCHEDULER_SCHTASKS, }; @@ -2097,6 +2308,11 @@ static const struct { .is_available = is_crontab_available, .update_schedule = crontab_update_schedule, }, + [SCHEDULER_SYSTEMD] = { + .name = "systemctl", + .is_available = is_systemd_timer_available, + .update_schedule = systemd_timer_update_schedule, + }, [SCHEDULER_LAUNCHCTL] = { .name = "launchctl", .is_available = is_launchctl_available, @@ -2117,6 +2333,9 @@ static enum scheduler parse_scheduler(const char *value) return SCHEDULER_AUTO; else if (!strcasecmp(value, "cron") || !strcasecmp(value, "crontab")) return SCHEDULER_CRON; + else if (!strcasecmp(value, "systemd") || + !strcasecmp(value, "systemd-timer")) + return SCHEDULER_SYSTEMD; else if (!strcasecmp(value, "launchctl")) return SCHEDULER_LAUNCHCTL; else if (!strcasecmp(value, "schtasks")) @@ -2153,6 +2372,14 @@ static enum scheduler resolve_scheduler(enum scheduler scheduler) #elif defined(GIT_WINDOWS_NATIVE) return SCHEDULER_SCHTASKS; +#elif defined(__linux__) + if (is_systemd_timer_available()) + return SCHEDULER_SYSTEMD; + else if (is_crontab_available()) + return SCHEDULER_CRON; + else + die(_("neither systemd timers nor crontab are available")); + #else return SCHEDULER_CRON; #endif diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index b36b7f5fb0f21e..b289cae6b9e20b 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -20,6 +20,18 @@ test_xmllint () { fi } +test_lazy_prereq SYSTEMD_ANALYZE ' + systemd-analyze --help >out && + grep verify out +' + +test_systemd_analyze_verify () { + if test_have_prereq SYSTEMD_ANALYZE + then + systemd-analyze verify "$@" + fi +} + test_expect_success 'help text' ' test_expect_code 129 git maintenance -h 2>err && test_i18ngrep "usage: git maintenance " err && @@ -634,15 +646,56 @@ test_expect_success 'start and stop Windows maintenance' ' test_cmp expect args ' +test_expect_success 'start and stop Linux/systemd maintenance' ' + 
write_script print-args <<-\EOF && + printf "%s\n" "$*" >>args + EOF + + XDG_CONFIG_HOME="$PWD" && + export XDG_CONFIG_HOME && + rm -f args && + GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args" git maintenance start --scheduler=systemd-timer && + + # start registers the repo + git config --get --global --fixed-value maintenance.repo "$(pwd)" && + + test_systemd_analyze_verify "systemd/user/git-maintenance@.service" && + + printf -- "--user enable --now git-maintenance@%s.timer\n" hourly daily weekly >expect && + test_cmp expect args && + + rm -f args && + GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args" git maintenance stop && + + # stop does not unregister the repo + git config --get --global --fixed-value maintenance.repo "$(pwd)" && + + test_path_is_missing "systemd/user/git-maintenance@.timer" && + test_path_is_missing "systemd/user/git-maintenance@.service" && + + printf -- "--user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect && + test_cmp expect args +' + test_expect_success 'start and stop when several schedulers are available' ' write_script print-args <<-\EOF && printf "%s\n" "$*" | sed "s:gui/[0-9][0-9]*:gui/[UID]:; s:\(schtasks /create .* /xml\).*:\1:;" >>args EOF rm -f args && - GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=launchctl && - printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \ + GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=systemd-timer && + printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \ hourly daily weekly >expect && + printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \ + hourly daily weekly >>expect && + printf -- "systemctl --user enable --now git-maintenance@%s.timer\n" hourly daily weekly >>expect && + test_cmp expect args && + + rm -f args && + 
GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=launchctl && + printf -- "systemctl --user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect && + printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \ + hourly daily weekly >>expect && for frequency in hourly daily weekly do PLIST="$pfx/Library/LaunchAgents/org.git-scm.git.$frequency.plist" && @@ -652,17 +705,19 @@ test_expect_success 'start and stop when several schedulers are available' ' test_cmp expect args && rm -f args && - GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=schtasks && + GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance start --scheduler=schtasks && + printf -- "systemctl --user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect && printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \ - hourly daily weekly >expect && + hourly daily weekly >>expect && printf "schtasks /create /tn Git Maintenance (%s) /f /xml\n" \ hourly daily weekly >>expect && test_cmp expect args && rm -f args && - GIT_TEST_MAINT_SCHEDULER="launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance stop && + GIT_TEST_MAINT_SCHEDULER="systemctl:./print-args systemctl,launchctl:./print-args launchctl,schtasks:./print-args schtasks" git maintenance stop && + printf -- "systemctl --user disable --now git-maintenance@%s.timer\n" hourly daily weekly >expect && printf "launchctl bootout gui/[UID] $pfx/Library/LaunchAgents/org.git-scm.git.%s.plist\n" \ - hourly daily weekly >expect && + hourly daily weekly >>expect && printf "schtasks /delete /tn Git Maintenance (%s) /f\n" \ hourly daily weekly >>expect && test_cmp expect args From 
8afdce2d1ef6ed56f10c08dfd1e2df2c38682796 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 1 Jul 2021 22:51:17 +0000 Subject: [PATCH 023/198] submodule: mark submodules with update=none as inactive When the user recursively clones a repository with submodules and one or more of those submodules is marked with the submodule..update=none configuration, the submodule will end up being active. This is a problem because we will have skipped cloning or checking out the submodule, and as a result, other commands, such as git reset or git checkout, will fail if they are invoked with --recurse-submodules (or when submodule.recurse is true). This is obviously not the behavior the user wanted, so let's fix this by specifically setting the submodule as inactive in this case when we're initializing the repository. That will make us properly ignore the submodule when performing recursive operations. We only do this when initializing a submodule, since git submodule update can update the submodule with various options despite the setting of "none" and we want those options to override it as they currently do. Reported-by: Rose Kunkel Helped-by: Philippe Blain Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 6 ++++++ t/t5601-clone.sh | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index d55f6262e9c93b..c8cd2f85249c05 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -686,6 +686,12 @@ static void init_submodule(const char *path, const char *prefix, if (git_config_set_gently(sb.buf, upd)) die(_("Failed to register update mode for submodule path '%s'"), displaypath); + + if (sub->update_strategy.type == SM_UPDATE_NONE) { + strbuf_reset(&sb); + strbuf_addf(&sb, "submodule.%s.active", sub->name); + git_config_set_gently(sb.buf, "false"); + } } strbuf_release(&sb); free(displaypath); diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index c0688467e7c099..efe6b13be06355 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -752,6 +752,30 @@ test_expect_success 'batch missing blob request does not inadvertently try to fe git clone --filter=blob:limit=0 "file://$(pwd)/server" client ' +test_expect_success 'clone with submodule with update=none is not active' ' + rm -rf server client && + + test_create_repo server && + echo a >server/a && + echo b >server/b && + git -C server add a b && + git -C server commit -m x && + + echo aa >server/a && + echo bb >server/b && + git -C server submodule add --name c "$(pwd)/repo_for_submodule" c && + git -C server config -f .gitmodules submodule.c.update none && + git -C server add a b c .gitmodules && + git -C server commit -m x && + + git clone --recurse-submodules server client && + git -C client config submodule.c.active >actual && + echo false >expected && + test_cmp actual expected && + # This would fail if the submodule were active, since it is not checked out. + git -C client reset --recurse-submodules --hard +' + . 
"$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 5e0fa8cbbfc89ec1226e9ccc4dc8f714ebb507b6 Mon Sep 17 00:00:00 2001 From: Clemens Fruhwirth Date: Tue, 6 Jul 2021 18:22:38 +0200 Subject: [PATCH 024/198] fetch: fix segfault on --set-upstream while on a detached HEAD branch_get("HEAD") can return NULL, when HEAD is detached, and cause the code to segfault when the user calls "git pull --set-upstream". Catch this case and warn the user to avoid a segfault. Signed-off-by: Clemens Fruhwirth Signed-off-by: Junio C Hamano --- builtin/fetch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/builtin/fetch.c b/builtin/fetch.c index dfde96a4354c14..d8f634be321fcc 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1602,6 +1602,10 @@ static int do_fetch(struct transport *transport, struct ref *rm; struct ref *source_ref = NULL; + if (!branch) { + warning(_("not on a branch to use --set-upstream with")); + goto skip; + } /* * We're setting the upstream configuration for the * current branch. The relevant upstream is the From 7e378520a6b7460d6b924ec862c5bc94d02f60c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 9 Jul 2021 13:06:13 +0200 Subject: [PATCH 025/198] revision.h: refactor "disable_stdin" and "read_from_stdin" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the two "disable_stdin" and "read_from_stdin" flags to an enum, in preparation for a subsequent commit adding more flags. The interaction between these is more subtle than they might appear at first sight, as noted in a12cbe23ef7. "read_stdin" is not the inverse of "disable_stdin", rather we read stdin if we see the "--stdin" option. The "read" is intended to understood as "I've read it already", not "you should read it". Let's avoid this confusion by using "consume" and "consumed" instead, i.e. a word whose present and past tense isn't the same. 
See 8b3dce56508 (Teach --stdin option to "log" family, 2009-11-03) where we added the "disable_stdin" flag, and a12cbe23ef7 (rev-list: make empty --stdin not an error, 2018-08-22) for the addition of the "read_from_stdin" flag. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/am.c | 4 ++-- builtin/blame.c | 2 +- builtin/diff-tree.c | 2 +- builtin/rev-list.c | 2 +- revision.c | 13 ++++++++----- revision.h | 23 ++++++++++++++++++++--- sequencer.c | 4 ++-- 7 files changed, 35 insertions(+), 15 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index 0b2d886c81b775..3a6c8455b47f71 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1355,7 +1355,7 @@ static void write_commit_patch(const struct am_state *state, struct commit *comm repo_init_revisions(the_repository, &rev_info, NULL); rev_info.diff = 1; rev_info.abbrev = 0; - rev_info.disable_stdin = 1; + rev_info.stdin_handling = REV_INFO_STDIN_IGNORE; rev_info.show_root_diff = 1; rev_info.diffopt.output_format = DIFF_FORMAT_PATCH; rev_info.no_commit_id = 1; @@ -1390,7 +1390,7 @@ static void write_index_patch(const struct am_state *state) fp = xfopen(am_path(state, "patch"), "w"); repo_init_revisions(the_repository, &rev_info, NULL); rev_info.diff = 1; - rev_info.disable_stdin = 1; + rev_info.stdin_handling = REV_INFO_STDIN_IGNORE; rev_info.no_commit_id = 1; rev_info.diffopt.output_format = DIFF_FORMAT_PATCH; rev_info.diffopt.use_color = 0; diff --git a/builtin/blame.c b/builtin/blame.c index 641523ff9af693..c9f66c58c46034 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1061,7 +1061,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix) argv[argc - 1] = "--"; } - revs.disable_stdin = 1; + revs.stdin_handling = REV_INFO_STDIN_IGNORE; setup_revisions(argc, argv, &revs, NULL); if (!revs.pending.nr && is_bare_repository()) { struct commit *head_commit; diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c index f33d30d57bff2e..fc548ebe16671c 100644 --- 
a/builtin/diff-tree.c +++ b/builtin/diff-tree.c @@ -122,7 +122,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix) die(_("index file corrupt")); opt->abbrev = 0; opt->diff = 1; - opt->disable_stdin = 1; + opt->stdin_handling = REV_INFO_STDIN_IGNORE; memset(&s_r_opt, 0, sizeof(s_r_opt)); s_r_opt.tweak = diff_tree_tweak_rev; diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 7677b1af5a45fc..524632ba328d69 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -651,7 +651,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if ((!revs.commits && reflog_walk_empty(revs.reflog_info) && (!(revs.tag_objects || revs.tree_objects || revs.blob_objects) && !revs.pending.nr) && - !revs.rev_input_given && !revs.read_from_stdin) || + !revs.rev_input_given && !revs.consumed_stdin) || revs.diff) usage(rev_list_usage); diff --git a/revision.c b/revision.c index 8140561b6c7e23..50909339a592aa 100644 --- a/revision.c +++ b/revision.c @@ -2741,14 +2741,17 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s } if (!strcmp(arg, "--stdin")) { - if (revs->disable_stdin) { + switch (revs->stdin_handling) { + case REV_INFO_STDIN_IGNORE: argv[left++] = arg; continue; + case REV_INFO_STDIN_CONSUME_ON_OPTION: + if (revs->consumed_stdin) + die("--stdin given twice?"); + read_revisions_from_stdin(revs, &prune_data); + revs->consumed_stdin = 1; + continue; } - if (revs->read_from_stdin++) - die("--stdin given twice?"); - read_revisions_from_stdin(revs, &prune_data); - continue; } if (!strcmp(arg, "--end-of-options")) { diff --git a/revision.h b/revision.h index 93aa012f518eb0..0598220831ce8a 100644 --- a/revision.h +++ b/revision.h @@ -86,6 +86,11 @@ struct rev_cmdline_info { struct oidset; struct topo_walk_info; +enum rev_info_stdin { + REV_INFO_STDIN_CONSUME_ON_OPTION, + REV_INFO_STDIN_IGNORE, +}; + struct rev_info { /* Starting list */ struct commit_list *commits; @@ -114,9 +119,22 @@ struct rev_info { int 
rev_input_given; /* - * Whether we read from stdin due to the --stdin option. + * How should we handle seeing --stdin? + * + * Defaults to REV_INFO_STDIN_CONSUME_ON_OPTION where we'll + * attempt to read it if we see the --stdin option. + * + * Can be set to REV_INFO_STDIN_IGNORE to ignore the --stdin + * option. + */ + enum rev_info_stdin stdin_handling; + + /* + * Did we read from stdin due to stdin_handling == + * REV_INFO_STDIN_CONSUME_ON_OPTION and seeing the --stdin + * option? */ - int read_from_stdin; + unsigned int consumed_stdin:1; /* topo-sort */ enum rev_sort_order sort_order; @@ -216,7 +234,6 @@ struct rev_info { date_mode_explicit:1, preserve_subject:1, encode_email_headers:1; - unsigned int disable_stdin:1; /* --show-linear-break */ unsigned int track_linear:1, track_first_time:1, diff --git a/sequencer.c b/sequencer.c index 0bec01cf38e817..4e73bd79d69a3b 100644 --- a/sequencer.c +++ b/sequencer.c @@ -3377,7 +3377,7 @@ static int make_patch(struct repository *r, log_tree_opt.abbrev = 0; log_tree_opt.diff = 1; log_tree_opt.diffopt.output_format = DIFF_FORMAT_PATCH; - log_tree_opt.disable_stdin = 1; + log_tree_opt.stdin_handling = REV_INFO_STDIN_IGNORE; log_tree_opt.no_commit_id = 1; log_tree_opt.diffopt.file = fopen(buf.buf, "w"); log_tree_opt.diffopt.use_color = GIT_COLOR_NEVER; @@ -4513,7 +4513,7 @@ static int pick_commits(struct repository *r, log_tree_opt.diff = 1; log_tree_opt.diffopt.output_format = DIFF_FORMAT_DIFFSTAT; - log_tree_opt.disable_stdin = 1; + log_tree_opt.stdin_handling = REV_INFO_STDIN_IGNORE; if (read_oneliner(&buf, rebase_path_orig_head(), 0) && !get_oid(buf.buf, &orig) && From 92a32304a32caa3b759a3e04c565c21b1cbdc7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 9 Jul 2021 13:06:14 +0200 Subject: [PATCH 026/198] revision.[ch]: add a "handle_stdin_line" API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the rev_info stdin parsing 
API to support hooking into its read_revisions_from_stdin() function. In the next commit we'll change the custom stdin parsing in pack-objects.c to use it. For that use-case adding an API is barely justified. We'll be able to make the handle_revision_arg() static in exchange for it, and we'll avoid the duplicate dance around setting "save_warning" and "warn_on_object_refname_ambiguity", but we could just continue to do that ourselves in builtin/pack-objects.c. The real reason to add this is for a change not part of this series. We'll soon teach "git bundle create" to accept revision/refname pairs on stdin, and thus do away with the limitation of it being impossible to create bundles with ref tips that don't correspond to the state of the current repository. I.e. this will work: $ printf "e83c5163316f89bfbde7\trefs/heads/first-git-dot-git-commit\n" | git bundle create initial.bundle --stdin As well as things like: $ git for-each-ref 'refs/remotes/origin' | sed 's!\trefs/remotes/origin/!\trefs/heads/!' | git bundle create origin.bundle --stdin In order to do that we'll need to modify the lines we consume on stdin in revision.c (which bundle.c uses already), and be assured that stripping extra bundle-specific data from them is the only change in behavior.
That change will be much more complex if bundle.c needs to start doing its own stdin parsing again outside of revision.c, it was recently converted to use revision.c's parsing in 5bb0fd2cab5 (bundle: arguments can be read from stdin, 2021-01-11) Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- revision.c | 22 ++++++++++++++++++++++ revision.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/revision.c b/revision.c index 50909339a592aa..3f6ab834aff4a5 100644 --- a/revision.c +++ b/revision.c @@ -2119,6 +2119,19 @@ static void read_revisions_from_stdin(struct rev_info *revs, int len = sb.len; if (!len) break; + + if (revs->handle_stdin_line) { + enum rev_info_stdin_line ret = revs->handle_stdin_line( + revs, &sb, revs->stdin_line_priv); + + switch (ret) { + case REV_INFO_STDIN_LINE_PROCESS: + break; + case REV_INFO_STDIN_LINE_CONTINUE: + continue; + } + } + if (sb.buf[0] == '-') { if (len == 2 && sb.buf[1] == '-') { seen_dashdash = 1; @@ -2742,6 +2755,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (!strcmp(arg, "--stdin")) { switch (revs->stdin_handling) { + case REV_INFO_STDIN_ALWAYS_READ: case REV_INFO_STDIN_IGNORE: argv[left++] = arg; continue; @@ -2790,6 +2804,14 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s } } + /* + * We're asked to ALWAYS_READ from stdin, but no --stdin + * option (or "consumed_stdin" would be set). 
+ */ + if (!revs->consumed_stdin && + revs->stdin_handling == REV_INFO_STDIN_ALWAYS_READ) + read_revisions_from_stdin(revs, &prune_data); + if (prune_data.nr) { /* * If we need to introduce the magic "a lone ':' means no diff --git a/revision.h b/revision.h index 0598220831ce8a..93b6a02cf020ce 100644 --- a/revision.h +++ b/revision.h @@ -89,8 +89,17 @@ struct topo_walk_info; enum rev_info_stdin { REV_INFO_STDIN_CONSUME_ON_OPTION, REV_INFO_STDIN_IGNORE, + REV_INFO_STDIN_ALWAYS_READ, }; +enum rev_info_stdin_line { + REV_INFO_STDIN_LINE_PROCESS, + REV_INFO_STDIN_LINE_CONTINUE, +}; + +typedef enum rev_info_stdin_line (*rev_info_stdin_line_func)( + struct rev_info *revs, struct strbuf *line, void *stdin_line_priv); + struct rev_info { /* Starting list */ struct commit_list *commits; @@ -126,6 +135,9 @@ struct rev_info { * * Can be set to REV_INFO_STDIN_IGNORE to ignore the --stdin * option. + * + * Set it to REV_INFO_STDIN_ALWAYS_READ if there's always data + * on stdin to be read, even if no --stdin option is provided. */ enum rev_info_stdin stdin_handling; @@ -136,6 +148,24 @@ struct rev_info { */ unsigned int consumed_stdin:1; + /* + * When reading from stdin (see "stdin_handling" above) define + * a handle_stdin_line function to consume the lines. + * + * - Return REV_INFO_STDIN_LINE_PROCESS to continue + * revision.c's normal processing of the line (after + * possibly munging the provided strbuf). + * + * - Return REV_INFO_STDIN_LINE_CONTINUE to indicate that the + * line is fully processed, moving onto the next line (if + * any) + * + * Use the "stdin_line_priv" to optionally pass your own data + * around. 
+ */ + rev_info_stdin_line_func handle_stdin_line; + void *stdin_line_priv; + /* topo-sort */ enum rev_sort_order sort_order; From 7db42aa925edb7212c216479344f50a1e55f7e0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 9 Jul 2021 13:06:15 +0200 Subject: [PATCH 027/198] pack-objects.c: do stdin parsing via revision.c's API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new "handle_stdin_line" API in revision.c to parse stdin in pack-objects.c, instead of using custom pack-objects.c-specific code to do so. This means that we can remove the "if (len && line[len - 1] == '\n')" check, it's now redundant to using strbuf_getline(), and we get to skip the whole "warn_on_object_refname_ambiguity" dance. The read_revisions_from_stdin() function in revision.c we're now using does it for us. The pack-objects.c code being refactored away here was first added in Linus's c323ac7d9c5 (git-pack-objects: create a packed object representation., 2005-06-25). Later on rev-list started doing similar parsing in 42cabc341c4 (Teach rev-list an option to read revs from the standard input., 2006-09-05). That code was promoted to a more general API in 1fc561d169a (Move read_revisions_from_stdin from builtin-rev-list.c to revision.c, 2008-07-05). Since then the API in revision.c has received improvements that have been missed here. E.g. the arbitrary limit of 1000 bytes was removed in 63d564b3002 (read_revision_from_stdin(): use strbuf, 2009-11-20), and it moved to a simpler strbuf API in 6e8d46f9d4b (revision: read --stdin with strbuf_getline(), 2015-10-28). For now we've just made setup_revisions() loop over stdin for us, but the callback we define makes no use of REV_INFO_STDIN_LINE_PROCESS. We still need to call handle_revision_arg() ourselves because we'd like to call it with different flags.
This very light use of the API will be further refined in a subsequent commit, for now we're just doing the bare minimum to move this existing code over to the new callback pattern without any functional changes, and making it as friendly to "git show -w" and "the --color-moved-ws=allow-indentation-change" mode as possible. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 63 ++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1fbaa34f91b264..35d5247f85a212 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3745,15 +3745,40 @@ static void mark_bitmap_preferred_tips(void) } } +static enum rev_info_stdin_line get_object_list_handle_stdin_line( + struct rev_info *revs, struct strbuf *line_sb, void *stdin_line_priv) +{ + int *flags = stdin_line_priv; + char *line = line_sb->buf; + + if (*line == '-') { + if (!strcmp(line, "--not")) { + *flags ^= UNINTERESTING; + write_bitmap_index = 0; + return REV_INFO_STDIN_LINE_CONTINUE; + } + if (starts_with(line, "--shallow ")) { + struct object_id oid; + if (get_oid_hex(line + 10, &oid)) + die("not an object name '%s'", line + 10); + register_shallow(the_repository, &oid); + use_bitmap_index = 0; + return REV_INFO_STDIN_LINE_CONTINUE; + } + die(_("not a rev '%s'"), line); + } + if (handle_revision_arg(line, revs, *flags, REVARG_CANNOT_BE_FILENAME)) + die(_("bad revision '%s'"), line); + return REV_INFO_STDIN_LINE_CONTINUE; +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; struct setup_revision_opt s_r_opt = { .allow_exclude_promisor_objects = 1, }; - char line[1000]; int flags = 0; - int save_warning; repo_init_revisions(the_repository, &revs, NULL); save_commit_buffer = 0; @@ -3761,39 +3786,11 @@ static void get_object_list(int ac, const char **av) /* make sure shallows are read */ is_repository_shallow(the_repository); + 
revs.stdin_handling = REV_INFO_STDIN_ALWAYS_READ; + revs.handle_stdin_line = get_object_list_handle_stdin_line; + revs.stdin_line_priv = &flags; setup_revisions(ac, av, &revs, &s_r_opt); - save_warning = warn_on_object_refname_ambiguity; - warn_on_object_refname_ambiguity = 0; - - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = strlen(line); - if (len && line[len - 1] == '\n') - line[--len] = 0; - if (!len) - break; - if (*line == '-') { - if (!strcmp(line, "--not")) { - flags ^= UNINTERESTING; - write_bitmap_index = 0; - continue; - } - if (starts_with(line, "--shallow ")) { - struct object_id oid; - if (get_oid_hex(line + 10, &oid)) - die("not an object name '%s'", line + 10); - register_shallow(the_repository, &oid); - use_bitmap_index = 0; - continue; - } - die(_("not a rev '%s'"), line); - } - if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME)) - die(_("bad revision '%s'"), line); - } - - warn_on_object_refname_ambiguity = save_warning; - if (use_bitmap_index && !get_object_list_from_bitmap(&revs)) return; From bb0a763e6d3ba4b06e040ecef466b5cdfd91742b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 9 Jul 2021 13:06:16 +0200 Subject: [PATCH 028/198] pack-objects.c: make use of REV_INFO_STDIN_LINE_PROCESS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continue the work started in the preceding commit of porting pack-objects.c over to the new handle_stdin_line callback pattern. The common case for the users of this API is to do some of their own parsing or munging, and then have handle_revision_arg() handle the rest. The existing users of the --stdin parsing always wanted a flag of "0" to be passed to handle_revision_arg(), but pack-objects.c wants to set custom flags. Let's support this common case by having a "revarg_flags" member in the "rev_info" struct. 
This allows us to return REV_INFO_STDIN_LINE_PROCESS in the new get_object_list_handle_stdin_line() instead of REV_INFO_STDIN_LINE_CONTINUE, as read_revisions_from_stdin() will now pass down the right flag for us. I considered making the "--not" parsing be another flag handled by the revision.c API itself, but since there's only one caller who wants this, and the "write_bitmap_index = 0" case is more specific than marking things UNINTERESTING I think it's better to handle it with a more general mechanism. These changes mean that we can make the handle_revision_arg() function static. Now that the only external user of the API has been migrated over to the callback mechanism nothing external to revision.c needs to call handle_revision_arg() anymore. That handle_revision_arg() function was made public in a combination of 5d6f0935e6d (revision.c: allow injecting revision parameters after setup_revisions()., 2006-09-05) and b5d97e6b0a0 (pack-objects: run rev-list equivalent internally., 2006-09-04). This change leaves the "void *stdin_line_priv" member, which was briefly used in the preceding commits, without any in-tree user, as builtin/pack-objects.c could be ported over to our new "revarg_flags" common case. I'm leaving that "void *stdin_line_priv" in place anyway for two reasons: 1. It's a common pattern to allow such a "void *" to be used for callback data, so we might as well follow that pattern here in anticipation of a future in-tree user. 2. I have patches for such an in-tree user already in a series that'll be submitted after this one. See the reference to "git bundle create --stdin" in the commit that added the "handle_stdin_line" API.
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 35 +++++++++++++++-------------------- revision.c | 4 ++-- revision.h | 6 ++++-- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 35d5247f85a212..06a085a9a2aa85 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3748,28 +3748,25 @@ static void mark_bitmap_preferred_tips(void) static enum rev_info_stdin_line get_object_list_handle_stdin_line( struct rev_info *revs, struct strbuf *line_sb, void *stdin_line_priv) { - int *flags = stdin_line_priv; char *line = line_sb->buf; - if (*line == '-') { - if (!strcmp(line, "--not")) { - *flags ^= UNINTERESTING; - write_bitmap_index = 0; - return REV_INFO_STDIN_LINE_CONTINUE; - } - if (starts_with(line, "--shallow ")) { - struct object_id oid; - if (get_oid_hex(line + 10, &oid)) - die("not an object name '%s'", line + 10); - register_shallow(the_repository, &oid); - use_bitmap_index = 0; - return REV_INFO_STDIN_LINE_CONTINUE; - } + if (*line != '-') + return REV_INFO_STDIN_LINE_PROCESS; + + if (!strcmp(line, "--not")) { + revs->revarg_flags ^= UNINTERESTING; + write_bitmap_index = 0; + return REV_INFO_STDIN_LINE_CONTINUE; + } else if (starts_with(line, "--shallow ")) { + struct object_id oid; + if (get_oid_hex(line + 10, &oid)) + die("not an object name '%s'", line + 10); + register_shallow(the_repository, &oid); + use_bitmap_index = 0; + return REV_INFO_STDIN_LINE_CONTINUE; + } else { die(_("not a rev '%s'"), line); } - if (handle_revision_arg(line, revs, *flags, REVARG_CANNOT_BE_FILENAME)) - die(_("bad revision '%s'"), line); - return REV_INFO_STDIN_LINE_CONTINUE; } static void get_object_list(int ac, const char **av) @@ -3778,7 +3775,6 @@ static void get_object_list(int ac, const char **av) struct setup_revision_opt s_r_opt = { .allow_exclude_promisor_objects = 1, }; - int flags = 0; repo_init_revisions(the_repository, &revs, NULL); 
save_commit_buffer = 0; @@ -3788,7 +3784,6 @@ static void get_object_list(int ac, const char **av) revs.stdin_handling = REV_INFO_STDIN_ALWAYS_READ; revs.handle_stdin_line = get_object_list_handle_stdin_line; - revs.stdin_line_priv = &flags; setup_revisions(ac, av, &revs, &s_r_opt); if (use_bitmap_index && !get_object_list_from_bitmap(&revs)) diff --git a/revision.c b/revision.c index 3f6ab834aff4a5..4164a4fcd1155a 100644 --- a/revision.c +++ b/revision.c @@ -2089,7 +2089,7 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl return 0; } -int handle_revision_arg(const char *arg, struct rev_info *revs, int flags, unsigned revarg_opt) +static int handle_revision_arg(const char *arg, struct rev_info *revs, int flags, unsigned revarg_opt) { int ret = handle_revision_arg_1(arg, revs, flags, revarg_opt); if (!ret) @@ -2139,7 +2139,7 @@ static void read_revisions_from_stdin(struct rev_info *revs, } die("options not supported in --stdin mode"); } - if (handle_revision_arg(sb.buf, revs, 0, + if (handle_revision_arg(sb.buf, revs, revs->revarg_flags, REVARG_CANNOT_BE_FILENAME)) die("bad revision '%s'", sb.buf); } diff --git a/revision.h b/revision.h index 93b6a02cf020ce..4d67c216c8b7fc 100644 --- a/revision.h +++ b/revision.h @@ -156,6 +156,9 @@ struct rev_info { * revision.c's normal processing of the line (after * possibly munging the provided strbuf). * + * Change "revarg_flags" to affect the subsequent handling + * in handle_revision_arg() + * * - Return REV_INFO_STDIN_LINE_CONTINUE to indicate that the * line is fully processed, moving onto the next line (if * any) @@ -164,6 +167,7 @@ struct rev_info { * around. 
*/ rev_info_stdin_line_func handle_stdin_line; + int revarg_flags; void *stdin_line_priv; /* topo-sort */ @@ -421,8 +425,6 @@ void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx, const char * const usagestr[]); #define REVARG_CANNOT_BE_FILENAME 01 #define REVARG_COMMITTISH 02 -int handle_revision_arg(const char *arg, struct rev_info *revs, - int flags, unsigned revarg_opt); /** * Reset the flags used by the revision walking api. You can use this to do From 4afb5e196b8a687cc43d6295eb73d4d693407f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:04 +0200 Subject: [PATCH 029/198] fsck tests: refactor one test to use a sub-repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor one of the fsck tests to use a throwaway repository. It's a pervasive pattern in t1450-fsck.sh to spend a lot of effort on the teardown of a test so we're not leaving corrupt content for the next test. We should instead simply use something like this test_create_repo pattern. It's both less verbose, and makes things easier to debug as a failing test can have its state left behind under -d without damaging the state for other tests. But let's punt on that general refactoring and just change this one test, as I'm going to change it further in subsequent commits.
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 5071ac63a5b51b..7becab5ba1e815 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -48,24 +48,22 @@ remove_object () { rm "$(sha1_file "$1")" } -test_expect_success 'object with bad sha1' ' - sha=$(echo blob | git hash-object -w --stdin) && - old=$(test_oid_to_path "$sha") && - new=$(dirname $old)/$(test_oid ff_2) && - sha="$(dirname $new)$(basename $new)" && - mv .git/objects/$old .git/objects/$new && - test_when_finished "remove_object $sha" && - git update-index --add --cacheinfo 100644 $sha foo && - test_when_finished "git read-tree -u --reset HEAD" && - tree=$(git write-tree) && - test_when_finished "remove_object $tree" && - cmt=$(echo bogus | git commit-tree $tree) && - test_when_finished "remove_object $cmt" && - git update-ref refs/heads/bogus $cmt && - test_when_finished "git update-ref -d refs/heads/bogus" && - - test_must_fail git fsck 2>out && - test_i18ngrep "$sha.*corrupt" out +test_expect_success 'object with hash mismatch' ' + git init --bare hash-mismatch && + ( + cd hash-mismatch && + oid=$(echo blob | git hash-object -w --stdin) && + old=$(test_oid_to_path "$oid") && + new=$(dirname $old)/$(test_oid ff_2) && + oid="$(dirname $new)$(basename $new)" && + mv objects/$old objects/$new && + git update-index --add --cacheinfo 100644 $oid foo && + tree=$(git write-tree) && + cmt=$(echo bogus | git commit-tree $tree) && + git update-ref refs/heads/bogus $cmt && + test_must_fail git fsck 2>out && + test_i18ngrep "$oid.*corrupt" out + ) ' test_expect_success 'branch pointing to non-commit' ' From 94fc8edb87c0ab2b7dac46c0341de063c429cbe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:05 +0200 Subject: [PATCH 030/198] fsck tests: add test for fsck-ing an 
unknown type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a blindspot in the fsck tests by checking what we do when we encounter an unknown "garbage" type produced with hash-object's --literally option. This behavior needs to be improved, which'll be done in subsequent patches, but for now let's test for the current behavior. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 7becab5ba1e815..f10d6f7b7e8c7f 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -863,4 +863,16 @@ test_expect_success 'detect corrupt index file in fsck' ' test_i18ngrep "bad index file" errors ' +test_expect_success 'fsck hard errors on an invalid object type' ' + git init --bare garbage-type && + empty_blob=$(git -C garbage-type hash-object --stdin -w -t blob err.expect <<-\EOF && + fatal: invalid object type + EOF + test_must_fail git -C garbage-type fsck >out.actual 2>err.actual && + test_cmp err.expect err.actual && + test_must_be_empty out.actual +' + test_done From e70fb77a00f128229b2511c77a85d64242c5d43e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:06 +0200 Subject: [PATCH 031/198] cat-file tests: test for missing object with -t and -s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test for what happens when the -t and -s flags are asked to operate on a missing object, this extends tests added in 3e370f9faf0 (t1006: add tests for git cat-file --allow-unknown-type, 2015-05-03). The -t and -s flags are the only ones that can be combined with --allow-unknown-type, so let's test with and without that flag. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1006-cat-file.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 5d2dc99b74ad00..b71ef94329e58f 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -315,6 +315,33 @@ test_expect_success '%(deltabase) reports packed delta bases' ' } ' +missing_oid=$(test_oid deadbeef) +test_expect_success 'error on type of missing object' ' + cat >expect.err <<-\EOF && + fatal: git cat-file: could not get object info + EOF + test_must_fail git cat-file -t $missing_oid >out 2>err && + test_must_be_empty out && + test_cmp expect.err err && + + test_must_fail git cat-file -t --allow-unknown-type $missing_oid >out 2>err && + test_must_be_empty out && + test_cmp expect.err err +' + +test_expect_success 'error on size of missing object' ' + cat >expect.err <<-\EOF && + fatal: git cat-file: could not get object info + EOF + test_must_fail git cat-file -s $missing_oid >out 2>err && + test_must_be_empty out && + test_cmp expect.err err && + + test_must_fail git cat-file -s --allow-unknown-type $missing_oid >out 2>err && + test_must_be_empty out && + test_cmp expect.err err +' + bogus_type="bogus" bogus_content="bogus" bogus_size=$(strlen "$bogus_content") From 9194dc15a9d9ae3e5e1b2e586ffafef3ae226066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:07 +0200 Subject: [PATCH 032/198] cat-file tests: test that --allow-unknown-type isn't on by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a blindspot in the tests for the --allow-unknown-type feature added in 39e4ae38804 (cat-file: teach cat-file a '--allow-unknown-type' option, 2015-05-03). We should check that --allow-unknown-type isn't on by default. 
Before this change all the tests would succeed if --allow-unknown-type was on by default, let's fix that by asserting that -t and -s die on a "garbage" type without --allow-unknown-type. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1006-cat-file.sh | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index b71ef94329e58f..dc01d7c4a9a9d5 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -347,6 +347,20 @@ bogus_content="bogus" bogus_size=$(strlen "$bogus_content") bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin) +test_expect_success 'die on broken object under -t and -s without --allow-unknown-type' ' + cat >err.expect <<-\EOF && + fatal: invalid object type + EOF + + test_must_fail git cat-file -t $bogus_sha1 >out.actual 2>err.actual && + test_cmp err.expect err.actual && + test_must_be_empty out.actual && + + test_must_fail git cat-file -s $bogus_sha1 >out.actual 2>err.actual && + test_cmp err.expect err.actual && + test_must_be_empty out.actual +' + test_expect_success "Type of broken object is correct" ' echo $bogus_type >expect && git cat-file -t --allow-unknown-type $bogus_sha1 >actual && @@ -363,6 +377,21 @@ bogus_content="bogus" bogus_size=$(strlen "$bogus_content") bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin) +test_expect_success 'die on broken object with large type under -t and -s without --allow-unknown-type' ' + cat >err.expect <<-EOF && + error: unable to unpack $bogus_sha1 header + fatal: git cat-file: could not get object info + EOF + + test_must_fail git cat-file -t $bogus_sha1 >out.actual 2>err.actual && + test_cmp err.expect err.actual && + test_must_be_empty out.actual && + + test_must_fail git cat-file -s $bogus_sha1 >out.actual 2>err.actual && + test_cmp err.expect err.actual && + test_must_be_empty out.actual +' + 
test_expect_success "Type of broken object is correct when type is large" ' echo $bogus_type >expect && git cat-file -t --allow-unknown-type $bogus_sha1 >actual && From 5c8b7ba104a409f341bf3759744a7f480925ac4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:08 +0200 Subject: [PATCH 033/198] rev-list tests: test for behavior with invalid object types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a blindspot in the tests for the "rev-list --disk-usage" feature added in 16950f8384a (rev-list: add --disk-usage option for calculating disk usage, 2021-02-09) to test for what happens when it's asked to calculate the disk usage of invalid object types. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t6115-rev-list-du.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t6115-rev-list-du.sh b/t/t6115-rev-list-du.sh index b4aef32b713ca0..edb2ed5584692b 100755 --- a/t/t6115-rev-list-du.sh +++ b/t/t6115-rev-list-du.sh @@ -48,4 +48,15 @@ check_du HEAD check_du --objects HEAD check_du --objects HEAD^..HEAD +test_expect_success 'setup garbage repository' ' + git clone --bare . garbage.git && + garbage_oid=$(git -C garbage.git hash-object -t garbage -w --stdin --literally garbage.git/refs/tags/garbage-tag && + test_must_fail git -C garbage.git rev-list --objects --all --disk-usage +' + test_done From 46de7a03e36a8880bdc627d7772e7a6ce56a5ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:09 +0200 Subject: [PATCH 034/198] cat-file tests: add corrupt loose object test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a blindspot in the tests for "cat-file" (and by proxy, the guts of object-file.c) by testing that when we can't decode a loose object with zlib we'll emit an error from zlib.c. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1006-cat-file.sh | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index dc01d7c4a9a9d5..7f10a92f0e46d3 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -404,6 +404,58 @@ test_expect_success "Size of large broken object is correct when type is large" test_cmp expect actual ' +test_expect_success 'cat-file -t and -s on corrupt loose object' ' + git init --bare corrupt-loose.git && + ( + cd corrupt-loose.git && + + # Setup and create the empty blob and its path + empty_path=$(git rev-parse --git-path objects/$(test_oid_to_path "$EMPTY_BLOB")) && + git hash-object -w --stdin other.blob && + other_blob=$(git hash-object -w --stdin out.expect <<-EOF && + 0 + EOF + git cat-file -s "$EMPTY_BLOB" >out.actual 2>err.actual && + test_must_be_empty err.actual && + test_cmp out.expect out.actual && + + # Swap the two to corrupt the repository + mv -f "$other_path" "$empty_path" && + test_must_fail git fsck 2>err.fsck && + grep "hash mismatch" err.fsck && + + # confirm that cat-file is reading the new swapped-in + # blob... + cat >out.expect <<-EOF && + blob + EOF + git cat-file -t "$EMPTY_BLOB" >out.actual 2>err.actual && + test_must_be_empty err.actual && + test_cmp out.expect out.actual && + + # ... since it has a different size now. + cat >out.expect <<-EOF && + 6 + EOF + git cat-file -s "$EMPTY_BLOB" >out.actual 2>err.actual && + test_must_be_empty err.actual && + test_cmp out.expect out.actual && + + # So far "cat-file" has been happy to spew the found + # content out as-is. Try to make it zlib-invalid. 
+ mv -f other.blob "$empty_path" && + test_must_fail git fsck 2>err.fsck && + grep "^error: inflate: data stream error (" err.fsck + ) +' + # Tests for git cat-file --follow-symlinks test_expect_success 'prep for symlink tests' ' echo_without_newline "$hello_content" >morx && From 68d78db3dbd38ba49581039bd7ad9c06d83e3be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:10 +0200 Subject: [PATCH 035/198] cat-file tests: test for current --allow-unknown-type behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add more tests for the current --allow-unknown-type behavior. As noted in [1] I don't think much of this makes sense, but let's test for it as-is so we can see if the behavior changes in the future. 1. https://lore.kernel.org/git/87r1i4qf4h.fsf@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1006-cat-file.sh | 61 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 7f10a92f0e46d3..86fd2a90ca773d 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -361,6 +361,46 @@ test_expect_success 'die on broken object under -t and -s without --allow-unknow test_must_be_empty out.actual ' +test_expect_success '-e is OK with a broken object without --allow-unknown-type' ' + git cat-file -e $bogus_sha1 +' + +test_expect_success '-e can not be combined with --allow-unknown-type' ' + test_expect_code 128 git cat-file -e --allow-unknown-type $bogus_sha1 +' + +test_expect_success '-p cannot print a broken object even with --allow-unknown-type' ' + test_must_fail git cat-file -p $bogus_sha1 && + test_expect_code 128 git cat-file -p --allow-unknown-type $bogus_sha1 +' + +test_expect_success ' does not work with objects of broken types' ' + cat >err.expect <<-\EOF && + fatal: invalid object type "bogus" + EOF + test_must_fail git 
cat-file $bogus_type $bogus_sha1 2>err.actual && + test_cmp err.expect err.actual +' + +test_expect_success 'broken types combined with --batch and --batch-check' ' + echo $bogus_sha1 >bogus-oid && + + cat >err.expect <<-\EOF && + fatal: invalid object type + EOF + + test_must_fail git cat-file --batch err.actual && + test_cmp err.expect err.actual && + + test_must_fail git cat-file --batch-check err.actual && + test_cmp err.expect err.actual +' + +test_expect_success 'the --batch and --batch-check options do not combine with --allow-unknown-type' ' + test_expect_code 128 git cat-file --batch --allow-unknown-type expect && git cat-file -t --allow-unknown-type $bogus_sha1 >actual && @@ -372,6 +412,27 @@ test_expect_success "Size of broken object is correct" ' git cat-file -s --allow-unknown-type $bogus_sha1 >actual && test_cmp expect actual ' + +test_expect_success 'the --allow-unknown-type option does not consider replacement refs' ' + cat >expect <<-EOF && + $bogus_type + EOF + git cat-file -t --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual && + + # Create it manually, as "git replace" will die on bogus + # types. + head=$(git rev-parse --verify HEAD) && + mkdir -p .git/refs/replace && + echo $head >.git/refs/replace/$bogus_sha1 && + + cat >expect <<-EOF && + commit + EOF + git cat-file -t --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + bogus_type="abcdefghijklmnopqrstuvwxyz1234679" bogus_content="bogus" bogus_size=$(strlen "$bogus_content") From 91e194f0e5cc9cf13c593c4efefee4550fe54f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:11 +0200 Subject: [PATCH 036/198] cache.h: move object functions to object-store.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the declaration of some ancient object functions added in e.g. 
c4483576b8d (Add "unpack_sha1_header()" helper function, 2005-06-01) from cache.h to object-store.h. This continues work started in cbd53a2193d (object-store: move object access functions to object-store.h, 2018-05-15). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- cache.h | 10 ---------- object-store.h | 9 +++++++++ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cache.h b/cache.h index ba04ff8bd36b36..32ea1ea0474fea 100644 --- a/cache.h +++ b/cache.h @@ -1302,16 +1302,6 @@ char *xdg_cache_home(const char *filename); int git_open_cloexec(const char *name, int flags); #define git_open(name) git_open_cloexec(name, O_RDONLY) -int unpack_loose_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); -int parse_loose_header(const char *hdr, unsigned long *sizep); - -int check_object_signature(struct repository *r, const struct object_id *oid, - void *buf, unsigned long size, const char *type); - -int finalize_object_file(const char *tmpfile, const char *filename); - -/* Helper to check and "touch" a file */ -int check_and_freshen_file(const char *fn, int freshen); extern const signed char hexval_table[256]; static inline unsigned int hexval(unsigned char c) diff --git a/object-store.h b/object-store.h index ec32c23dcb5615..9117115a50c6df 100644 --- a/object-store.h +++ b/object-store.h @@ -477,4 +477,13 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(each_packed_object_fn, void *, enum for_each_object_flags flags); +int unpack_loose_header(git_zstream *stream, unsigned char *map, + unsigned long mapsize, void *buffer, + unsigned long bufsiz); +int parse_loose_header(const char *hdr, unsigned long *sizep); +int check_object_signature(struct repository *r, const struct object_id *oid, + void *buf, unsigned long size, const char *type); +int finalize_object_file(const char *tmpfile, const char *filename); +int check_and_freshen_file(const char *fn, 
int freshen); + #endif /* OBJECT_STORE_H */ From a4189220254cb0a226046dc4c663cbb6700932c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:12 +0200 Subject: [PATCH 037/198] object-file.c: don't set "typep" when returning non-zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the loose_object_info() function returns an error stop faking up the "oi->typep" to OBJ_BAD. Let the return value of the function itself suffice. This code cleanup simplifies subsequent changes. That we set this at all is a relic from the past. Before 052fe5eaca9 (sha1_loose_object_info: make type lookup optional, 2013-07-12) we would always return the type_from_string(type) via the parse_sha1_header() function, or -1 (i.e. OBJ_BAD) if we couldn't parse it. Then in a combination of 46f034483eb (sha1_file: support reading from a loose object of unknown type, 2015-05-03) and b3ea7dd32d6 (sha1_loose_object_info: handle errors from unpack_sha1_rest, 2017-10-05) our API drifted even further towards conflating the two again. Having read the code paths involved carefully I think this is OK. We are just about to return -1, and we have only one caller: do_oid_object_info_extended(). That function will in turn go on to return -1 when we return -1 here. This might be introducing a subtle bug where a caller of oid_object_info_extended() would inspect its "typep" and expect a meaningful value if the function returned -1. Such a problem would not occur for its simpler oid_object_info() sister function. That one always returns the "enum object_type", which in the case of -1 would be the OBJ_BAD. Having read the code for all the callers of these functions I don't believe any such bug is being introduced here, and in any case we'd likely already have such a bug for the "sizep" member (although blindly checking "typep" first would be a more common case). 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/object-file.c b/object-file.c index f233b440b22c06..9210e2e6fe4738 100644 --- a/object-file.c +++ b/object-file.c @@ -1480,8 +1480,6 @@ static int loose_object_info(struct repository *r, git_inflate_end(&stream); munmap(map, mapsize); - if (status && oi->typep) - *oi->typep = status; if (oi->sizep == &size_scratch) oi->sizep = NULL; strbuf_release(&hdrbuf); From a0ef529a656b99432b275020fade43945f62ad21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:13 +0200 Subject: [PATCH 038/198] object-file.c: make parse_loose_header_extended() public MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the parse_loose_header_extended() function public and remove the parse_loose_header() wrapper. The only direct user of it outside of object-file.c itself was in streaming.c, that caller can simply pass the required "struct object-info *" instead. This change is being done in preparation for teaching read_loose_object() to accept a flag to pass to parse_loose_header(). It isn't strictly necessary for that change, we could simply use parse_loose_header_extended() there, but will leave the API in a better end state. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 21 ++++++++------------- object-store.h | 3 ++- streaming.c | 5 ++++- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/object-file.c b/object-file.c index 9210e2e6fe4738..e0ba1842272003 100644 --- a/object-file.c +++ b/object-file.c @@ -1340,8 +1340,9 @@ static void *unpack_loose_rest(git_zstream *stream, * too permissive for what we want to check. So do an anal * object header parse by hand. 
*/ -static int parse_loose_header_extended(const char *hdr, struct object_info *oi, - unsigned int flags) +int parse_loose_header(const char *hdr, + struct object_info *oi, + unsigned int flags) { const char *type_buf = hdr; unsigned long size; @@ -1401,14 +1402,6 @@ static int parse_loose_header_extended(const char *hdr, struct object_info *oi, return *hdr ? -1 : type; } -int parse_loose_header(const char *hdr, unsigned long *sizep) -{ - struct object_info oi = OBJECT_INFO_INIT; - - oi.sizep = sizep; - return parse_loose_header_extended(hdr, &oi, 0); -} - static int loose_object_info(struct repository *r, const struct object_id *oid, struct object_info *oi, int flags) @@ -1463,10 +1456,10 @@ static int loose_object_info(struct repository *r, if (status < 0) ; /* Do nothing */ else if (hdrbuf.len) { - if ((status = parse_loose_header_extended(hdrbuf.buf, oi, flags)) < 0) + if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0) status = error(_("unable to parse %s header with --allow-unknown-type"), oid_to_hex(oid)); - } else if ((status = parse_loose_header_extended(hdr, oi, flags)) < 0) + } else if ((status = parse_loose_header(hdr, oi, flags)) < 0) status = error(_("unable to parse %s header"), oid_to_hex(oid)); if (status >= 0 && oi->contentp) { @@ -2547,6 +2540,8 @@ int read_loose_object(const char *path, unsigned long mapsize; git_zstream stream; char hdr[MAX_HEADER_LEN]; + struct object_info oi = OBJECT_INFO_INIT; + oi.sizep = size; *contents = NULL; @@ -2561,7 +2556,7 @@ int read_loose_object(const char *path, goto out; } - *type = parse_loose_header(hdr, size); + *type = parse_loose_header(hdr, &oi, 0); if (*type < 0) { error(_("unable to parse header of %s"), path); git_inflate_end(&stream); diff --git a/object-store.h b/object-store.h index 9117115a50c6df..d443964447c5ca 100644 --- a/object-store.h +++ b/object-store.h @@ -480,7 +480,8 @@ int for_each_packed_object(each_packed_object_fn, void *, int unpack_loose_header(git_zstream *stream, unsigned 
char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); -int parse_loose_header(const char *hdr, unsigned long *sizep); +int parse_loose_header(const char *hdr, struct object_info *oi, + unsigned int flags); int check_object_signature(struct repository *r, const struct object_id *oid, void *buf, unsigned long size, const char *type); int finalize_object_file(const char *tmpfile, const char *filename); diff --git a/streaming.c b/streaming.c index 5f480ad50c415c..8beac62cbb7103 100644 --- a/streaming.c +++ b/streaming.c @@ -223,6 +223,9 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { + struct object_info oi = OBJECT_INFO_INIT; + oi.sizep = &st->size; + st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); if (!st->u.loose.mapped) return -1; @@ -231,7 +234,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, st->u.loose.mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr)) < 0) || - (parse_loose_header(st->u.loose.hdr, &st->size) < 0)) { + (parse_loose_header(st->u.loose.hdr, &oi, 0) < 0)) { git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); return -1; From 9e35d955d8b133610ae8dde528d478b4779e8f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:14 +0200 Subject: [PATCH 039/198] object-file.c: add missing braces to loose_object_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the formatting in loose_object_info() to conform with our usual coding style: When there are multiple arms to a conditional and some of them require braces, enclose even a single line block in braces for consistency -- Documentation/CodingGuidelines This formatting-only change makes a subsequent commit easier to read. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index e0ba1842272003..646ca7f85d6aa8 100644 --- a/object-file.c +++ b/object-file.c @@ -1450,17 +1450,20 @@ static int loose_object_info(struct repository *r, if (unpack_loose_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0) status = error(_("unable to unpack %s header with --allow-unknown-type"), oid_to_hex(oid)); - } else if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) + } else if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { status = error(_("unable to unpack %s header"), oid_to_hex(oid)); - if (status < 0) - ; /* Do nothing */ - else if (hdrbuf.len) { + } + + if (status < 0) { + /* Do nothing */ + } else if (hdrbuf.len) { if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0) status = error(_("unable to parse %s header with --allow-unknown-type"), oid_to_hex(oid)); - } else if ((status = parse_loose_header(hdr, oi, flags)) < 0) + } else if ((status = parse_loose_header(hdr, oi, flags)) < 0) { status = error(_("unable to parse %s header"), oid_to_hex(oid)); + } if (status >= 0 && oi->contentp) { *oi->contentp = unpack_loose_rest(&stream, hdr, @@ -1469,8 +1472,9 @@ static int loose_object_info(struct repository *r, git_inflate_end(&stream); status = -1; } - } else + } else { git_inflate_end(&stream); + } munmap(map, mapsize); if (oi->sizep == &size_scratch) From 4d77c12d20cf1b3708cc1697713a3874a877b5dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:15 +0200 Subject: [PATCH 040/198] object-file.c: simplify unpack_loose_short_header() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combine the unpack_loose_short_header(), unpack_loose_header_to_strbuf() and unpack_loose_header() 
functions into one. The unpack_loose_header_to_strbuf() function was added in 46f034483eb (sha1_file: support reading from a loose object of unknown type, 2015-05-03). Its code was mostly copy/pasted between it and both of unpack_loose_header() and unpack_loose_short_header(). We now have a single unpack_loose_header() function which accepts an optional "struct strbuf *" instead. I think the remaining unpack_loose_header() function could be further simplified, we're carrying some complexity just to be able to emit a garbage type longer than MAX_HEADER_LEN, we could alternatively just say "we found a garbage type ..." instead. But let's leave the current behavior in place for now. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 60 ++++++++++++++++++-------------------------------- object-store.h | 14 +++++++++++- streaming.c | 3 ++- 3 files changed, 37 insertions(+), 40 deletions(-) diff --git a/object-file.c b/object-file.c index 646ca7f85d6aa8..ef3a1517fed220 100644 --- a/object-file.c +++ b/object-file.c @@ -1210,11 +1210,12 @@ void *map_loose_object(struct repository *r, return map_loose_object_1(r, NULL, oid, size); } -static int unpack_loose_short_header(git_zstream *stream, - unsigned char *map, unsigned long mapsize, - void *buffer, unsigned long bufsiz) +int unpack_loose_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz, + struct strbuf *header) { - int ret; + int status; /* Get the data stream */ memset(stream, 0, sizeof(*stream)); @@ -1225,44 +1226,25 @@ static int unpack_loose_short_header(git_zstream *stream, git_inflate_init(stream); obj_read_unlock(); - ret = git_inflate(stream, 0); + status = git_inflate(stream, 0); obj_read_lock(); - - return ret; -} - -int unpack_loose_header(git_zstream *stream, - unsigned char *map, unsigned long mapsize, - void *buffer, unsigned long bufsiz) -{ - int status = unpack_loose_short_header(stream, map, mapsize, - 
buffer, bufsiz); - if (status < Z_OK) return status; - /* Make sure we have the terminating NUL */ - if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) - return -1; - return 0; -} - -static int unpack_loose_header_to_strbuf(git_zstream *stream, unsigned char *map, - unsigned long mapsize, void *buffer, - unsigned long bufsiz, struct strbuf *header) -{ - int status; - - status = unpack_loose_short_header(stream, map, mapsize, buffer, bufsiz); - if (status < Z_OK) - return -1; - /* * Check if entire header is unpacked in the first iteration. */ if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) return 0; + /* + * We have a header longer than MAX_HEADER_LEN. The "header" + * here is only non-NULL when we run "cat-file + * --allow-unknown-type". + */ + if (!header) + return -1; + /* * buffer[0..bufsiz] was not large enough. Copy the partial * result out to header, and then append the result of further @@ -1410,9 +1392,11 @@ static int loose_object_info(struct repository *r, unsigned long mapsize; void *map; git_zstream stream; + int hdr_ret; char hdr[MAX_HEADER_LEN]; struct strbuf hdrbuf = STRBUF_INIT; unsigned long size_scratch; + int allow_unknown = flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE; if (oi->delta_base_oid) oidclr(oi->delta_base_oid); @@ -1446,11 +1430,10 @@ static int loose_object_info(struct repository *r, if (oi->disk_sizep) *oi->disk_sizep = mapsize; - if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) { - if (unpack_loose_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0) - status = error(_("unable to unpack %s header with --allow-unknown-type"), - oid_to_hex(oid)); - } else if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { + + hdr_ret = unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr), + allow_unknown ? 
&hdrbuf : NULL); + if (hdr_ret < 0) { status = error(_("unable to unpack %s header"), oid_to_hex(oid)); } @@ -2555,7 +2538,8 @@ int read_loose_object(const char *path, goto out; } - if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { + if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr), + NULL) < 0) { error(_("unable to unpack header of %s"), path); goto out; } diff --git a/object-store.h b/object-store.h index d443964447c5ca..31327a7f6c3271 100644 --- a/object-store.h +++ b/object-store.h @@ -477,9 +477,21 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(each_packed_object_fn, void *, enum for_each_object_flags flags); +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns 0 on success. Returns negative values on error. + * + * It will only parse up to MAX_HEADER_LEN bytes unless an optional + * "hdrbuf" argument is non-NULL. This is intended for use with + * OBJECT_INFO_ALLOW_UNKNOWN_TYPE to extract the bad type for (error) + * reporting. The full header will be extracted to "hdrbuf" for use + * with parse_loose_header(). 
+ */ int unpack_loose_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, - unsigned long bufsiz); + unsigned long bufsiz, struct strbuf *hdrbuf); int parse_loose_header(const char *hdr, struct object_info *oi, unsigned int flags); int check_object_signature(struct repository *r, const struct object_id *oid, diff --git a/streaming.c b/streaming.c index 8beac62cbb7103..cb3c3cf6ff62f6 100644 --- a/streaming.c +++ b/streaming.c @@ -233,7 +233,8 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, st->u.loose.mapped, st->u.loose.mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr)) < 0) || + sizeof(st->u.loose.hdr), + NULL) < 0) || (parse_loose_header(st->u.loose.hdr, &oi, 0) < 0)) { git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); From 9d89f44165b6c4a6265788be4e88410047eb110e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:16 +0200 Subject: [PATCH 041/198] object-file.c: split up ternary in parse_loose_header() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This minor formatting change serves to make a subsequent patch easier to read. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index ef3a1517fed220..e51cf2ca33efcd 100644 --- a/object-file.c +++ b/object-file.c @@ -1381,7 +1381,10 @@ int parse_loose_header(const char *hdr, /* * The length must be followed by a zero byte */ - return *hdr ? 
-1 : type; + if (*hdr) + return -1; + + return type; } static int loose_object_info(struct repository *r, From c8be48542e18a713931919f166613dc749a69cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:17 +0200 Subject: [PATCH 042/198] object-file.c: stop dying in parse_loose_header() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start the libification of parse_loose_header() by making it return error codes and data instead of invoking die() by itself. For now we'll move the relevant die() call to loose_object_info() and read_loose_object() to keep this change smaller, but in subsequent commits we'll also libify those. Since the refactoring of parse_loose_header_extended() into parse_loose_header() in an earlier commit, its interface no longer accepts an "unsigned long *sizep". Rather, it accepts a "struct object_info *"; that structure will be populated with information about the object. It thus makes sense to further libify the interface so that it stops calling die() when it encounters OBJ_BAD, and instead relies on its callers to check the populated "oi->typep". Because of this we don't need to pass in the "unsigned int flags" which we used for OBJECT_INFO_ALLOW_UNKNOWN_TYPE, we can instead do that check in loose_object_info(). This also refactors some confusing control flow around the "status" variable. In some cases we set it to the return value of "error()", i.e. -1, and later checked if "status < 0" was true. In another case added in c84a1f3ed4d (sha1_file: refactor read_object, 2017-06-21) (but the behavior pre-dated that) we did checks of "status >= 0", because at that point "status" had become the return value of parse_loose_header(). I.e. a non-negative "enum object_type" (unless we got -1, aka OBJ_BAD).
Now that parse_loose_header() will return 0 on success instead of the type (which it'll stick into the "struct object_info") we don't need to conflate these two cases in its callers. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 53 ++++++++++++++++++++++++++------------------------ object-store.h | 13 +++++++++++-- streaming.c | 4 +++- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/object-file.c b/object-file.c index e51cf2ca33efcd..31263335af9f53 100644 --- a/object-file.c +++ b/object-file.c @@ -1322,9 +1322,7 @@ static void *unpack_loose_rest(git_zstream *stream, * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, - struct object_info *oi, - unsigned int flags) +int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; unsigned long size; @@ -1346,15 +1344,6 @@ int parse_loose_header(const char *hdr, type = type_from_string_gently(type_buf, type_len, 1); if (oi->type_name) strbuf_add(oi->type_name, type_buf, type_len); - /* - * Set type to 0 if its an unknown object and - * we're obtaining the type using '--allow-unknown-type' - * option. - */ - if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0)) - type = 0; - else if (type < 0) - die(_("invalid object type")); if (oi->typep) *oi->typep = type; @@ -1384,7 +1373,11 @@ int parse_loose_header(const char *hdr, if (*hdr) return -1; - return type; + /* + * The format is valid, but the type may still be bogus. The + * Caller needs to check its oi->typep. 
+ */ + return 0; } static int loose_object_info(struct repository *r, @@ -1399,6 +1392,8 @@ static int loose_object_info(struct repository *r, char hdr[MAX_HEADER_LEN]; struct strbuf hdrbuf = STRBUF_INIT; unsigned long size_scratch; + enum object_type type_scratch; + int parsed_header = 0; int allow_unknown = flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE; if (oi->delta_base_oid) @@ -1430,6 +1425,8 @@ static int loose_object_info(struct repository *r, if (!oi->sizep) oi->sizep = &size_scratch; + if (!oi->typep) + oi->typep = &type_scratch; if (oi->disk_sizep) *oi->disk_sizep = mapsize; @@ -1440,18 +1437,20 @@ static int loose_object_info(struct repository *r, status = error(_("unable to unpack %s header"), oid_to_hex(oid)); } - - if (status < 0) { - /* Do nothing */ - } else if (hdrbuf.len) { - if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0) - status = error(_("unable to parse %s header with --allow-unknown-type"), - oid_to_hex(oid)); - } else if ((status = parse_loose_header(hdr, oi, flags)) < 0) { - status = error(_("unable to parse %s header"), oid_to_hex(oid)); + if (!status) { + if (!parse_loose_header(hdrbuf.len ? hdrbuf.buf : hdr, oi)) + /* + * oi->{sizep,typep} are meaningless unless + * parse_loose_header() returns >= 0. + */ + parsed_header = 1; + else + status = error(_("unable to parse %s header"), oid_to_hex(oid)); } + if (!allow_unknown && parsed_header && *oi->typep < 0) + die(_("invalid object type")); - if (status >= 0 && oi->contentp) { + if (parsed_header && oi->contentp) { *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); if (!*oi->contentp) { @@ -1466,6 +1465,8 @@ static int loose_object_info(struct repository *r, if (oi->sizep == &size_scratch) oi->sizep = NULL; strbuf_release(&hdrbuf); + if (oi->typep == &type_scratch) + oi->typep = NULL; oi->whence = OI_LOOSE; return (status < 0) ? 
status : 0; } @@ -2531,6 +2532,7 @@ int read_loose_object(const char *path, git_zstream stream; char hdr[MAX_HEADER_LEN]; struct object_info oi = OBJECT_INFO_INIT; + oi.typep = type; oi.sizep = size; *contents = NULL; @@ -2547,12 +2549,13 @@ int read_loose_object(const char *path, goto out; } - *type = parse_loose_header(hdr, &oi, 0); - if (*type < 0) { + if (parse_loose_header(hdr, &oi) < 0) { error(_("unable to parse header of %s"), path); git_inflate_end(&stream); goto out; } + if (*type < 0) + die(_("invalid object type")); if (*type == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) diff --git a/object-store.h b/object-store.h index 31327a7f6c3271..65a8e4dc6a85bc 100644 --- a/object-store.h +++ b/object-store.h @@ -492,8 +492,17 @@ int for_each_packed_object(each_packed_object_fn, void *, int unpack_loose_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz, struct strbuf *hdrbuf); -int parse_loose_header(const char *hdr, struct object_info *oi, - unsigned int flags); + +/** + * parse_loose_header() parses the starting "<type> <len>\0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the <type> populate the "typep" in the "struct + * object_info". It will be OBJ_BAD if the object type is unknown. The + * parsed <len> can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest().
+ */ +int parse_loose_header(const char *hdr, struct object_info *oi); + int check_object_signature(struct repository *r, const struct object_id *oid, void *buf, unsigned long size, const char *type); int finalize_object_file(const char *tmpfile, const char *filename); diff --git a/streaming.c b/streaming.c index cb3c3cf6ff62f6..c3dc241d6a5c2e 100644 --- a/streaming.c +++ b/streaming.c @@ -225,6 +225,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, { struct object_info oi = OBJECT_INFO_INIT; oi.sizep = &st->size; + oi.typep = type; st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); if (!st->u.loose.mapped) @@ -235,7 +236,8 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, st->u.loose.hdr, sizeof(st->u.loose.hdr), NULL) < 0) || - (parse_loose_header(st->u.loose.hdr, &oi, 0) < 0)) { + (parse_loose_header(st->u.loose.hdr, &oi) < 0) || + *type < 0) { git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); return -1; From b3f04915beb4c2049157b8c95d9ceb029a5e8b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:18 +0200 Subject: [PATCH 043/198] object-file.c: guard against future bugs in loose_object_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An earlier version of the preceding commit had a subtle bug where our "type_scratch" (later assigned to "oi->typep") would be uninitialized and used in the "!allow_unknown" case, at which point it would contain a nonsensical value if we'd failed to call parse_loose_header(). The preceding commit introduced "parsed_header" variable to check for this case, but I think we can do better, let's carry a "oi_header" variable initially set to NULL, and only set it to "oi" once we're past parse_loose_header(). 
This is functionally the same thing, but hopefully makes it even more obvious in the future that we must not access the "typep" and "sizep" (or "type_name") unless parse_loose_header() succeeds, but that accessing other fields set earlier (such as the "disk_sizep" set earlier) is OK. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 31263335af9f53..d41f444e6cc96b 100644 --- a/object-file.c +++ b/object-file.c @@ -1393,7 +1393,7 @@ static int loose_object_info(struct repository *r, struct strbuf hdrbuf = STRBUF_INIT; unsigned long size_scratch; enum object_type type_scratch; - int parsed_header = 0; + struct object_info *oi_header = NULL; int allow_unknown = flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE; if (oi->delta_base_oid) @@ -1441,18 +1441,20 @@ static int loose_object_info(struct repository *r, if (!parse_loose_header(hdrbuf.len ? hdrbuf.buf : hdr, oi)) /* * oi->{sizep,typep} are meaningless unless - * parse_loose_header() returns >= 0. + * parse_loose_header() returns >= 0. Let's + * access them as "oi_header" (just an alias + * for "oi") below to make that intent clear. 
*/ - parsed_header = 1; + oi_header = oi; else status = error(_("unable to parse %s header"), oid_to_hex(oid)); } - if (!allow_unknown && parsed_header && *oi->typep < 0) + if (!allow_unknown && oi_header && *oi_header->typep < 0) die(_("invalid object type")); - if (parsed_header && oi->contentp) { + if (oi_header && oi->contentp) { *oi->contentp = unpack_loose_rest(&stream, hdr, - *oi->sizep, oid); + *oi_header->sizep, oid); if (!*oi->contentp) { git_inflate_end(&stream); status = -1; From b45bd7a52c9c485d9738db8058c8f445c613edec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:19 +0200 Subject: [PATCH 044/198] object-file.c: return -1, not "status" from unpack_loose_header() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return a -1 when git_inflate() fails instead of whatever Z_* status we'd get from zlib.c. This makes no difference to any error we report, but makes it more obvious that we don't care about the specific zlib error codes here. See d21f8426907 (unpack_sha1_header(): detect malformed object header, 2016-09-25) for the commit that added the "return status" code. As far as I can tell there was never a real reason (e.g. different reporting) for carrying down the "status" as opposed to "-1". At the time that d21f8426907 was written there was a corresponding "ret < Z_OK" check right after the unpack_sha1_header() call (the "unpack_sha1_header()" function was later rename to our current "unpack_loose_header()"). However, that check was removed in c84a1f3ed4d (sha1_file: refactor read_object, 2017-06-21) without changing the corresponding return code. So let's do the minor cleanup of also changing this function to return a -1. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index d41f444e6cc96b..956ca26051848e 100644 --- a/object-file.c +++ b/object-file.c @@ -1229,7 +1229,7 @@ int unpack_loose_header(git_zstream *stream, status = git_inflate(stream, 0); obj_read_lock(); if (status < Z_OK) - return status; + return -1; /* * Check if entire header is unpacked in the first iteration. From 211a82a896400b306d39de33f37cc89728d9e146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:20 +0200 Subject: [PATCH 045/198] object-file.c: return -2 on "header too long" in unpack_loose_header() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split up the return code for "header too long" from the generic negative return value unpack_loose_header() returns, and report via error() if we exceed MAX_HEADER_LEN. As a test added earlier in this series in t1006-cat-file.sh shows we'll correctly emit zlib errors from zlib.c already in this case, so we have no need to carry those return codes further down the stack. Let's instead just return -2 saying we ran into the MAX_HEADER_LEN limit, or other negative values for "unable to unpack header". I tried setting up an enum just for these three return values, but I think the result was less readable. Let's consider doing that if we gain even more return values. For now let's do the next best thing and enumerate our known return values, and BUG() if we encounter one we don't know about. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-file.c | 16 +++++++++++++--- object-store.h | 6 ++++-- t/t1006-cat-file.sh | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 956ca26051848e..1866115a1c5870 100644 --- a/object-file.c +++ b/object-file.c @@ -1243,7 +1243,7 @@ int unpack_loose_header(git_zstream *stream, * --allow-unknown-type". */ if (!header) - return -1; + return -2; /* * buffer[0..bufsiz] was not large enough. Copy the partial @@ -1264,7 +1264,7 @@ int unpack_loose_header(git_zstream *stream, stream->next_out = buffer; stream->avail_out = bufsiz; } while (status != Z_STREAM_END); - return -1; + return -2; } static void *unpack_loose_rest(git_zstream *stream, @@ -1433,9 +1433,19 @@ static int loose_object_info(struct repository *r, hdr_ret = unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr), allow_unknown ? &hdrbuf : NULL); - if (hdr_ret < 0) { + switch (hdr_ret) { + case 0: + break; + case -1: status = error(_("unable to unpack %s header"), oid_to_hex(oid)); + break; + case -2: + status = error(_("header for %s too long, exceeds %d bytes"), + oid_to_hex(oid), MAX_HEADER_LEN); + break; + default: + BUG("unknown hdr_ret value %d", hdr_ret); } if (!status) { if (!parse_loose_header(hdrbuf.len ? hdrbuf.buf : hdr, oi)) diff --git a/object-store.h b/object-store.h index 65a8e4dc6a85bc..1151ce8e8209a0 100644 --- a/object-store.h +++ b/object-store.h @@ -481,13 +481,15 @@ int for_each_packed_object(each_packed_object_fn, void *, * unpack_loose_header() initializes the data stream needed to unpack * a loose object header. * - * Returns 0 on success. Returns negative values on error. + * Returns 0 on success. Returns negative values on error. If the + * header exceeds MAX_HEADER_LEN -2 will be returned. * * It will only parse up to MAX_HEADER_LEN bytes unless an optional * "hdrbuf" argument is non-NULL. 
This is intended for use with * OBJECT_INFO_ALLOW_UNKNOWN_TYPE to extract the bad type for (error) * reporting. The full header will be extracted to "hdrbuf" for use - * with parse_loose_header(). + * with parse_loose_header(), -2 will still be returned from this + * function to indicate that the header was too long. */ int unpack_loose_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 86fd2a90ca773d..06d38e1fae6fca 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -440,7 +440,7 @@ bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_t test_expect_success 'die on broken object with large type under -t and -s without --allow-unknown-type' ' cat >err.expect <<-EOF && - error: unable to unpack $bogus_sha1 header + error: header for $bogus_sha1 too long, exceeds 32 bytes fatal: git cat-file: could not get object info EOF From 70241b6ebbb48608f360f648e88e523077531fbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:21 +0200 Subject: [PATCH 046/198] fsck: don't hard die on invalid object types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the error fsck emits on invalid object types, such as: $ git hash-object --stdin -w -t garbage --literally </dev/null <OID> From the very ungraceful error of: $ git fsck fatal: invalid object type $ To: $ git fsck error: hash mismatch for <OID_PATH> (expected <OID>) error: <OID>: object corrupt or missing: <OID_PATH> [ the rest of the fsck output here, i.e. it didn't hard die ] We'll still exit with non-zero, but now we'll finish the rest of the traversal. The test that's being added here asserts that we'll still complain about other fsck issues (e.g. an unrelated dangling blob). To do this we need to pass down the "OBJECT_INFO_ALLOW_UNKNOWN_TYPE" flag from read_loose_object() through to parse_loose_header().
Since the read_loose_object() function is only used in builtin/fsck.c we can simply change it. See f6371f92104 (sha1_file: add read_loose_object() function, 2017-01-13) for the introduction of read_loose_object(). Why are we complaining about a "hash mismatch" for an object of a type we don't know about? We shouldn't. This is the bare minimal change needed to not make fsck hard die on a repository that's been corrupted in this manner. In subsequent commits we'll teach fsck to recognize this particular type of corruption and emit a better error message. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/fsck.c | 3 ++- object-file.c | 11 ++++++++--- object-store.h | 3 ++- t/t1450-fsck.sh | 14 +++++++------- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 87a99b0108ea63..38b515deb690f3 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -600,7 +600,8 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data) void *contents; int eaten; - if (read_loose_object(path, oid, &type, &size, &contents) < 0) { + if (read_loose_object(path, oid, &type, &size, &contents, + OBJECT_INFO_ALLOW_UNKNOWN_TYPE) < 0) { errors_found |= ERROR_OBJECT; error(_("%s: object corrupt or missing: %s"), oid_to_hex(oid), path); diff --git a/object-file.c b/object-file.c index 1866115a1c5870..8fb55fc6f58f8a 100644 --- a/object-file.c +++ b/object-file.c @@ -2536,7 +2536,8 @@ int read_loose_object(const char *path, const struct object_id *expected_oid, enum object_type *type, unsigned long *size, - void **contents) + void **contents, + unsigned int oi_flags) { int ret = -1; void *map = NULL; @@ -2544,6 +2545,7 @@ int read_loose_object(const char *path, git_zstream stream; char hdr[MAX_HEADER_LEN]; struct object_info oi = OBJECT_INFO_INIT; + int allow_unknown = oi_flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE; oi.typep = type; oi.sizep = size; @@ -2566,8 +2568,11 @@ int read_loose_object(const char 
*path, git_inflate_end(&stream); goto out; } - if (*type < 0) - die(_("invalid object type")); + if (!allow_unknown && *type < 0) { + error(_("header for %s declares an unknown type"), path); + git_inflate_end(&stream); + goto out; + } if (*type == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) diff --git a/object-store.h b/object-store.h index 1151ce8e8209a0..94ff03072c143f 100644 --- a/object-store.h +++ b/object-store.h @@ -245,7 +245,8 @@ int read_loose_object(const char *path, const struct object_id *expected_oid, enum object_type *type, unsigned long *size, - void **contents); + void **contents, + unsigned int oi_flags); /* Retry packed storage after checking packed and loose storage */ #define HAS_OBJECT_RECHECK_PACKED 1 diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index f10d6f7b7e8c7f..d8303db9709d49 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -863,16 +863,16 @@ test_expect_success 'detect corrupt index file in fsck' ' test_i18ngrep "bad index file" errors ' -test_expect_success 'fsck hard errors on an invalid object type' ' +test_expect_success 'fsck error and recovery on invalid object type' ' git init --bare garbage-type && empty_blob=$(git -C garbage-type hash-object --stdin -w -t blob err.expect <<-\EOF && - fatal: invalid object type - EOF - test_must_fail git -C garbage-type fsck >out.actual 2>err.actual && - test_cmp err.expect err.actual && - test_must_be_empty out.actual + test_must_fail git -C garbage-type fsck >out 2>err && + grep -e "^error" -e "^fatal" err >errors && + test_line_count = 2 errors && + grep "error: hash mismatch for" err && + grep "$garbage_blob: object corrupt or missing:" err && + grep "dangling blob $empty_blob" out ' test_done From c2a9c37085070449c28b70e643d0627f4d54b1a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:22 +0200 Subject: [PATCH 047/198] object-store.h: move 
read_loose_object() below 'struct object_info' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the declaration of read_loose_object() below "struct object_info". In the next commit we'll add a "struct object_info *" parameter to it, moving it will avoid a forward declaration of the struct. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-store.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/object-store.h b/object-store.h index 94ff03072c143f..72d668b1674995 100644 --- a/object-store.h +++ b/object-store.h @@ -234,20 +234,6 @@ int pretend_object_file(void *, unsigned long, enum object_type, int force_object_loose(const struct object_id *oid, time_t mtime); -/* - * Open the loose object at path, check its hash, and return the contents, - * type, and size. If the object is a blob, then "contents" may return NULL, - * to allow streaming of large blobs. - * - * Returns 0 on success, negative on error (details may be written to stderr). - */ -int read_loose_object(const char *path, - const struct object_id *expected_oid, - enum object_type *type, - unsigned long *size, - void **contents, - unsigned int oi_flags); - /* Retry packed storage after checking packed and loose storage */ #define HAS_OBJECT_RECHECK_PACKED 1 @@ -388,6 +374,20 @@ int oid_object_info_extended(struct repository *r, const struct object_id *, struct object_info *, unsigned flags); +/* + * Open the loose object at path, check its hash, and return the contents, + * type, and size. If the object is a blob, then "contents" may return NULL, + * to allow streaming of large blobs. + * + * Returns 0 on success, negative on error (details may be written to stderr). 
+ */ +int read_loose_object(const char *path, + const struct object_id *expected_oid, + enum object_type *type, + unsigned long *size, + void **contents, + unsigned int oi_flags); + /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: From 87290e0ceeb14e30758936ad922ccc580857c7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 2021 15:37:23 +0200 Subject: [PATCH 048/198] fsck: report invalid types recorded in objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continue the work in the preceding commit and improve the error on: $ git hash-object --stdin -w -t garbage --literally </dev/null $ git fsck error: hash mismatch for <OID_PATH> (expected <OID>) error: <OID>: object corrupt or missing: <OID_PATH> [ other fsck output ] To instead emit: $ git fsck error: <OID>: object is of unknown type 'garbage': <OID_PATH> [ other fsck output ] The complaint about a "hash mismatch" was simply an emergent property of how we'd fall through from read_loose_object() into fsck_loose() when we didn't get the data we expected. Now we'll correctly note that the object type is invalid.
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/fsck.c | 22 ++++++++++++++++++---- object-file.c | 13 +++++-------- object-store.h | 4 ++-- t/t1450-fsck.sh | 24 +++++++++++++++++++++--- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 38b515deb690f3..32f11dc1fe65c6 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -599,12 +599,26 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data) unsigned long size; void *contents; int eaten; - - if (read_loose_object(path, oid, &type, &size, &contents, - OBJECT_INFO_ALLOW_UNKNOWN_TYPE) < 0) { - errors_found |= ERROR_OBJECT; + struct strbuf sb = STRBUF_INIT; + unsigned int oi_flags = OBJECT_INFO_ALLOW_UNKNOWN_TYPE; + struct object_info oi; + int found = 0; + oi.type_name = &sb; + oi.sizep = &size; + oi.typep = &type; + + if (read_loose_object(path, oid, &contents, &oi, oi_flags) < 0) { + found |= ERROR_OBJECT; error(_("%s: object corrupt or missing: %s"), oid_to_hex(oid), path); + } + if (type < 0) { + found |= ERROR_OBJECT; + error(_("%s: object is of unknown type '%s': %s"), + oid_to_hex(oid), sb.buf, path); + } + if (found) { + errors_found |= ERROR_OBJECT; return 0; /* keep checking other objects */ } diff --git a/object-file.c b/object-file.c index 8fb55fc6f58f8a..e550ea0c7cf6bf 100644 --- a/object-file.c +++ b/object-file.c @@ -2534,9 +2534,8 @@ static int check_stream_oid(git_zstream *stream, int read_loose_object(const char *path, const struct object_id *expected_oid, - enum object_type *type, - unsigned long *size, void **contents, + struct object_info *oi, unsigned int oi_flags) { int ret = -1; @@ -2544,10 +2543,9 @@ int read_loose_object(const char *path, unsigned long mapsize; git_zstream stream; char hdr[MAX_HEADER_LEN]; - struct object_info oi = OBJECT_INFO_INIT; int allow_unknown = oi_flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE; - oi.typep = type; - oi.sizep = size; + enum object_type *type = oi->typep; 
+ unsigned long *size = oi->sizep; *contents = NULL; @@ -2563,7 +2561,7 @@ int read_loose_object(const char *path, goto out; } - if (parse_loose_header(hdr, &oi) < 0) { + if (parse_loose_header(hdr, oi) < 0) { error(_("unable to parse header of %s"), path); git_inflate_end(&stream); goto out; @@ -2585,8 +2583,7 @@ int read_loose_object(const char *path, goto out; } if (check_object_signature(the_repository, expected_oid, - *contents, *size, - type_name(*type))) { + *contents, *size, oi->type_name->buf)) { error(_("hash mismatch for %s (expected %s)"), path, oid_to_hex(expected_oid)); free(*contents); diff --git a/object-store.h b/object-store.h index 72d668b1674995..96a5970f314914 100644 --- a/object-store.h +++ b/object-store.h @@ -376,6 +376,7 @@ int oid_object_info_extended(struct repository *r, /* * Open the loose object at path, check its hash, and return the contents, + * use the "oi" argument to assert things about the object, or e.g. populate its * type, and size. If the object is a blob, then "contents" may return NULL, * to allow streaming of large blobs. 
* @@ -383,9 +384,8 @@ int oid_object_info_extended(struct repository *r, */ int read_loose_object(const char *path, const struct object_id *expected_oid, - enum object_type *type, - unsigned long *size, void **contents, + struct object_info *oi, unsigned int oi_flags); /* diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index d8303db9709d49..da2658155c7daa 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -66,6 +66,25 @@ test_expect_success 'object with hash mismatch' ' ) ' +test_expect_success 'object with hash and type mismatch' ' + git init --bare hash-type-mismatch && + ( + cd hash-type-mismatch && + oid=$(echo blob | git hash-object -w --stdin -t garbage --literally) && + old=$(test_oid_to_path "$oid") && + new=$(dirname $old)/$(test_oid ff_2) && + oid="$(dirname $new)$(basename $new)" && + mv objects/$old objects/$new && + git update-index --add --cacheinfo 100644 $oid foo && + tree=$(git write-tree) && + cmt=$(echo bogus | git commit-tree $tree) && + git update-ref refs/heads/bogus $cmt && + test_must_fail git fsck 2>out && + grep "^error: hash mismatch for " out && + grep "^error: $oid: object is of unknown type '"'"'garbage'"'"'" out + ) +' + test_expect_success 'branch pointing to non-commit' ' git rev-parse HEAD^{tree} >.git/refs/heads/invalid && test_when_finished "git update-ref -d refs/heads/invalid" && @@ -869,9 +888,8 @@ test_expect_success 'fsck error and recovery on invalid object type' ' garbage_blob=$(git -C garbage-type hash-object --stdin -w -t garbage --literally out 2>err && grep -e "^error" -e "^fatal" err >errors && - test_line_count = 2 errors && - grep "error: hash mismatch for" err && - grep "$garbage_blob: object corrupt or missing:" err && + test_line_count = 1 errors && + grep "$garbage_blob: object is of unknown type '"'"'garbage'"'"':" err && grep "dangling blob $empty_blob" out ' From 27150d03c4ae81eed1a3d636cae359474c82d1cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 10 Jul 
2021 15:37:24 +0200 Subject: [PATCH 049/198] fsck: report invalid object type-path combinations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve the error that's emitted in cases where we find a loose object we parse, but which isn't at the location we expect it to be. Before this change we'd prefix the error with a not-a-OID derived from the path at which the object was found, due to an emergent behavior in how we'd end up with an "OID" in these codepaths. Now we'll instead say what object we hashed, and what path it was found at. Before this patch series e.g.: $ git hash-object --stdin -w -t blob Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 2 +- builtin/fsck.c | 13 +++++++++---- builtin/index-pack.c | 2 +- builtin/mktag.c | 3 ++- object-file.c | 21 ++++++++++++--------- object-store.h | 4 +++- object.c | 4 ++-- pack-check.c | 3 ++- t/t1006-cat-file.sh | 2 +- t/t1450-fsck.sh | 8 +++++--- 10 files changed, 38 insertions(+), 24 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 3c20f164f0f051..48a3b6a7f8f01b 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -312,7 +312,7 @@ static void export_blob(const struct object_id *oid) if (!buf) die("could not read blob %s", oid_to_hex(oid)); if (check_object_signature(the_repository, oid, buf, size, - type_name(type)) < 0) + type_name(type), NULL) < 0) die("oid mismatch in blob %s", oid_to_hex(oid)); object = parse_object_buffer(the_repository, oid, type, size, buf, &eaten); diff --git a/builtin/fsck.c b/builtin/fsck.c index 32f11dc1fe65c6..96df1aadbfaa73 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -602,20 +602,25 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data) struct strbuf sb = STRBUF_INIT; unsigned int oi_flags = OBJECT_INFO_ALLOW_UNKNOWN_TYPE; struct object_info oi; + struct object_id real_oid = *null_oid(); int found = 0; oi.type_name = &sb; oi.sizep = &size; oi.typep = 
&type; - if (read_loose_object(path, oid, &contents, &oi, oi_flags) < 0) { + if (read_loose_object(path, oid, &real_oid, &contents, &oi, oi_flags) < 0) { found |= ERROR_OBJECT; - error(_("%s: object corrupt or missing: %s"), - oid_to_hex(oid), path); + if (!oideq(&real_oid, oid)) + error(_("%s: hash-path mismatch, found at: %s"), + oid_to_hex(&real_oid), path); + else + error(_("%s: object corrupt or missing: %s"), + oid_to_hex(oid), path); } if (type < 0) { found |= ERROR_OBJECT; error(_("%s: object is of unknown type '%s': %s"), - oid_to_hex(oid), sb.buf, path); + oid_to_hex(&real_oid), sb.buf, path); } if (found) { errors_found |= ERROR_OBJECT; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 3fbc5d70777e50..bf860b6555e2a0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1421,7 +1421,7 @@ static void fix_unresolved_deltas(struct hashfile *f) if (check_object_signature(the_repository, &d->oid, data, size, - type_name(type))) + type_name(type), NULL)) die(_("local object %s is corrupt"), oid_to_hex(&d->oid)); /* diff --git a/builtin/mktag.c b/builtin/mktag.c index dddcccdd368328..3b2dbbb37e6fe1 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -62,7 +62,8 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type) repl = lookup_replace_object(the_repository, tagged_oid); ret = check_object_signature(the_repository, repl, - buffer, size, type_name(*tagged_type)); + buffer, size, type_name(*tagged_type), + NULL); free(buffer); return ret; diff --git a/object-file.c b/object-file.c index e550ea0c7cf6bf..923ff759e1914d 100644 --- a/object-file.c +++ b/object-file.c @@ -1039,9 +1039,11 @@ void *xmmap(void *start, size_t length, * the streaming interface and rehash it to do the same. 
*/ int check_object_signature(struct repository *r, const struct object_id *oid, - void *map, unsigned long size, const char *type) + void *map, unsigned long size, const char *type, + struct object_id *real_oidp) { - struct object_id real_oid; + struct object_id tmp; + struct object_id *real_oid = real_oidp ? real_oidp : &tmp; enum object_type obj_type; struct git_istream *st; git_hash_ctx c; @@ -1049,8 +1051,8 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int hdrlen; if (map) { - hash_object_file(r->hash_algo, map, size, type, &real_oid); - return !oideq(oid, &real_oid) ? -1 : 0; + hash_object_file(r->hash_algo, map, size, type, real_oid); + return !oideq(oid, real_oid) ? -1 : 0; } st = open_istream(r, oid, &obj_type, &size, NULL); @@ -1075,9 +1077,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid, break; r->hash_algo->update_fn(&c, buf, readlen); } - r->hash_algo->final_oid_fn(&real_oid, &c); + r->hash_algo->final_oid_fn(real_oid, &c); close_istream(st); - return !oideq(oid, &real_oid) ? -1 : 0; + return !oideq(oid, real_oid) ? 
-1 : 0; } int git_open_cloexec(const char *name, int flags) @@ -2534,6 +2536,7 @@ static int check_stream_oid(git_zstream *stream, int read_loose_object(const char *path, const struct object_id *expected_oid, + struct object_id *real_oid, void **contents, struct object_info *oi, unsigned int oi_flags) @@ -2583,9 +2586,9 @@ int read_loose_object(const char *path, goto out; } if (check_object_signature(the_repository, expected_oid, - *contents, *size, oi->type_name->buf)) { - error(_("hash mismatch for %s (expected %s)"), path, - oid_to_hex(expected_oid)); + *contents, *size, oi->type_name->buf, real_oid)) { + if (oideq(real_oid, null_oid())) + BUG("should only get OID mismatch errors with mapped contents"); free(*contents); goto out; } diff --git a/object-store.h b/object-store.h index 96a5970f314914..9fc69016361e12 100644 --- a/object-store.h +++ b/object-store.h @@ -384,6 +384,7 @@ int oid_object_info_extended(struct repository *r, */ int read_loose_object(const char *path, const struct object_id *expected_oid, + struct object_id *real_oid, void **contents, struct object_info *oi, unsigned int oi_flags); @@ -507,7 +508,8 @@ int unpack_loose_header(git_zstream *stream, unsigned char *map, int parse_loose_header(const char *hdr, struct object_info *oi); int check_object_signature(struct repository *r, const struct object_id *oid, - void *buf, unsigned long size, const char *type); + void *buf, unsigned long size, const char *type, + struct object_id *real_oidp); int finalize_object_file(const char *tmpfile, const char *filename); int check_and_freshen_file(const char *fn, int freshen); diff --git a/object.c b/object.c index 14188453c56706..5467ead3285dd4 100644 --- a/object.c +++ b/object.c @@ -261,7 +261,7 @@ struct object *parse_object(struct repository *r, const struct object_id *oid) if ((obj && obj->type == OBJ_BLOB && repo_has_object_file(r, oid)) || (!obj && repo_has_object_file(r, oid) && oid_object_info(r, oid, NULL) == OBJ_BLOB)) { - if 
(check_object_signature(r, repl, NULL, 0, NULL) < 0) { + if (check_object_signature(r, repl, NULL, 0, NULL, NULL) < 0) { error(_("hash mismatch %s"), oid_to_hex(oid)); return NULL; } @@ -272,7 +272,7 @@ struct object *parse_object(struct repository *r, const struct object_id *oid) buffer = repo_read_object_file(r, oid, &type, &size); if (buffer) { if (check_object_signature(r, repl, buffer, size, - type_name(type)) < 0) { + type_name(type), NULL) < 0) { free(buffer); error(_("hash mismatch %s"), oid_to_hex(repl)); return NULL; diff --git a/pack-check.c b/pack-check.c index 4b089fe8ec051a..e6aa4442c905d4 100644 --- a/pack-check.c +++ b/pack-check.c @@ -142,7 +142,8 @@ static int verify_packfile(struct repository *r, err = error("cannot unpack %s from %s at offset %"PRIuMAX"", oid_to_hex(&oid), p->pack_name, (uintmax_t)entries[i].offset); - else if (check_object_signature(r, &oid, data, size, type_name(type))) + else if (check_object_signature(r, &oid, data, size, + type_name(type), NULL)) err = error("packed %s from %s is corrupt", oid_to_hex(&oid), p->pack_name); else if (fn) { diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 06d38e1fae6fca..72386cfec0efed 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -490,7 +490,7 @@ test_expect_success 'cat-file -t and -s on corrupt loose object' ' # Swap the two to corrupt the repository mv -f "$other_path" "$empty_path" && test_must_fail git fsck 2>err.fsck && - grep "hash mismatch" err.fsck && + grep "hash-path mismatch" err.fsck && # confirm that cat-file is reading the new swapped-in # blob... 
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index da2658155c7daa..7d0d57564b5048 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -53,6 +53,7 @@ test_expect_success 'object with hash mismatch' ' ( cd hash-mismatch && oid=$(echo blob | git hash-object -w --stdin) && + oldoid=$oid && old=$(test_oid_to_path "$oid") && new=$(dirname $old)/$(test_oid ff_2) && oid="$(dirname $new)$(basename $new)" && @@ -62,7 +63,7 @@ test_expect_success 'object with hash mismatch' ' cmt=$(echo bogus | git commit-tree $tree) && git update-ref refs/heads/bogus $cmt && test_must_fail git fsck 2>out && - test_i18ngrep "$oid.*corrupt" out + grep "$oldoid: hash-path mismatch, found at: .*$new" out ) ' @@ -71,6 +72,7 @@ test_expect_success 'object with hash and type mismatch' ' ( cd hash-type-mismatch && oid=$(echo blob | git hash-object -w --stdin -t garbage --literally) && + oldoid=$oid && old=$(test_oid_to_path "$oid") && new=$(dirname $old)/$(test_oid ff_2) && oid="$(dirname $new)$(basename $new)" && @@ -80,8 +82,8 @@ test_expect_success 'object with hash and type mismatch' ' cmt=$(echo bogus | git commit-tree $tree) && git update-ref refs/heads/bogus $cmt && test_must_fail git fsck 2>out && - grep "^error: hash mismatch for " out && - grep "^error: $oid: object is of unknown type '"'"'garbage'"'"'" out + grep "^error: $oldoid: hash-path mismatch, found at: .*$new" out && + grep "^error: $oldoid: object is of unknown type '"'"'garbage'"'"'" out ) ' From f6cd080bd2d6d6e721a52661a5da19f8835a01c3 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:22 +0000 Subject: [PATCH 050/198] diff --color-moved: add perf tests Add some tests so we can monitor changes to the performance of the move detection code. The tests record the performance of a single large diff and a sequence of smaller diffs. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- t/perf/p4002-diff-color-moved.sh | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100755 t/perf/p4002-diff-color-moved.sh diff --git a/t/perf/p4002-diff-color-moved.sh b/t/perf/p4002-diff-color-moved.sh new file mode 100755 index 00000000000000..ad56bcb71e4cd4 --- /dev/null +++ b/t/perf/p4002-diff-color-moved.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +test_description='Tests diff --color-moved performance' +. ./perf-lib.sh + +test_perf_default_repo + +if ! git rev-parse --verify v2.29.0^{commit} >/dev/null +then + skip_all='skipping because tag v2.29.0 was not found' + test_done +fi + +GIT_PAGER_IN_USE=1 +test_export GIT_PAGER_IN_USE + +test_perf 'diff --no-color-moved --no-color-moved-ws large change' ' + git diff --no-color-moved --no-color-moved-ws v2.28.0 v2.29.0 +' + +test_perf 'diff --color-moved --no-color-moved-ws large change' ' + git diff --color-moved=zebra --no-color-moved-ws v2.28.0 v2.29.0 +' + +test_perf 'diff --color-moved-ws=allow-indentation-change large change' ' + git diff --color-moved=zebra --color-moved-ws=allow-indentation-change \ + v2.28.0 v2.29.0 +' + +test_perf 'log --no-color-moved --no-color-moved-ws' ' + git log --no-color-moved --no-color-moved-ws --no-merges --patch \ + -n1000 v2.29.0 +' + +test_perf 'log --color-moved --no-color-moved-ws' ' + git log --color-moved=zebra --no-color-moved-ws --no-merges --patch \ + -n1000 v2.29.0 +' + +test_perf 'log --color-moved-ws=allow-indentation-change' ' + git log --color-moved=zebra --color-moved-ws=allow-indentation-change \ + --no-merges --patch -n1000 v2.29.0 +' + +test_done From 7a07e3ee608167fa9af0adc05a5861125d2d5bf4 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:23 +0000 Subject: [PATCH 051/198] diff --color-moved=zebra: fix alternate coloring b0a2ba4776 ("diff --color-moved=zebra: be stricter with color alternation", 2018-11-23) sought to avoid using the alternate colors 
unless there are two adjacent moved blocks of the same sign. Unfortunately it contains two bugs that prevented it from fixing the problem properly. Firstly `last_symbol` is reset at the start of each iteration of the loop losing the symbol of the last line and secondly when deciding whether to use the alternate color it should be checking if the current line is the same sign as the last line, not a different sign. The combination of the two errors means that we still use the alternate color when we should do but we also use it when we shouldn't. This is most noticeable when using --color-moved-ws=allow-indentation-change with hunks like -this line gets indented + this line gets indented where the post image is colored with newMovedAlternate rather than newMoved. While this does not matter much, the next commit will change the coloring to be correct in this case, so let's fix the bug here to make it clear why the output is changing and add a regression test. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 4 +-- t/t4015-diff-whitespace.sh | 72 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index 52c791574b71c8..cb068f8258c039 100644 --- a/diff.c +++ b/diff.c @@ -1142,6 +1142,7 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; int n, flipped_block = 0, block_length = 0; + enum diff_symbol last_symbol = 0; for (n = 0; n < o->emitted_symbols->nr; n++) { @@ -1149,7 +1150,6 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_entry *key; struct moved_entry *match = NULL; struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; - enum diff_symbol last_symbol = 0; switch (l->s) { case DIFF_SYMBOL_PLUS: @@ -1214,7 +1214,7 @@ static void mark_color_as_moved(struct diff_options *o, } if (adjust_last_block(o, n, block_length) && - pmb_nr && last_symbol != l->s) +
pmb_nr && last_symbol == l->s) flipped_block = (flipped_block + 1) % 2; else flipped_block = 0; diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 2c13b62d3c6548..920114cd795c01 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1442,6 +1442,78 @@ test_expect_success 'detect permutations inside moved code -- dimmed-zebra' ' test_cmp expected actual ' +test_expect_success 'zebra alternate color is only used when necessary' ' + cat >old.txt <<-\EOF && + line 1A should be marked as oldMoved newMovedAlternate + line 1B should be marked as oldMoved newMovedAlternate + unchanged + line 2A should be marked as oldMoved newMovedAlternate + line 2B should be marked as oldMoved newMovedAlternate + line 3A should be marked as oldMovedAlternate newMoved + line 3B should be marked as oldMovedAlternate newMoved + unchanged + line 4A should be marked as oldMoved newMovedAlternate + line 4B should be marked as oldMoved newMovedAlternate + line 5A should be marked as oldMovedAlternate newMoved + line 5B should be marked as oldMovedAlternate newMoved + line 6A should be marked as oldMoved newMoved + line 6B should be marked as oldMoved newMoved + EOF + cat >new.txt <<-\EOF && + line 1A should be marked as oldMoved newMovedAlternate + line 1B should be marked as oldMoved newMovedAlternate + unchanged + line 3A should be marked as oldMovedAlternate newMoved + line 3B should be marked as oldMovedAlternate newMoved + line 2A should be marked as oldMoved newMovedAlternate + line 2B should be marked as oldMoved newMovedAlternate + unchanged + line 6A should be marked as oldMoved newMoved + line 6B should be marked as oldMoved newMoved + line 4A should be marked as oldMoved newMovedAlternate + line 4B should be marked as oldMoved newMovedAlternate + line 5A should be marked as oldMovedAlternate newMoved + line 5B should be marked as oldMovedAlternate newMoved + EOF + test_expect_code 1 git diff --no-index --color --color-moved=zebra \ + 
--color-moved-ws=allow-indentation-change \ + old.txt new.txt >output && + grep -v index output | test_decode_color >actual && + cat >expected <<-\EOF && + diff --git a/old.txt b/new.txt + --- a/old.txt + +++ b/new.txt + @@ -1,14 +1,14 @@ + -line 1A should be marked as oldMoved newMovedAlternate + -line 1B should be marked as oldMoved newMovedAlternate + + line 1A should be marked as oldMoved newMovedAlternate + + line 1B should be marked as oldMoved newMovedAlternate + unchanged + -line 2A should be marked as oldMoved newMovedAlternate + -line 2B should be marked as oldMoved newMovedAlternate + -line 3A should be marked as oldMovedAlternate newMoved + -line 3B should be marked as oldMovedAlternate newMoved + + line 3A should be marked as oldMovedAlternate newMoved + + line 3B should be marked as oldMovedAlternate newMoved + + line 2A should be marked as oldMoved newMovedAlternate + + line 2B should be marked as oldMoved newMovedAlternate + unchanged + -line 4A should be marked as oldMoved newMovedAlternate + -line 4B should be marked as oldMoved newMovedAlternate + -line 5A should be marked as oldMovedAlternate newMoved + -line 5B should be marked as oldMovedAlternate newMoved + -line 6A should be marked as oldMoved newMoved + -line 6B should be marked as oldMoved newMoved + + line 6A should be marked as oldMoved newMoved + + line 6B should be marked as oldMoved newMoved + + line 4A should be marked as oldMoved newMovedAlternate + + line 4B should be marked as oldMoved newMovedAlternate + + line 5A should be marked as oldMovedAlternate newMoved + + line 5B should be marked as oldMovedAlternate newMoved + EOF + test_cmp expected actual +' + test_expect_success 'cmd option assumes configured colored-moved' ' test_config color.diff.oldMoved "magenta" && test_config color.diff.newMoved "cyan" && From bdbb7734f7a18e41c802552f0125b8e03cd3e4dc Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:24 +0000 Subject: [PATCH 052/198] diff --color-moved: 
avoid false short line matches and bad zebra coloring When marking moved lines it is possible for a block of potential matched lines to extend past a change in sign when there is a sequence of added lines whose text matches the text of a sequence of deleted and added lines. Most of the time either `match` will be NULL or `pmb_advance_or_null()` will fail when the loop encounters a change of sign but there are corner cases where `match` is non-NULL and `pmb_advance_or_null()` successfully advances the moved block despite the change in sign. One consequence of this is highlighting a short line as moved when it should not be. For example -moved line # Correctly highlighted as moved +short line # Wrongly highlighted as moved context +moved line # Correctly highlighted as moved +short line context -short line The other consequence is coloring a moved addition following a moved deletion in the wrong color. In the example below the first "+moved line 3" should be highlighted as newMoved not newMovedAlternate. -moved line 1 # Correctly highlighted as oldMoved -moved line 2 # Correctly highlighted as oldMovedAlternate +moved line 3 # Wrongly highlighted as newMovedAlternate context # Everything else is highlighted correctly +moved line 2 +moved line 3 context +moved line 1 -moved line 3 These false matches are more likely when using --color-moved-ws with the exception of --color-moved-ws=allow-indentation-change which ties the sign of the current whitespace delta to the sign of the line to avoid this problem. The fix is to check that the sign of the new line being matched is the same as the sign of the line that started the block of potential matches.
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 17 ++++++---- t/t4015-diff-whitespace.sh | 65 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/diff.c b/diff.c index cb068f8258c039..2b51b77fd20fce 100644 --- a/diff.c +++ b/diff.c @@ -1142,7 +1142,7 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; int n, flipped_block = 0, block_length = 0; - enum diff_symbol last_symbol = 0; + enum diff_symbol moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER; for (n = 0; n < o->emitted_symbols->nr; n++) { @@ -1168,7 +1168,7 @@ static void mark_color_as_moved(struct diff_options *o, flipped_block = 0; } - if (!match) { + if (pmb_nr && (!match || l->s != moved_symbol)) { int i; adjust_last_block(o, n, block_length); @@ -1177,12 +1177,13 @@ static void mark_color_as_moved(struct diff_options *o, pmb_nr = 0; block_length = 0; flipped_block = 0; - last_symbol = l->s; + } + if (!match) { + moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER; continue; } if (o->color_moved == COLOR_MOVED_PLAIN) { - last_symbol = l->s; l->flags |= DIFF_SYMBOL_MOVED_LINE; continue; } @@ -1214,11 +1215,16 @@ static void mark_color_as_moved(struct diff_options *o, } if (adjust_last_block(o, n, block_length) && - pmb_nr && last_symbol == l->s) + pmb_nr && moved_symbol == l->s) flipped_block = (flipped_block + 1) % 2; else flipped_block = 0; + if (pmb_nr) + moved_symbol = l->s; + else + moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER; + block_length = 0; } @@ -1228,7 +1234,6 @@ static void mark_color_as_moved(struct diff_options *o, if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; } - last_symbol = l->s; } adjust_last_block(o, n, block_length); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 920114cd795c01..3119a59f071d6f 100755 --- a/t/t4015-diff-whitespace.sh +++ 
b/t/t4015-diff-whitespace.sh @@ -1514,6 +1514,71 @@ test_expect_success 'zebra alternate color is only used when necessary' ' test_cmp expected actual ' +test_expect_success 'short lines of opposite sign do not get marked as moved' ' + cat >old.txt <<-\EOF && + this line should be marked as moved + unchanged + unchanged + unchanged + unchanged + too short + this line should be marked as oldMoved newMoved + this line should be marked as oldMovedAlternate newMoved + unchanged 1 + unchanged 2 + unchanged 3 + unchanged 4 + this line should be marked as oldMoved newMoved/newMovedAlternate + EOF + cat >new.txt <<-\EOF && + too short + unchanged + unchanged + this line should be marked as moved + too short + unchanged + unchanged + this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 1 + unchanged 2 + this line should be marked as oldMovedAlternate newMoved + this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 3 + this line should be marked as oldMoved newMoved + unchanged 4 + EOF + test_expect_code 1 git diff --no-index --color --color-moved=zebra \ + old.txt new.txt >output && cat output && + grep -v index output | test_decode_color >actual && + cat >expect <<-\EOF && + diff --git a/old.txt b/new.txt + --- a/old.txt + +++ b/new.txt + @@ -1,13 +1,15 @@ + -this line should be marked as moved + +too short + unchanged + unchanged + +this line should be marked as moved + +too short + unchanged + unchanged + -too short + -this line should be marked as oldMoved newMoved + -this line should be marked as oldMovedAlternate newMoved + +this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 1 + unchanged 2 + +this line should be marked as oldMovedAlternate newMoved + +this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 3 + +this line should be marked as oldMoved newMoved + unchanged 4 + -this line should be marked as oldMoved newMoved/newMovedAlternate + EOF + test_cmp expect actual 
+' + test_expect_success 'cmd option assumes configured colored-moved' ' test_config color.diff.oldMoved "magenta" && test_config color.diff.newMoved "cyan" && From f52feba8ffe256ee048dece20067976abe8e9db1 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:25 +0000 Subject: [PATCH 053/198] diff: simplify allow-indentation-change delta calculation Now that we reliably end a block when the sign changes we don't need the whitespace delta calculation to rely on the sign. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/diff.c b/diff.c index 2b51b77fd20fce..77c893b266a785 100644 --- a/diff.c +++ b/diff.c @@ -864,23 +864,17 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a, a_width = a->indent_width, b_off = b->indent_off, b_width = b->indent_width; - int delta; if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) { *out = INDENT_BLANKLINE; return 1; } - if (a->s == DIFF_SYMBOL_PLUS) - delta = a_width - b_width; - else - delta = b_width - a_width; - if (a_len - a_off != b_len - b_off || memcmp(a->line + a_off, b->line + b_off, a_len - a_off)) return 0; - *out = delta; + *out = a_width - b_width; return 1; } @@ -924,10 +918,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, * match those of the current block and that the text of 'l' and 'cur' * after the indentation match. 
*/ - if (cur->es->s == DIFF_SYMBOL_PLUS) - delta = a_width - c_width; - else - delta = c_width - a_width; + delta = c_width - a_width; /* * If the previous lines of this block were all blank then set its From 8e809cbb2f8f8194ce22257339821809187dd1d0 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:26 +0000 Subject: [PATCH 054/198] diff --color-moved-ws=allow-indentation-change: simplify and optimize If we already have a block of potentially moved lines then as we move down the diff we need to check if the next line of each potentially moved line matches the current line of the diff. The implementation of --color-moved-ws=allow-indentation-change was needlessly performing this check on all the lines in the diff that matched the current line rather than just the current line. To exacerbate the problem finding all the other lines in the diff that match the current line involves a fuzzy lookup so we were wasting even more time performing a second comparison to filter out the non-matching lines. Fixing this reduces time to run git diff --color-moved-ws=allow-indentation-change v2.28.0 v2.29.0 by 93% compared to master and simplifies the code. 
Test HEAD^ HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.41( 0.38+0.03) 0.41(0.37+0.04) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.83( 0.79+0.04) 0.82(0.79+0.02) -1.2% 4002.3: diff --color-moved-ws=allow-indentation-change large change 13.68(13.59+0.07) 0.92(0.89+0.03) -93.3% 4002.4: log --no-color-moved --no-color-moved-ws 1.31( 1.22+0.08) 1.31(1.21+0.10) +0.0% 4002.5: log --color-moved --no-color-moved-ws 1.47( 1.40+0.07) 1.47(1.36+0.10) +0.0% 4002.6: log --color-moved-ws=allow-indentation-change 1.87( 1.77+0.09) 1.50(1.41+0.09) -19.8% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 65 ++++++++++++++++------------------------------------------ 1 file changed, 18 insertions(+), 47 deletions(-) diff --git a/diff.c b/diff.c index 77c893b266a785..55384449170e55 100644 --- a/diff.c +++ b/diff.c @@ -881,35 +881,20 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a, static int cmp_in_block_with_wsd(const struct diff_options *o, const struct moved_entry *cur, - const struct moved_entry *match, - struct moved_block *pmb, - int n) + const struct emitted_diff_symbol *l, + struct moved_block *pmb) { - struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; - int al = cur->es->len, bl = match->es->len, cl = l->len; + int al = cur->es->len, bl = l->len; const char *a = cur->es->line, - *b = match->es->line, - *c = l->line; + *b = l->line; int a_off = cur->es->indent_off, a_width = cur->es->indent_width, - c_off = l->indent_off, - c_width = l->indent_width; + b_off = l->indent_off, + b_width = l->indent_width; int delta; - /* - * We need to check if 'cur' is equal to 'match'. As those - * are from the same (+/-) side, we do not need to adjust for - * indent changes. However these were found using fuzzy - * matching so we do have to check if they are equal. 
Here we - * just check the lengths. We delay calling memcmp() to check - * the contents until later as if the length comparison for a - * and c fails we can avoid the call all together. - */ - if (al != bl) - return 1; - /* If 'l' and 'cur' are both blank then they match. */ - if (a_width == INDENT_BLANKLINE && c_width == INDENT_BLANKLINE) + if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) return 0; /* @@ -918,7 +903,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, * match those of the current block and that the text of 'l' and 'cur' * after the indentation match. */ - delta = c_width - a_width; + delta = b_width - a_width; /* * If the previous lines of this block were all blank then set its @@ -927,9 +912,8 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, if (pmb->wsd == INDENT_BLANKLINE) pmb->wsd = delta; - return !(delta == pmb->wsd && al - a_off == cl - c_off && - !memcmp(a, b, al) && ! - memcmp(a + a_off, c + c_off, al - a_off)); + return !(delta == pmb->wsd && al - a_off == bl - b_off && + !memcmp(a + a_off, b + b_off, al - a_off)); } static int moved_entry_cmp(const void *hashmap_cmp_fn_data, @@ -1030,36 +1014,23 @@ static void pmb_advance_or_null(struct diff_options *o, } static void pmb_advance_or_null_multi_match(struct diff_options *o, - struct moved_entry *match, - struct hashmap *hm, + struct emitted_diff_symbol *l, struct moved_block *pmb, - int pmb_nr, int n) + int pmb_nr) { int i; - char *got_match = xcalloc(1, pmb_nr); - - hashmap_for_each_entry_from(hm, match, ent) { - for (i = 0; i < pmb_nr; i++) { - struct moved_entry *prev = pmb[i].match; - struct moved_entry *cur = (prev && prev->next_line) ? - prev->next_line : NULL; - if (!cur) - continue; - if (!cmp_in_block_with_wsd(o, cur, match, &pmb[i], n)) - got_match[i] |= 1; - } - } for (i = 0; i < pmb_nr; i++) { - if (got_match[i]) { + struct moved_entry *prev = pmb[i].match; + struct moved_entry *cur = (prev && prev->next_line) ? 
+ prev->next_line : NULL; + if (cur && !cmp_in_block_with_wsd(o, cur, l, &pmb[i])) { /* Advance to the next line */ - pmb[i].match = pmb[i].match->next_line; + pmb[i].match = cur; } else { moved_block_clear(&pmb[i]); } } - - free(got_match); } static int shrink_potential_moved_blocks(struct moved_block *pmb, @@ -1181,7 +1152,7 @@ static void mark_color_as_moved(struct diff_options *o, if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - pmb_advance_or_null_multi_match(o, match, hm, pmb, pmb_nr, n); + pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr); else pmb_advance_or_null(o, match, hm, pmb, pmb_nr); From 76c2aacec9b2cd5eb2c089747cfd0c8adbb51de2 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:27 +0000 Subject: [PATCH 055/198] diff --color-moved: call comparison function directly This change will allow us to easily combine pmb_advance_or_null() and pmb_advance_or_null_multi_match() in the next commit. Calling xdiff_compare_lines() directly rather than using a function pointer from the hash map has little effect on the run time. 
Test HEAD^ HEAD ------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.41(0.37+0.04) 0.41(0.39+0.02) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.82(0.79+0.02) 0.83(0.79+0.03) +1.2% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.92(0.89+0.03) 0.91(0.85+0.05) -1.1% 4002.4: log --no-color-moved --no-color-moved-ws 1.31(1.21+0.10) 1.33(1.22+0.10) +1.5% 4002.5: log --color-moved --no-color-moved-ws 1.47(1.36+0.10) 1.47(1.39+0.08) +0.0% 4002.6: log --color-moved-ws=allow-indentation-change 1.50(1.41+0.09) 1.51(1.42+0.09) +0.7% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/diff.c b/diff.c index 55384449170e55..c056d917d0d70b 100644 --- a/diff.c +++ b/diff.c @@ -995,17 +995,20 @@ static void add_lines_to_move_detection(struct diff_options *o, } static void pmb_advance_or_null(struct diff_options *o, - struct moved_entry *match, - struct hashmap *hm, + struct emitted_diff_symbol *l, struct moved_block *pmb, int pmb_nr) { int i; + unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; + for (i = 0; i < pmb_nr; i++) { struct moved_entry *prev = pmb[i].match; struct moved_entry *cur = (prev && prev->next_line) ? 
prev->next_line : NULL; - if (cur && !hm->cmpfn(o, &cur->ent, &match->ent, NULL)) { + if (cur && xdiff_compare_lines(cur->es->line, cur->es->len, + l->line, l->len, + flags)) { pmb[i].match = cur; } else { pmb[i].match = NULL; @@ -1154,7 +1157,7 @@ static void mark_color_as_moved(struct diff_options *o, COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr); else - pmb_advance_or_null(o, match, hm, pmb, pmb_nr); + pmb_advance_or_null(o, l, pmb, pmb_nr); pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); From f9cbfe64eeeebe87b5eb50dbf83257de287a7886 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:28 +0000 Subject: [PATCH 056/198] diff --color-moved: unify moved block growth functions After the last two commits pmb_advance_or_null() and pmb_advance_or_null_multi_match() differ only in the comparison they perform. Let's simplify the code by combining them into a single function. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/diff.c b/diff.c index c056d917d0d70b..b03f79b626cc0f 100644 --- a/diff.c +++ b/diff.c @@ -1003,36 +1003,23 @@ static void pmb_advance_or_null(struct diff_options *o, unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; for (i = 0; i < pmb_nr; i++) { + int match; struct moved_entry *prev = pmb[i].match; struct moved_entry *cur = (prev && prev->next_line) ? prev->next_line : NULL; - if (cur && xdiff_compare_lines(cur->es->line, cur->es->len, - l->line, l->len, - flags)) { - pmb[i].match = cur; - } else { - pmb[i].match = NULL; - } - } -} -static void pmb_advance_or_null_multi_match(struct diff_options *o, - struct emitted_diff_symbol *l, - struct moved_block *pmb, - int pmb_nr) -{ - int i; - - for (i = 0; i < pmb_nr; i++) { - struct moved_entry *prev = pmb[i].match; - struct moved_entry *cur = (prev && prev->next_line) ?
- prev->next_line : NULL; - if (cur && !cmp_in_block_with_wsd(o, cur, l, &pmb[i])) { - /* Advance to the next line */ + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + match = cur && + !cmp_in_block_with_wsd(o, cur, l, &pmb[i]); + else + match = cur && + xdiff_compare_lines(cur->es->line, cur->es->len, + l->line, l->len, flags); + if (match) pmb[i].match = cur; - } else { + else moved_block_clear(&pmb[i]); - } } } @@ -1153,11 +1140,7 @@ static void mark_color_as_moved(struct diff_options *o, continue; } - if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr); - else - pmb_advance_or_null(o, l, pmb, pmb_nr); + pmb_advance_or_null(o, l, pmb, pmb_nr); pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); From 72e4640f244b012d980f8e007db9ad6790057db9 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:29 +0000 Subject: [PATCH 057/198] diff --color-moved: shrink potential moved blocks as we go Rather than setting `match` to NULL and then looping over the list of potential matched blocks for a second time to remove blocks with no matches just filter out the blocks with no matches as we go. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/diff.c b/diff.c index b03f79b626cc0f..068473c0be3044 100644 --- a/diff.c +++ b/diff.c @@ -997,12 +997,12 @@ static void add_lines_to_move_detection(struct diff_options *o, static void pmb_advance_or_null(struct diff_options *o, struct emitted_diff_symbol *l, struct moved_block *pmb, - int pmb_nr) + int *pmb_nr) { - int i; + int i, j; unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - for (i = 0; i < pmb_nr; i++) { + for (i = 0, j = 0; i < *pmb_nr; i++) { int match; struct moved_entry *prev = pmb[i].match; struct moved_entry *cur = (prev && prev->next_line) ? 
@@ -1017,37 +1017,9 @@ static void pmb_advance_or_null(struct diff_options *o, xdiff_compare_lines(cur->es->line, cur->es->len, l->line, l->len, flags); if (match) - pmb[i].match = cur; - else - moved_block_clear(&pmb[i]); + pmb[j++].match = cur; } -} - -static int shrink_potential_moved_blocks(struct moved_block *pmb, - int pmb_nr) -{ - int lp, rp; - - /* Shrink the set of potential block to the remaining running */ - for (lp = 0, rp = pmb_nr - 1; lp <= rp;) { - while (lp < pmb_nr && pmb[lp].match) - lp++; - /* lp points at the first NULL now */ - - while (rp > -1 && !pmb[rp].match) - rp--; - /* rp points at the last non-NULL */ - - if (lp < pmb_nr && rp > -1 && lp < rp) { - pmb[lp] = pmb[rp]; - memset(&pmb[rp], 0, sizeof(pmb[rp])); - rp--; - lp++; - } - } - - /* Remember the number of running sets */ - return rp + 1; + *pmb_nr = j; } /* @@ -1140,9 +1112,7 @@ static void mark_color_as_moved(struct diff_options *o, continue; } - pmb_advance_or_null(o, l, pmb, pmb_nr); - - pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); + pmb_advance_or_null(o, l, pmb, &pmb_nr); if (pmb_nr == 0) { /* From b473dcb0de2dc030be804a89df155c69306ab90d Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:30 +0000 Subject: [PATCH 058/198] diff --color-moved: stop clearing potential moved blocks moved_block_clear() was introduced in 74d156f4a1 ("diff --color-moved-ws: fix double free crash", 2018-10-04) to free the memory that was allocated when initializing a potential moved block. However since 21536d077f ("diff --color-moved-ws: modify allow-indentation-change", 2018-11-23) initializing a potential moved block no longer allocates any memory. Up until the last commit we were relying on moved_block_clear() to set the `match` pointer to NULL when a block stopped matching, but since that commit we do not clear a moved block that does not match so it does not make sense to clear them elsewhere. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/diff.c b/diff.c index 068473c0be3044..4b5776a5a0ac87 100644 --- a/diff.c +++ b/diff.c @@ -807,11 +807,6 @@ struct moved_block { int wsd; /* The whitespace delta of this block */ }; -static void moved_block_clear(struct moved_block *b) -{ - memset(b, 0, sizeof(*b)); -} - #define INDENT_BLANKLINE INT_MIN static void fill_es_indent_data(struct emitted_diff_symbol *es) @@ -1093,11 +1088,7 @@ static void mark_color_as_moved(struct diff_options *o, } if (pmb_nr && (!match || l->s != moved_symbol)) { - int i; - adjust_last_block(o, n, block_length); - for(i = 0; i < pmb_nr; i++) - moved_block_clear(&pmb[i]); pmb_nr = 0; block_length = 0; flipped_block = 0; @@ -1155,8 +1146,6 @@ static void mark_color_as_moved(struct diff_options *o, } adjust_last_block(o, n, block_length); - for(n = 0; n < pmb_nr; n++) - moved_block_clear(&pmb[n]); free(pmb); } From 447e5899921980c9ba84a5972176763283f4f72a Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:31 +0000 Subject: [PATCH 059/198] diff --color-moved-ws=allow-indentation-change: improve hash lookups As libxdiff does not have a whitespace flag to ignore the indentation the code for --color-moved-ws=allow-indentation-change uses XDF_IGNORE_WHITESPACE and then filters out any hash lookups where there are non-indentation changes. This filtering is inefficient as we have to perform another string comparison. By using the offset data that we have already computed to skip the indentation we can avoid using XDF_IGNORE_WHITESPACE and safely remove the extra checks which improves the performance by 11% and paves the way for the elimination of string comparisons in the next commit. This change slightly increases the run time of other --color-moved modes. 
This could be avoided by using different comparison functions for the different modes but after the next two commits there is no measurable benefit in doing so. Test HEAD^ HEAD -------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.41(0.38+0.03) 0.41(0.36+0.04) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.82(0.76+0.05) 0.84(0.79+0.04) +2.4% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.91(0.88+0.03) 0.81(0.74+0.06) -11.0% 4002.4: log --no-color-moved --no-color-moved-ws 1.32(1.21+0.10) 1.31(1.19+0.11) -0.8% 4002.5: log --color-moved --no-color-moved-ws 1.47(1.37+0.10) 1.47(1.36+0.11) +0.0% 4002.6: log --color-moved-ws=allow-indentation-change 1.51(1.42+0.09) 1.48(1.37+0.10) -2.0% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 66 +++++++++++++++++----------------------------------------- 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/diff.c b/diff.c index 4b5776a5a0ac87..f899083d028349 100644 --- a/diff.c +++ b/diff.c @@ -850,28 +850,15 @@ static void fill_es_indent_data(struct emitted_diff_symbol *es) } static int compute_ws_delta(const struct emitted_diff_symbol *a, - const struct emitted_diff_symbol *b, - int *out) -{ - int a_len = a->len, - b_len = b->len, - a_off = a->indent_off, - a_width = a->indent_width, - b_off = b->indent_off, + const struct emitted_diff_symbol *b) +{ + int a_width = a->indent_width, b_width = b->indent_width; - if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) { - *out = INDENT_BLANKLINE; - return 1; - } - - if (a_len - a_off != b_len - b_off || - memcmp(a->line + a_off, b->line + b_off, a_len - a_off)) - return 0; - - *out = a_width - b_width; + if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) + return INDENT_BLANKLINE; - return 1; + return a_width - b_width; } static int cmp_in_block_with_wsd(const 
struct diff_options *o, @@ -917,26 +904,17 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data, const void *keydata) { const struct diff_options *diffopt = hashmap_cmp_fn_data; - const struct moved_entry *a, *b; + const struct emitted_diff_symbol *a, *b; unsigned flags = diffopt->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - a = container_of(eptr, const struct moved_entry, ent); - b = container_of(entry_or_key, const struct moved_entry, ent); - - if (diffopt->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - /* - * As there is not specific white space config given, - * we'd need to check for a new block, so ignore all - * white space. The setup of the white space - * configuration for the next block is done else where - */ - flags |= XDF_IGNORE_WHITESPACE; + a = container_of(eptr, const struct moved_entry, ent)->es; + b = container_of(entry_or_key, const struct moved_entry, ent)->es; - return !xdiff_compare_lines(a->es->line, a->es->len, - b->es->line, b->es->len, - flags); + return !xdiff_compare_lines(a->line + a->indent_off, + a->len - a->indent_off, + b->line + b->indent_off, + b->len - b->indent_off, flags); } static struct moved_entry *prepare_entry(struct diff_options *o, @@ -945,7 +923,8 @@ static struct moved_entry *prepare_entry(struct diff_options *o, struct moved_entry *ret = xmalloc(sizeof(*ret)); struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - unsigned int hash = xdiff_hash_string(l->line, l->len, flags); + unsigned int hash = xdiff_hash_string(l->line + l->indent_off, + l->len - l->indent_off, flags); hashmap_entry_init(&ret->ent, hash); ret->es = l; @@ -1113,14 +1092,11 @@ static void mark_color_as_moved(struct diff_options *o, hashmap_for_each_entry_from(hm, match, ent) { ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) { - if (compute_ws_delta(l, match->es, - 
&pmb[pmb_nr].wsd)) - pmb[pmb_nr++].match = match; - } else { + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + pmb[pmb_nr].wsd = compute_ws_delta(l, match->es); + else pmb[pmb_nr].wsd = 0; - pmb[pmb_nr++].match = match; - } + pmb[pmb_nr++].match = match; } if (adjust_last_block(o, n, block_length) && @@ -6240,10 +6216,6 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) if (o->color_moved) { struct hashmap add_lines, del_lines; - if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE; - hashmap_init(&del_lines, moved_entry_cmp, o, 0); hashmap_init(&add_lines, moved_entry_cmp, o, 0); From a95081c2471ae8658e0e76abd3a8c6a43a9f09cb Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:32 +0000 Subject: [PATCH 060/198] diff: use designated initializers for emitted_diff_symbol This makes it clearer which fields are being explicitly initialized and will simplify the next commit where we add a new field to the struct. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index f899083d028349..31a20a34240a4a 100644 --- a/diff.c +++ b/diff.c @@ -1460,7 +1460,9 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, const char *line, int len, unsigned flags) { - struct emitted_diff_symbol e = {line, len, flags, 0, 0, s}; + struct emitted_diff_symbol e = { + .line = line, .len = len, .flags = flags, .s = s + }; if (o->emitted_symbols) append_emitted_diff_symbol(o, &e); From 5458d65b169d2c21d5c87af897e3ac01f31d84dc Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 20 Jul 2021 10:36:33 +0000 Subject: [PATCH 061/198] diff --color-moved: intern strings Taking inspiration from xdl_classify_record() assign an id to each addition and deletion such that lines that match for the current --color-moved-ws mode share the same unique id. This reduces the number of hash lookups a little (calculating the ids still involves one hash lookup per line) but the main benefit is that when growing blocks of potentially moved lines we can replace string comparisons which involve chasing a pointer with a simple integer comparison. On a large diff this commit reduces the time to run diff --color-moved by 33% and diff --color-moved-ws=allow-indentation-change by 26%. Compared to master the time to run diff --color-moved-ws=allow-indentation-change is now reduced by 95% and the overhead compared to --no-color-moved is reduced to 50%. Compared to the previous commit the time to run git log --patch --color-moved is increased slightly, but compared to master there is no change in run time. 
Test HEAD^ HEAD -------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.41(0.36+0.04) 0.41(0.37+0.03) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.83(0.79+0.03) 0.55(0.52+0.03) -33.7% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.81(0.77+0.04) 0.60(0.55+0.05) -25.9% 4002.4: log --no-color-moved --no-color-moved-ws 1.30(1.20+0.09) 1.31(1.22+0.08) +0.8% 4002.5: log --color-moved --no-color-moved-ws 1.46(1.35+0.11) 1.47(1.30+0.16) +0.7% 4002.6: log --color-moved-ws=allow-indentation-change 1.46(1.38+0.07) 1.47(1.34+0.13) +0.7% Test master HEAD -------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.40( 0.36+0.03) 0.41(0.37+0.03) +2.5% 4002.2: diff --color-moved --no-color-moved-ws large change 0.82( 0.77+0.04) 0.55(0.52+0.03) -32.9% 4002.3: diff --color-moved-ws=allow-indentation-change large change 14.10(14.04+0.04) 0.60(0.55+0.05) -95.7% 4002.4: log --no-color-moved --no-color-moved-ws 1.31( 1.21+0.09) 1.31(1.22+0.08) +0.0% 4002.5: log --color-moved --no-color-moved-ws 1.47( 1.37+0.09) 1.47(1.30+0.16) +0.0% 4002.6: log --color-moved-ws=allow-indentation-change 1.86( 1.76+0.10) 1.47(1.34+0.13) -21.0% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 171 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 95 insertions(+), 76 deletions(-) diff --git a/diff.c b/diff.c index 31a20a34240a4a..2956c8f7103c2a 100644 --- a/diff.c +++ b/diff.c @@ -18,6 +18,7 @@ #include "submodule-config.h" #include "submodule.h" #include "hashmap.h" +#include "mem-pool.h" #include "ll-merge.h" #include "string-list.h" #include "strvec.h" @@ -772,6 +773,7 @@ struct emitted_diff_symbol { int flags; int indent_off; /* Offset to first non-whitespace character */ int 
indent_width; /* The visual width of the indentation */ + unsigned id; enum diff_symbol s; }; #define EMITTED_DIFF_SYMBOL_INIT {NULL} @@ -797,9 +799,9 @@ static void append_emitted_diff_symbol(struct diff_options *o, } struct moved_entry { - struct hashmap_entry ent; const struct emitted_diff_symbol *es; struct moved_entry *next_line; + struct moved_entry *next_match; }; struct moved_block { @@ -866,24 +868,24 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, const struct emitted_diff_symbol *l, struct moved_block *pmb) { - int al = cur->es->len, bl = l->len; - const char *a = cur->es->line, - *b = l->line; - int a_off = cur->es->indent_off, - a_width = cur->es->indent_width, - b_off = l->indent_off, - b_width = l->indent_width; + int a_width = cur->es->indent_width, b_width = l->indent_width; int delta; - /* If 'l' and 'cur' are both blank then they match. */ - if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) + /* The text of each line must match */ + if (cur->es->id != l->id) + return 1; + + /* + * If 'l' and 'cur' are both blank then we don't need to check the + * indent. We only need to check cur as we know the strings match. + * */ + if (a_width == INDENT_BLANKLINE) return 0; /* * The indent changes of the block are known and stored in pmb->wsd; * however we need to check if the indent changes of the current line - * match those of the current block and that the text of 'l' and 'cur' - * after the indentation match. + * match those of the current block. 
*/ delta = b_width - a_width; @@ -894,22 +896,26 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, if (pmb->wsd == INDENT_BLANKLINE) pmb->wsd = delta; - return !(delta == pmb->wsd && al - a_off == bl - b_off && - !memcmp(a + a_off, b + b_off, al - a_off)); + return delta != pmb->wsd; } -static int moved_entry_cmp(const void *hashmap_cmp_fn_data, - const struct hashmap_entry *eptr, - const struct hashmap_entry *entry_or_key, - const void *keydata) +struct interned_diff_symbol { + struct hashmap_entry ent; + struct emitted_diff_symbol *es; +}; + +static int interned_diff_symbol_cmp(const void *hashmap_cmp_fn_data, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata) { const struct diff_options *diffopt = hashmap_cmp_fn_data; const struct emitted_diff_symbol *a, *b; unsigned flags = diffopt->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - a = container_of(eptr, const struct moved_entry, ent)->es; - b = container_of(entry_or_key, const struct moved_entry, ent)->es; + a = container_of(eptr, const struct interned_diff_symbol, ent)->es; + b = container_of(entry_or_key, const struct interned_diff_symbol, ent)->es; return !xdiff_compare_lines(a->line + a->indent_off, a->len - a->indent_off, @@ -917,55 +923,81 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data, b->len - b->indent_off, flags); } -static struct moved_entry *prepare_entry(struct diff_options *o, - int line_no) +static void prepare_entry(struct diff_options *o, struct emitted_diff_symbol *l, + struct interned_diff_symbol *s) { - struct moved_entry *ret = xmalloc(sizeof(*ret)); - struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; unsigned int hash = xdiff_hash_string(l->line + l->indent_off, l->len - l->indent_off, flags); - hashmap_entry_init(&ret->ent, hash); - ret->es = l; - ret->next_line = NULL; - - return ret; + hashmap_entry_init(&s->ent, hash); 
+ s->es = l; } -static void add_lines_to_move_detection(struct diff_options *o, - struct hashmap *add_lines, - struct hashmap *del_lines) +struct moved_entry_list { + struct moved_entry *add, *del; +}; + +static struct moved_entry_list *add_lines_to_move_detection(struct diff_options *o, + struct mem_pool *entry_mem_pool) { struct moved_entry *prev_line = NULL; - + struct mem_pool interned_pool; + struct hashmap interned_map; + struct moved_entry_list *entry_list = NULL; + size_t entry_list_alloc = 0; + unsigned id = 0; int n; + + hashmap_init(&interned_map, interned_diff_symbol_cmp, o, 8096); + mem_pool_init(&interned_pool, 1024 * 1024); + for (n = 0; n < o->emitted_symbols->nr; n++) { - struct hashmap *hm; - struct moved_entry *key; + struct interned_diff_symbol key; + struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; + struct interned_diff_symbol *s; + struct moved_entry *entry; - switch (o->emitted_symbols->buf[n].s) { - case DIFF_SYMBOL_PLUS: - hm = add_lines; - break; - case DIFF_SYMBOL_MINUS: - hm = del_lines; - break; - default: + if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS) { prev_line = NULL; continue; } if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - fill_es_indent_data(&o->emitted_symbols->buf[n]); - key = prepare_entry(o, n); - if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s) - prev_line->next_line = key; + fill_es_indent_data(l); - hashmap_add(hm, &key->ent); - prev_line = key; + prepare_entry(o, l, &key); + s = hashmap_get_entry(&interned_map, &key, ent, &key.ent); + if (s) { + l->id = s->es->id; + } else { + l->id = id; + ALLOC_GROW_BY(entry_list, id, 1, entry_list_alloc); + hashmap_add(&interned_map, + memcpy(mem_pool_alloc(&interned_pool, + sizeof(key)), + &key, sizeof(key))); + } + entry = mem_pool_alloc(entry_mem_pool, sizeof(*entry)); + entry->es = l; + entry->next_line = NULL; + if (prev_line && prev_line->es->s == l->s) + prev_line->next_line = entry; + prev_line = entry; 
+ if (l->s == DIFF_SYMBOL_PLUS) { + entry->next_match = entry_list[l->id].add; + entry_list[l->id].add = entry; + } else { + entry->next_match = entry_list[l->id].del; + entry_list[l->id].del = entry; + } } + + hashmap_clear(&interned_map); + mem_pool_discard(&interned_pool, 0); + + return entry_list; } static void pmb_advance_or_null(struct diff_options *o, @@ -974,7 +1006,6 @@ static void pmb_advance_or_null(struct diff_options *o, int *pmb_nr) { int i, j; - unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; for (i = 0, j = 0; i < *pmb_nr; i++) { int match; @@ -987,9 +1018,8 @@ static void pmb_advance_or_null(struct diff_options *o, match = cur && !cmp_in_block_with_wsd(o, cur, l, &pmb[i]); else - match = cur && - xdiff_compare_lines(cur->es->line, cur->es->len, - l->line, l->len, flags); + match = cur && cur->es->id == l->id; + if (match) pmb[j++].match = cur; } @@ -1034,8 +1064,7 @@ static int adjust_last_block(struct diff_options *o, int n, int block_length) /* Find blocks of moved code, delegate actual coloring decision to helper */ static void mark_color_as_moved(struct diff_options *o, - struct hashmap *add_lines, - struct hashmap *del_lines) + struct moved_entry_list *entry_list) { struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; @@ -1044,23 +1073,15 @@ static void mark_color_as_moved(struct diff_options *o, for (n = 0; n < o->emitted_symbols->nr; n++) { - struct hashmap *hm = NULL; - struct moved_entry *key; struct moved_entry *match = NULL; struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; switch (l->s) { case DIFF_SYMBOL_PLUS: - hm = del_lines; - key = prepare_entry(o, n); - match = hashmap_get_entry(hm, key, ent, NULL); - free(key); + match = entry_list[l->id].del; break; case DIFF_SYMBOL_MINUS: - hm = add_lines; - key = prepare_entry(o, n); - match = hashmap_get_entry(hm, key, ent, NULL); - free(key); + match = entry_list[l->id].add; break; default: flipped_block = 0; @@ -1089,7 
+1110,7 @@ static void mark_color_as_moved(struct diff_options *o, * The current line is the start of a new block. * Setup the set of potential blocks. */ - hashmap_for_each_entry_from(hm, match, ent) { + for (; match; match = match->next_match) { ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) @@ -6216,20 +6237,18 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) if (o->emitted_symbols) { if (o->color_moved) { - struct hashmap add_lines, del_lines; - - hashmap_init(&del_lines, moved_entry_cmp, o, 0); - hashmap_init(&add_lines, moved_entry_cmp, o, 0); + struct mem_pool entry_pool; + struct moved_entry_list *entry_list; - add_lines_to_move_detection(o, &add_lines, &del_lines); - mark_color_as_moved(o, &add_lines, &del_lines); + mem_pool_init(&entry_pool, 1024 * 1024); + entry_list = add_lines_to_move_detection(o, + &entry_pool); + mark_color_as_moved(o, entry_list); if (o->color_moved == COLOR_MOVED_ZEBRA_DIM) dim_moved_lines(o); - hashmap_clear_and_free(&add_lines, struct moved_entry, - ent); - hashmap_clear_and_free(&del_lines, struct moved_entry, - ent); + mem_pool_discard(&entry_pool, 0); + free(entry_list); } for (i = 0; i < esm.nr; i++) From 4a633b2f4dbffa903ac2e01bf1a722ad3fd77007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Tue, 20 Jul 2021 12:24:06 +0200 Subject: [PATCH 062/198] refs/packet: add missing BUG() invocations to reflog callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In e0cc8ac8202 (packed_ref_store: make class into a subclass of `ref_store`, 2017-06-23) a die() was added to packed_create_reflog(), but not to any of the other reflog callbacks, let's do that. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index f8aa97d7998ecb..24a360b719ff25 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1600,6 +1600,7 @@ static int packed_for_each_reflog_ent(struct ref_store *ref_store, const char *refname, each_reflog_ent_fn fn, void *cb_data) { + BUG("packed reference store does not support reflogs"); return 0; } @@ -1608,12 +1609,14 @@ static int packed_for_each_reflog_ent_reverse(struct ref_store *ref_store, each_reflog_ent_fn fn, void *cb_data) { + BUG("packed reference store does not support reflogs"); return 0; } static int packed_reflog_exists(struct ref_store *ref_store, const char *refname) { + BUG("packed reference store does not support reflogs"); return 0; } @@ -1627,6 +1630,7 @@ static int packed_create_reflog(struct ref_store *ref_store, static int packed_delete_reflog(struct ref_store *ref_store, const char *refname) { + BUG("packed reference store does not support reflogs"); return 0; } @@ -1638,6 +1642,7 @@ static int packed_reflog_expire(struct ref_store *ref_store, reflog_expiry_cleanup_fn cleanup_fn, void *policy_cb_data) { + BUG("packed reference store does not support reflogs"); return 0; } From 2c14b391ddc2c43b4a05868b634c8d78854a96a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 22 Jul 2021 00:57:41 +0200 Subject: [PATCH 063/198] test-lib tests: split up "write and run" into two functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the function to write and run tests of the test-lib.sh output into two functions. 
When this was added back in 565b6fa87bb (tests: refactor mechanics of testing in a sub test-lib, 2012-12-16) there was no reason to do this, but since we started supporting test arguments in 517cd55fd51 (test-lib: self-test that --verbose works, 2013-06-23) we've started to write out duplicate tests simply to test different arguments, now we'll be able to re-use them. This change doesn't consolidate any of those tests yet, it just makes it possible to do so. All the changes in t0000-basic.sh are a simple search-replacement. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/lib-subtest.sh | 45 +++++++++++++++++++++-------- t/t0000-basic.sh | 74 ++++++++++++++++++++++++------------------------ 2 files changed, 70 insertions(+), 49 deletions(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index 3cfe09911a2f56..58ae316ad5fc25 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -1,3 +1,22 @@ +write_sub_test_lib_test () { + name="$1" descr="$2" # stdin is the body of the test code + mkdir "$name" && + ( + cd "$name" && + write_script "$name.sh" "$TEST_SHELL_PATH" <<-EOF && + test_description='$descr (run in sub test-lib) + + This is run in a sub test-lib so that we do not get incorrect + passing metrics + ' + + # Point to the t/test-lib.sh, which isn't in ../ as usual + . "\$TEST_DIRECTORY"/test-lib.sh + EOF + cat >>"$name.sh" + ) +} + _run_sub_test_lib_test_common () { neg="$1" name="$2" descr="$3" # stdin is the body of the test code shift 3 @@ -18,25 +37,15 @@ _run_sub_test_lib_test_common () { esac done - mkdir "$name" && ( + cd "$name" && + # Pretend we're not running under a test harness, whether we # are or not. The test-lib output depends on the setting of # this variable, so we need a stable setting under which to run # the sub-test. 
sane_unset HARNESS_ACTIVE && - cd "$name" && - write_script "$name.sh" "$TEST_SHELL_PATH" <<-EOF && - test_description='$descr (run in sub test-lib) - This is run in a sub test-lib so that we do not get incorrect - passing metrics - ' - - # Point to the t/test-lib.sh, which isn't in ../ as usual - . "\$TEST_DIRECTORY"/test-lib.sh - EOF - cat >>"$name.sh" && export TEST_DIRECTORY && # The child test re-sources GIT-BUILD-OPTIONS and may thus # override the test output directory. We thus pass it as an @@ -55,6 +64,18 @@ _run_sub_test_lib_test_common () { ) } +write_and_run_sub_test_lib_test () { + name="$1" descr="$2" # stdin is the body of the test code + write_sub_test_lib_test "$@" || return 1 + _run_sub_test_lib_test_common '' "$@" +} + +write_and_run_sub_test_lib_test_err () { + name="$1" descr="$2" # stdin is the body of the test code + write_sub_test_lib_test "$@" || return 1 + _run_sub_test_lib_test_common '!' "$@" +} + run_sub_test_lib_test () { _run_sub_test_lib_test_common '' "$@" } diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index a3865dd77ba5e8..a7c5aaacab6bc5 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -68,7 +68,7 @@ test_expect_success 'success is reported like this' ' ' test_expect_success 'pretend we have a fully passing test suite' ' - run_sub_test_lib_test full-pass "3 passing tests" <<-\EOF && + write_and_run_sub_test_lib_test full-pass "3 passing tests" <<-\EOF && for i in 1 2 3 do test_expect_success "passing test #$i" "true" @@ -85,7 +85,7 @@ test_expect_success 'pretend we have a fully passing test suite' ' ' test_expect_success 'pretend we have a partially passing test suite' ' - run_sub_test_lib_test_err \ + write_and_run_sub_test_lib_test_err \ partial-pass "2/3 tests passing" <<-\EOF && test_expect_success "passing test #1" "true" test_expect_success "failing test #2" "false" @@ -103,7 +103,7 @@ test_expect_success 'pretend we have a partially passing test suite' ' ' test_expect_success 'pretend we have a known breakage' ' 
- run_sub_test_lib_test failing-todo "A failing TODO test" <<-\EOF && + write_and_run_sub_test_lib_test failing-todo "A failing TODO test" <<-\EOF && test_expect_success "passing test" "true" test_expect_failure "pretend we have a known breakage" "false" test_done @@ -118,7 +118,7 @@ test_expect_success 'pretend we have a known breakage' ' ' test_expect_success 'pretend we have fixed a known breakage' ' - run_sub_test_lib_test passing-todo "A passing TODO test" <<-\EOF && + write_and_run_sub_test_lib_test passing-todo "A passing TODO test" <<-\EOF && test_expect_failure "pretend we have fixed a known breakage" "true" test_done EOF @@ -130,7 +130,7 @@ test_expect_success 'pretend we have fixed a known breakage' ' ' test_expect_success 'pretend we have fixed one of two known breakages (run in sub test-lib)' ' - run_sub_test_lib_test partially-passing-todos \ + write_and_run_sub_test_lib_test partially-passing-todos \ "2 TODO tests, one passing" <<-\EOF && test_expect_failure "pretend we have a known breakage" "false" test_expect_success "pretend we have a passing test" "true" @@ -149,7 +149,7 @@ test_expect_success 'pretend we have fixed one of two known breakages (run in su ' test_expect_success 'pretend we have a pass, fail, and known breakage' ' - run_sub_test_lib_test_err \ + write_and_run_sub_test_lib_test_err \ mixed-results1 "mixed results #1" <<-\EOF && test_expect_success "passing test" "true" test_expect_success "failing test" "false" @@ -168,7 +168,7 @@ test_expect_success 'pretend we have a pass, fail, and known breakage' ' ' test_expect_success 'pretend we have a mix of all possible results' ' - run_sub_test_lib_test_err \ + write_and_run_sub_test_lib_test_err \ mixed-results2 "mixed results #2" <<-\EOF && test_expect_success "passing test" "true" test_expect_success "passing test" "true" @@ -204,7 +204,7 @@ test_expect_success 'pretend we have a mix of all possible results' ' ' test_expect_success 'test --verbose' ' - run_sub_test_lib_test_err \ + 
write_and_run_sub_test_lib_test_err \ t1234-verbose "test verbose" --verbose <<-\EOF && test_expect_success "passing test" true test_expect_success "test with output" "echo foo" @@ -231,7 +231,7 @@ test_expect_success 'test --verbose' ' ' test_expect_success 'test --verbose-only' ' - run_sub_test_lib_test_err \ + write_and_run_sub_test_lib_test_err \ t2345-verbose-only-2 "test verbose-only=2" \ --verbose-only=2 <<-\EOF && test_expect_success "passing test" true @@ -255,7 +255,7 @@ test_expect_success 'test --verbose-only' ' test_expect_success 'GIT_SKIP_TESTS' ' ( - run_sub_test_lib_test git-skip-tests-basic \ + write_and_run_sub_test_lib_test git-skip-tests-basic \ "GIT_SKIP_TESTS" \ --skip="git.2" <<-\EOF && for i in 1 2 3 @@ -276,7 +276,7 @@ test_expect_success 'GIT_SKIP_TESTS' ' test_expect_success 'GIT_SKIP_TESTS several tests' ' ( - run_sub_test_lib_test git-skip-tests-several \ + write_and_run_sub_test_lib_test git-skip-tests-several \ "GIT_SKIP_TESTS several tests" \ --skip="git.2 git.5" <<-\EOF && for i in 1 2 3 4 5 6 @@ -300,7 +300,7 @@ test_expect_success 'GIT_SKIP_TESTS several tests' ' test_expect_success 'GIT_SKIP_TESTS sh pattern' ' ( - run_sub_test_lib_test git-skip-tests-sh-pattern \ + write_and_run_sub_test_lib_test git-skip-tests-sh-pattern \ "GIT_SKIP_TESTS sh pattern" \ --skip="git.[2-5]" <<-\EOF && for i in 1 2 3 4 5 6 @@ -324,7 +324,7 @@ test_expect_success 'GIT_SKIP_TESTS sh pattern' ' test_expect_success 'GIT_SKIP_TESTS entire suite' ' ( - run_sub_test_lib_test git-skip-tests-entire-suite \ + write_and_run_sub_test_lib_test git-skip-tests-entire-suite \ "GIT_SKIP_TESTS entire suite" \ --skip="git" <<-\EOF && for i in 1 2 3 @@ -341,7 +341,7 @@ test_expect_success 'GIT_SKIP_TESTS entire suite' ' test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' ' ( - run_sub_test_lib_test git-skip-tests-unmatched-suite \ + write_and_run_sub_test_lib_test git-skip-tests-unmatched-suite \ "GIT_SKIP_TESTS does not skip unmatched suite" \ 
--skip="notgit" <<-\EOF && for i in 1 2 3 @@ -361,7 +361,7 @@ test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' ' ' test_expect_success '--run basic' ' - run_sub_test_lib_test run-basic \ + write_and_run_sub_test_lib_test run-basic \ "--run basic" --run="1,3,5" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -382,7 +382,7 @@ test_expect_success '--run basic' ' ' test_expect_success '--run with a range' ' - run_sub_test_lib_test run-range \ + write_and_run_sub_test_lib_test run-range \ "--run with a range" --run="1-3" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -403,7 +403,7 @@ test_expect_success '--run with a range' ' ' test_expect_success '--run with two ranges' ' - run_sub_test_lib_test run-two-ranges \ + write_and_run_sub_test_lib_test run-two-ranges \ "--run with two ranges" --run="1-2,5-6" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -424,7 +424,7 @@ test_expect_success '--run with two ranges' ' ' test_expect_success '--run with a left open range' ' - run_sub_test_lib_test run-left-open-range \ + write_and_run_sub_test_lib_test run-left-open-range \ "--run with a left open range" --run="-3" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -445,7 +445,7 @@ test_expect_success '--run with a left open range' ' ' test_expect_success '--run with a right open range' ' - run_sub_test_lib_test run-right-open-range \ + write_and_run_sub_test_lib_test run-right-open-range \ "--run with a right open range" --run="4-" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -466,7 +466,7 @@ test_expect_success '--run with a right open range' ' ' test_expect_success '--run with basic negation' ' - run_sub_test_lib_test run-basic-neg \ + write_and_run_sub_test_lib_test run-basic-neg \ "--run with basic negation" --run="!3" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -487,7 +487,7 @@ test_expect_success '--run with basic negation' ' ' test_expect_success '--run with two negations' ' - run_sub_test_lib_test run-two-neg \ + write_and_run_sub_test_lib_test run-two-neg \ "--run with two negations" --run="!3,!6" <<-\EOF 
&& for i in 1 2 3 4 5 6 do @@ -508,7 +508,7 @@ test_expect_success '--run with two negations' ' ' test_expect_success '--run a range and negation' ' - run_sub_test_lib_test run-range-and-neg \ + write_and_run_sub_test_lib_test run-range-and-neg \ "--run a range and negation" --run="-4,!2" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -529,7 +529,7 @@ test_expect_success '--run a range and negation' ' ' test_expect_success '--run range negation' ' - run_sub_test_lib_test run-range-neg \ + write_and_run_sub_test_lib_test run-range-neg \ "--run range negation" --run="!1-3" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -550,7 +550,7 @@ test_expect_success '--run range negation' ' ' test_expect_success '--run include, exclude and include' ' - run_sub_test_lib_test run-inc-neg-inc \ + write_and_run_sub_test_lib_test run-inc-neg-inc \ "--run include, exclude and include" \ --run="1-5,!1-3,2" <<-\EOF && for i in 1 2 3 4 5 6 @@ -572,7 +572,7 @@ test_expect_success '--run include, exclude and include' ' ' test_expect_success '--run include, exclude and include, comma separated' ' - run_sub_test_lib_test run-inc-neg-inc-comma \ + write_and_run_sub_test_lib_test run-inc-neg-inc-comma \ "--run include, exclude and include, comma separated" \ --run=1-5,!1-3,2 <<-\EOF && for i in 1 2 3 4 5 6 @@ -594,7 +594,7 @@ test_expect_success '--run include, exclude and include, comma separated' ' ' test_expect_success '--run exclude and include' ' - run_sub_test_lib_test run-neg-inc \ + write_and_run_sub_test_lib_test run-neg-inc \ "--run exclude and include" \ --run="!3-,5" <<-\EOF && for i in 1 2 3 4 5 6 @@ -616,7 +616,7 @@ test_expect_success '--run exclude and include' ' ' test_expect_success '--run empty selectors' ' - run_sub_test_lib_test run-empty-sel \ + write_and_run_sub_test_lib_test run-empty-sel \ "--run empty selectors" \ --run="1,,3,,,5" <<-\EOF && for i in 1 2 3 4 5 6 @@ -638,7 +638,7 @@ test_expect_success '--run empty selectors' ' ' test_expect_success '--run substring selector' ' - 
run_sub_test_lib_test run-substring-selector \ + write_and_run_sub_test_lib_test run-substring-selector \ "--run empty selectors" \ --run="relevant" <<-\EOF && test_expect_success "relevant test" "true" @@ -662,7 +662,7 @@ test_expect_success '--run substring selector' ' ' test_expect_success '--run keyword selection' ' - run_sub_test_lib_test_err run-inv-range-start \ + write_and_run_sub_test_lib_test_err run-inv-range-start \ "--run invalid range start" \ --run="a-5" <<-\EOF && test_expect_success "passing test #1" "true" @@ -677,7 +677,7 @@ test_expect_success '--run keyword selection' ' ' test_expect_success '--run invalid range end' ' - run_sub_test_lib_test_err run-inv-range-end \ + write_and_run_sub_test_lib_test_err run-inv-range-end \ "--run invalid range end" \ --run="1-z" <<-\EOF && test_expect_success "passing test #1" "true" @@ -692,7 +692,7 @@ test_expect_success '--run invalid range end' ' ' test_expect_success 'tests respect prerequisites' ' - run_sub_test_lib_test prereqs "tests respect prereqs" <<-\EOF && + write_and_run_sub_test_lib_test prereqs "tests respect prereqs" <<-\EOF && test_set_prereq HAVEIT test_expect_success HAVEIT "prereq is satisfied" "true" @@ -722,7 +722,7 @@ test_expect_success 'tests respect prerequisites' ' ' test_expect_success 'tests respect lazy prerequisites' ' - run_sub_test_lib_test lazy-prereqs "respect lazy prereqs" <<-\EOF && + write_and_run_sub_test_lib_test lazy-prereqs "respect lazy prereqs" <<-\EOF && test_lazy_prereq LAZY_TRUE true test_expect_success LAZY_TRUE "lazy prereq is satisifed" "true" @@ -746,7 +746,7 @@ test_expect_success 'tests respect lazy prerequisites' ' ' test_expect_success 'nested lazy prerequisites' ' - run_sub_test_lib_test nested-lazy "nested lazy prereqs" <<-\EOF && + write_and_run_sub_test_lib_test nested-lazy "nested lazy prereqs" <<-\EOF && test_lazy_prereq NESTED_INNER " >inner && @@ -772,7 +772,7 @@ test_expect_success 'nested lazy prerequisites' ' ' test_expect_success 'lazy prereqs 
do not turn off tracing' ' - run_sub_test_lib_test lazy-prereq-and-tracing \ + write_and_run_sub_test_lib_test lazy-prereq-and-tracing \ "lazy prereqs and -x" -v -x <<-\EOF && test_lazy_prereq LAZY true @@ -785,7 +785,7 @@ test_expect_success 'lazy prereqs do not turn off tracing' ' ' test_expect_success 'tests clean up after themselves' ' - run_sub_test_lib_test cleanup "test with cleanup" <<-\EOF && + write_and_run_sub_test_lib_test cleanup "test with cleanup" <<-\EOF && clean=no test_expect_success "do cleanup" " test_when_finished clean=yes @@ -805,7 +805,7 @@ test_expect_success 'tests clean up after themselves' ' ' test_expect_success 'tests clean up even on failures' ' - run_sub_test_lib_test_err \ + write_and_run_sub_test_lib_test_err \ failing-cleanup "Failing tests with cleanup commands" <<-\EOF && test_expect_success "tests clean up even after a failure" " touch clean-after-failure && @@ -834,7 +834,7 @@ test_expect_success 'tests clean up even on failures' ' ' test_expect_success 'test_atexit is run' ' - run_sub_test_lib_test_err \ + write_and_run_sub_test_lib_test_err \ atexit-cleanup "Run atexit commands" -i <<-\EOF && test_expect_success "tests clean up even after a failure" " > ../../clean-atexit && From 833a626105faade4f5e961f7119f6461de6339e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 22 Jul 2021 00:57:42 +0200 Subject: [PATCH 064/198] test-lib tests: stop using a subshell in write_sub_test_lib_test() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that this function doesn't handle running the test anymore we can do away with the sub-shell, which was used to scope an "unset" and "export" shell variables. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/lib-subtest.sh | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index 58ae316ad5fc25..21fa570d0b25b0 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -1,20 +1,17 @@ write_sub_test_lib_test () { name="$1" descr="$2" # stdin is the body of the test code mkdir "$name" && - ( - cd "$name" && - write_script "$name.sh" "$TEST_SHELL_PATH" <<-EOF && - test_description='$descr (run in sub test-lib) + write_script "$name/$name.sh" "$TEST_SHELL_PATH" <<-EOF && + test_description='$descr (run in sub test-lib) - This is run in a sub test-lib so that we do not get incorrect - passing metrics - ' + This is run in a sub test-lib so that we do not get incorrect + passing metrics + ' - # Point to the t/test-lib.sh, which isn't in ../ as usual - . "\$TEST_DIRECTORY"/test-lib.sh - EOF - cat >>"$name.sh" - ) + # Point to the t/test-lib.sh, which isn't in ../ as usual + . "\$TEST_DIRECTORY"/test-lib.sh + EOF + cat >>"$name/$name.sh" } _run_sub_test_lib_test_common () { From 57b10b205a4fcdbf6a0fcf6c56b706837fce8ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 22 Jul 2021 00:57:43 +0200 Subject: [PATCH 065/198] test-lib tests: don't provide a description for the sub-tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the $test_description provided for the generated subtests to be constant, since the only purpose of having it is that test-lib.sh will barf if it isn't supplied. The other purpose of having it was to effectively split up the test description between the argument to test_expect_success and the argument to "write_and_run_sub_test_lib_test". Let's only use one of the two. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/lib-subtest.sh | 12 ++-- t/t0000-basic.sh | 142 +++++++++++++++++++++-------------------------- 2 files changed, 66 insertions(+), 88 deletions(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index 21fa570d0b25b0..cdbc2d933206bb 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -1,12 +1,8 @@ write_sub_test_lib_test () { - name="$1" descr="$2" # stdin is the body of the test code + name="$1" # stdin is the body of the test code mkdir "$name" && write_script "$name/$name.sh" "$TEST_SHELL_PATH" <<-EOF && - test_description='$descr (run in sub test-lib) - - This is run in a sub test-lib so that we do not get incorrect - passing metrics - ' + test_description='A test of test-lib.sh itself' # Point to the t/test-lib.sh, which isn't in ../ as usual . "\$TEST_DIRECTORY"/test-lib.sh @@ -15,8 +11,8 @@ write_sub_test_lib_test () { } _run_sub_test_lib_test_common () { - neg="$1" name="$2" descr="$3" # stdin is the body of the test code - shift 3 + neg="$1" name="$2" # stdin is the body of the test code + shift 2 # intercept pseudo-options at the front of the argument list that we # will not pass to child script diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index a7c5aaacab6bc5..6fdd5f43caecc3 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -67,8 +67,8 @@ test_expect_success 'success is reported like this' ' : ' -test_expect_success 'pretend we have a fully passing test suite' ' - write_and_run_sub_test_lib_test full-pass "3 passing tests" <<-\EOF && +test_expect_success 'subtest: 3 passing tests' ' + write_and_run_sub_test_lib_test full-pass <<-\EOF && for i in 1 2 3 do test_expect_success "passing test #$i" "true" @@ -84,9 +84,8 @@ test_expect_success 'pretend we have a fully passing test suite' ' EOF ' -test_expect_success 'pretend we have a partially passing test suite' ' - write_and_run_sub_test_lib_test_err \ - partial-pass "2/3 tests passing" <<-\EOF && +test_expect_success 
'subtest: 2/3 tests passing' ' + write_and_run_sub_test_lib_test_err partial-pass <<-\EOF && test_expect_success "passing test #1" "true" test_expect_success "failing test #2" "false" test_expect_success "passing test #3" "true" @@ -102,8 +101,8 @@ test_expect_success 'pretend we have a partially passing test suite' ' EOF ' -test_expect_success 'pretend we have a known breakage' ' - write_and_run_sub_test_lib_test failing-todo "A failing TODO test" <<-\EOF && +test_expect_success 'subtest: a failing TODO test' ' + write_and_run_sub_test_lib_test failing-todo <<-\EOF && test_expect_success "passing test" "true" test_expect_failure "pretend we have a known breakage" "false" test_done @@ -117,8 +116,8 @@ test_expect_success 'pretend we have a known breakage' ' EOF ' -test_expect_success 'pretend we have fixed a known breakage' ' - write_and_run_sub_test_lib_test passing-todo "A passing TODO test" <<-\EOF && +test_expect_success 'subtest: a passing TODO test' ' + write_and_run_sub_test_lib_test passing-todo <<-\EOF && test_expect_failure "pretend we have fixed a known breakage" "true" test_done EOF @@ -129,9 +128,8 @@ test_expect_success 'pretend we have fixed a known breakage' ' EOF ' -test_expect_success 'pretend we have fixed one of two known breakages (run in sub test-lib)' ' - write_and_run_sub_test_lib_test partially-passing-todos \ - "2 TODO tests, one passing" <<-\EOF && +test_expect_success 'subtest: 2 TODO tests, one passin' ' + write_and_run_sub_test_lib_test partially-passing-todos <<-\EOF && test_expect_failure "pretend we have a known breakage" "false" test_expect_success "pretend we have a passing test" "true" test_expect_failure "pretend we have fixed another known breakage" "true" @@ -148,9 +146,8 @@ test_expect_success 'pretend we have fixed one of two known breakages (run in su EOF ' -test_expect_success 'pretend we have a pass, fail, and known breakage' ' - write_and_run_sub_test_lib_test_err \ - mixed-results1 "mixed results #1" <<-\EOF && 
+test_expect_success 'subtest: mixed results: pass, failure and a TODO test' ' + write_and_run_sub_test_lib_test_err mixed-results1 <<-\EOF && test_expect_success "passing test" "true" test_expect_success "failing test" "false" test_expect_failure "pretend we have a known breakage" "false" @@ -167,9 +164,8 @@ test_expect_success 'pretend we have a pass, fail, and known breakage' ' EOF ' -test_expect_success 'pretend we have a mix of all possible results' ' - write_and_run_sub_test_lib_test_err \ - mixed-results2 "mixed results #2" <<-\EOF && +test_expect_success 'subtest: mixed results: a mixture of all possible results' ' + write_and_run_sub_test_lib_test_err mixed-results2 <<-\EOF && test_expect_success "passing test" "true" test_expect_success "passing test" "true" test_expect_success "passing test" "true" @@ -203,9 +199,8 @@ test_expect_success 'pretend we have a mix of all possible results' ' EOF ' -test_expect_success 'test --verbose' ' - write_and_run_sub_test_lib_test_err \ - t1234-verbose "test verbose" --verbose <<-\EOF && +test_expect_success 'subtest: --verbose option' ' + write_and_run_sub_test_lib_test_err t1234-verbose --verbose <<-\EOF && test_expect_success "passing test" true test_expect_success "test with output" "echo foo" test_expect_success "failing test" false @@ -230,9 +225,9 @@ test_expect_success 'test --verbose' ' EOF ' -test_expect_success 'test --verbose-only' ' +test_expect_success 'subtest: --verbose-only option' ' write_and_run_sub_test_lib_test_err \ - t2345-verbose-only-2 "test verbose-only=2" \ + t2345-verbose-only-2 \ --verbose-only=2 <<-\EOF && test_expect_success "passing test" true test_expect_success "test with output" "echo foo" @@ -253,10 +248,9 @@ test_expect_success 'test --verbose-only' ' EOF ' -test_expect_success 'GIT_SKIP_TESTS' ' +test_expect_success 'subtest: skip one with GIT_SKIP_TESTS' ' ( write_and_run_sub_test_lib_test git-skip-tests-basic \ - "GIT_SKIP_TESTS" \ --skip="git.2" <<-\EOF && for i in 1 2 3 do @@ 
-274,10 +268,9 @@ test_expect_success 'GIT_SKIP_TESTS' ' ) ' -test_expect_success 'GIT_SKIP_TESTS several tests' ' +test_expect_success 'subtest: skip several with GIT_SKIP_TESTS' ' ( write_and_run_sub_test_lib_test git-skip-tests-several \ - "GIT_SKIP_TESTS several tests" \ --skip="git.2 git.5" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -298,10 +291,9 @@ test_expect_success 'GIT_SKIP_TESTS several tests' ' ) ' -test_expect_success 'GIT_SKIP_TESTS sh pattern' ' +test_expect_success 'subtest: sh pattern skipping with GIT_SKIP_TESTS' ' ( write_and_run_sub_test_lib_test git-skip-tests-sh-pattern \ - "GIT_SKIP_TESTS sh pattern" \ --skip="git.[2-5]" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -322,10 +314,9 @@ test_expect_success 'GIT_SKIP_TESTS sh pattern' ' ) ' -test_expect_success 'GIT_SKIP_TESTS entire suite' ' +test_expect_success 'subtest: skip entire test suite with GIT_SKIP_TESTS' ' ( write_and_run_sub_test_lib_test git-skip-tests-entire-suite \ - "GIT_SKIP_TESTS entire suite" \ --skip="git" <<-\EOF && for i in 1 2 3 do @@ -339,10 +330,9 @@ test_expect_success 'GIT_SKIP_TESTS entire suite' ' ) ' -test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' ' +test_expect_success 'subtest: GIT_SKIP_TESTS does not skip unmatched suite' ' ( write_and_run_sub_test_lib_test git-skip-tests-unmatched-suite \ - "GIT_SKIP_TESTS does not skip unmatched suite" \ --skip="notgit" <<-\EOF && for i in 1 2 3 do @@ -360,9 +350,8 @@ test_expect_success 'GIT_SKIP_TESTS does not skip unmatched suite' ' ) ' -test_expect_success '--run basic' ' - write_and_run_sub_test_lib_test run-basic \ - "--run basic" --run="1,3,5" <<-\EOF && +test_expect_success 'subtest: --run basic' ' + write_and_run_sub_test_lib_test run-basic --run="1,3,5" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -381,9 +370,9 @@ test_expect_success '--run basic' ' EOF ' -test_expect_success '--run with a range' ' +test_expect_success 'subtest: --run with a range' ' 
write_and_run_sub_test_lib_test run-range \ - "--run with a range" --run="1-3" <<-\EOF && + --run="1-3" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -402,9 +391,9 @@ test_expect_success '--run with a range' ' EOF ' -test_expect_success '--run with two ranges' ' +test_expect_success 'subtest: --run with two ranges' ' write_and_run_sub_test_lib_test run-two-ranges \ - "--run with two ranges" --run="1-2,5-6" <<-\EOF && + --run="1-2,5-6" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -423,9 +412,9 @@ test_expect_success '--run with two ranges' ' EOF ' -test_expect_success '--run with a left open range' ' +test_expect_success 'subtest: --run with a left open range' ' write_and_run_sub_test_lib_test run-left-open-range \ - "--run with a left open range" --run="-3" <<-\EOF && + --run="-3" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -444,9 +433,9 @@ test_expect_success '--run with a left open range' ' EOF ' -test_expect_success '--run with a right open range' ' +test_expect_success 'subtest: --run with a right open range' ' write_and_run_sub_test_lib_test run-right-open-range \ - "--run with a right open range" --run="4-" <<-\EOF && + --run="4-" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -465,9 +454,9 @@ test_expect_success '--run with a right open range' ' EOF ' -test_expect_success '--run with basic negation' ' +test_expect_success 'subtest: --run with basic negation' ' write_and_run_sub_test_lib_test run-basic-neg \ - "--run with basic negation" --run="!3" <<-\EOF && + --run="!3" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -486,9 +475,9 @@ test_expect_success '--run with basic negation' ' EOF ' -test_expect_success '--run with two negations' ' +test_expect_success 'subtest: --run with two negations' ' write_and_run_sub_test_lib_test run-two-neg \ - "--run with two negations" 
--run="!3,!6" <<-\EOF && + --run="!3,!6" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -507,9 +496,9 @@ test_expect_success '--run with two negations' ' EOF ' -test_expect_success '--run a range and negation' ' +test_expect_success 'subtest: --run a range and negation' ' write_and_run_sub_test_lib_test run-range-and-neg \ - "--run a range and negation" --run="-4,!2" <<-\EOF && + --run="-4,!2" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -528,9 +517,9 @@ test_expect_success '--run a range and negation' ' EOF ' -test_expect_success '--run range negation' ' +test_expect_success 'subtest: --run range negation' ' write_and_run_sub_test_lib_test run-range-neg \ - "--run range negation" --run="!1-3" <<-\EOF && + --run="!1-3" <<-\EOF && for i in 1 2 3 4 5 6 do test_expect_success "passing test #$i" "true" @@ -549,9 +538,8 @@ test_expect_success '--run range negation' ' EOF ' -test_expect_success '--run include, exclude and include' ' +test_expect_success 'subtest: --run include, exclude and include' ' write_and_run_sub_test_lib_test run-inc-neg-inc \ - "--run include, exclude and include" \ --run="1-5,!1-3,2" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -571,9 +559,8 @@ test_expect_success '--run include, exclude and include' ' EOF ' -test_expect_success '--run include, exclude and include, comma separated' ' +test_expect_success 'subtest: --run include, exclude and include, comma separated' ' write_and_run_sub_test_lib_test run-inc-neg-inc-comma \ - "--run include, exclude and include, comma separated" \ --run=1-5,!1-3,2 <<-\EOF && for i in 1 2 3 4 5 6 do @@ -593,9 +580,8 @@ test_expect_success '--run include, exclude and include, comma separated' ' EOF ' -test_expect_success '--run exclude and include' ' +test_expect_success 'subtest: --run exclude and include' ' write_and_run_sub_test_lib_test run-neg-inc \ - "--run exclude and include" \ --run="!3-,5" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -615,9 
+601,8 @@ test_expect_success '--run exclude and include' ' EOF ' -test_expect_success '--run empty selectors' ' +test_expect_success 'subtest: --run empty selectors' ' write_and_run_sub_test_lib_test run-empty-sel \ - "--run empty selectors" \ --run="1,,3,,,5" <<-\EOF && for i in 1 2 3 4 5 6 do @@ -637,9 +622,8 @@ test_expect_success '--run empty selectors' ' EOF ' -test_expect_success '--run substring selector' ' +test_expect_success 'subtest: --run substring selector' ' write_and_run_sub_test_lib_test run-substring-selector \ - "--run empty selectors" \ --run="relevant" <<-\EOF && test_expect_success "relevant test" "true" for i in 1 2 3 4 5 6 @@ -661,9 +645,8 @@ test_expect_success '--run substring selector' ' EOF ' -test_expect_success '--run keyword selection' ' +test_expect_success 'subtest: --run keyword selection' ' write_and_run_sub_test_lib_test_err run-inv-range-start \ - "--run invalid range start" \ --run="a-5" <<-\EOF && test_expect_success "passing test #1" "true" test_done @@ -676,9 +659,8 @@ test_expect_success '--run keyword selection' ' EOF_ERR ' -test_expect_success '--run invalid range end' ' +test_expect_success 'subtest: --run invalid range end' ' write_and_run_sub_test_lib_test_err run-inv-range-end \ - "--run invalid range end" \ --run="1-z" <<-\EOF && test_expect_success "passing test #1" "true" test_done @@ -691,8 +673,8 @@ test_expect_success '--run invalid range end' ' EOF_ERR ' -test_expect_success 'tests respect prerequisites' ' - write_and_run_sub_test_lib_test prereqs "tests respect prereqs" <<-\EOF && +test_expect_success 'subtest: tests respect prerequisites' ' + write_and_run_sub_test_lib_test prereqs <<-\EOF && test_set_prereq HAVEIT test_expect_success HAVEIT "prereq is satisfied" "true" @@ -721,8 +703,8 @@ test_expect_success 'tests respect prerequisites' ' EOF ' -test_expect_success 'tests respect lazy prerequisites' ' - write_and_run_sub_test_lib_test lazy-prereqs "respect lazy prereqs" <<-\EOF && +test_expect_success 
'subtest: tests respect lazy prerequisites' ' + write_and_run_sub_test_lib_test lazy-prereqs <<-\EOF && test_lazy_prereq LAZY_TRUE true test_expect_success LAZY_TRUE "lazy prereq is satisifed" "true" @@ -745,8 +727,8 @@ test_expect_success 'tests respect lazy prerequisites' ' EOF ' -test_expect_success 'nested lazy prerequisites' ' - write_and_run_sub_test_lib_test nested-lazy "nested lazy prereqs" <<-\EOF && +test_expect_success 'subtest: nested lazy prerequisites' ' + write_and_run_sub_test_lib_test nested-lazy <<-\EOF && test_lazy_prereq NESTED_INNER " >inner && @@ -771,9 +753,9 @@ test_expect_success 'nested lazy prerequisites' ' EOF ' -test_expect_success 'lazy prereqs do not turn off tracing' ' +test_expect_success 'subtest: lazy prereqs do not turn off tracing' ' write_and_run_sub_test_lib_test lazy-prereq-and-tracing \ - "lazy prereqs and -x" -v -x <<-\EOF && + -v -x <<-\EOF && test_lazy_prereq LAZY true test_expect_success lazy "test_have_prereq LAZY && echo trace" @@ -784,8 +766,8 @@ test_expect_success 'lazy prereqs do not turn off tracing' ' grep "echo trace" lazy-prereq-and-tracing/err ' -test_expect_success 'tests clean up after themselves' ' - write_and_run_sub_test_lib_test cleanup "test with cleanup" <<-\EOF && +test_expect_success 'subtest: tests clean up after themselves' ' + write_and_run_sub_test_lib_test cleanup <<-\EOF && clean=no test_expect_success "do cleanup" " test_when_finished clean=yes @@ -804,9 +786,9 @@ test_expect_success 'tests clean up after themselves' ' EOF ' -test_expect_success 'tests clean up even on failures' ' +test_expect_success 'subtest: tests clean up even on failures' ' write_and_run_sub_test_lib_test_err \ - failing-cleanup "Failing tests with cleanup commands" <<-\EOF && + failing-cleanup <<-\EOF && test_expect_success "tests clean up even after a failure" " touch clean-after-failure && test_when_finished rm clean-after-failure && @@ -833,9 +815,9 @@ test_expect_success 'tests clean up even on failures' ' EOF ' 
-test_expect_success 'test_atexit is run' ' +test_expect_success 'subtest: test_atexit is run' ' write_and_run_sub_test_lib_test_err \ - atexit-cleanup "Run atexit commands" -i <<-\EOF && + atexit-cleanup -i <<-\EOF && test_expect_success "tests clean up even after a failure" " > ../../clean-atexit && test_atexit rm ../../clean-atexit && From 5a1fc39dc7b630900a15e8a9cd5eb42f9539e43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:54:59 +0200 Subject: [PATCH 066/198] progress.c tests: make start/stop verbs on stdin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the usage of the "test-tool progress" introduced in 2bb74b53a49 (Test the progress display, 2019-09-16) to take commands like "start" and "stop" on stdin, instead of running them implicitly. This makes for tests that are easier to read, since the recipe will mirror the API usage, and allows for easily testing invalid usage that would yield (or should yield) a BUG(), e.g. providing two "start" calls in a row. A subsequent commit will add such stress tests. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/helper/test-progress.c | 43 +++++++++++++++++++-------- t/t0500-progress-display.sh | 59 +++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 35 deletions(-) diff --git a/t/helper/test-progress.c b/t/helper/test-progress.c index 5d05cbe7894097..685c0a7c49a20d 100644 --- a/t/helper/test-progress.c +++ b/t/helper/test-progress.c @@ -3,6 +3,9 @@ * * Reads instructions from standard input, one instruction per line: * + * "start[ <total>[ <title>]]" - Call start_progress(title, total), + * when "start" use a title of + * "Working hard" with a total of 0. * "progress <items>" - Call display_progress() with the given item count * as parameter.
* "throughput <bytes> <millis> - Call display_throughput() with the given @@ -10,6 +13,7 @@ * specify the time elapsed since the * start_progress() call. * "update" - Set the 'progress_update' flag. + * "stop" - Call stop_progress(). * * See 't0500-progress-display.sh' for examples. */ @@ -22,31 +26,41 @@ int cmd__progress(int argc, const char **argv) { - int total = 0; - const char *title; + const char *default_title = "Working hard"; + char *detached_title = NULL; struct strbuf line = STRBUF_INIT; - struct progress *progress; + struct progress *progress = NULL; const char *usage[] = { - "test-tool progress [--total=<n>] <progress-title>", + "test-tool progress <stdin", NULL }; struct option options[] = { - OPT_INTEGER(0, "total", &total, "total number of items"), OPT_END(), }; argc = parse_options(argc, argv, NULL, options, usage, 0); - if (argc != 1) - die("need a title for the progress output"); - title = argv[0]; + if (argc) + usage_with_options(usage, options); progress_testing = 1; - progress = start_progress(title, total); while (strbuf_getline(&line, stdin) != EOF) { char *end; - if (skip_prefix(line.buf, "progress ", (const char **) &end)) { + if (!strcmp(line.buf, "start")) { + progress = start_progress(default_title, 0); + } else if (skip_prefix(line.buf, "start ", (const char **) &end)) { + uint64_t total = strtoull(end, &end, 10); + if (*end == '\0') { + progress = start_progress(default_title, total); + } else if (*end == ' ') { + free(detached_title); + detached_title = strbuf_detach(&line, NULL); + progress = start_progress(end + 1, total); + } else { + die("invalid input: '%s'\n", line.buf); + } + } else if (skip_prefix(line.buf, "progress ", (const char **) &end)) { uint64_t item_count = strtoull(end, &end, 10); if (*end != '\0') die("invalid input: '%s'\n", line.buf); @@ -63,12 +77,15 @@ int cmd__progress(int argc, const char **argv) die("invalid input: '%s'\n", line.buf); progress_test_ns = test_ms * 1000 * 1000; display_throughput(progress, 
byte_count); - } else if (!strcmp(line.buf, "update")) + } else if (!strcmp(line.buf, "update")) { progress_test_force_update(); - else + } else if (!strcmp(line.buf, "stop")) { + stop_progress(&progress); + } else { die("invalid input: '%s'\n", line.buf); + } } - stop_progress(&progress); + free(detached_title); return 0; } diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index 22058b503ac78c..ca96ac1fa556de 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -17,6 +17,7 @@ test_expect_success 'simple progress display' ' EOF cat >in <<-\EOF && + start 0 update progress 1 update @@ -25,8 +26,9 @@ test_expect_success 'simple progress display' ' progress 4 update progress 5 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -41,11 +43,13 @@ test_expect_success 'progress display with total' ' EOF cat >in <<-\EOF && + start 3 progress 1 progress 2 progress 3 + stop EOF - test-tool progress --total=3 "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -62,14 +66,14 @@ Working hard.......2.........3.........4.........5.........6: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6 progress 100 progress 1000 progress 10000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6" \ - <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -88,16 +92,15 @@ Working hard.......2.........3.........4.........5.........6: EOF cat >in <<-\EOF && - update + start 100000 Working hard.......2.........3.........4.........5.........6 progress 1 update progress 2 progress 10000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6" \ - <in 2>stderr 
&& + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -116,14 +119,14 @@ Working hard.......2.........3.........4.........5.........6: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6 progress 25000 progress 50000 progress 75000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6" \ - <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -140,14 +143,14 @@ Working hard.......2.........3.........4.........5.........6.........7.........: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6.........7......... progress 25000 progress 50000 progress 75000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6.........7........." \ - <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -164,12 +167,14 @@ test_expect_success 'progress shortens - crazy caller' ' EOF cat >in <<-\EOF && + start 1000 progress 100 progress 200 progress 1 progress 1000 + stop EOF - test-tool progress --total=1000 "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -185,6 +190,7 @@ test_expect_success 'progress display with throughput' ' EOF cat >in <<-\EOF && + start throughput 102400 1000 update progress 10 @@ -197,8 +203,9 @@ test_expect_success 'progress display with throughput' ' throughput 409600 4000 update progress 40 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -214,6 +221,7 @@ test_expect_success 'progress display with throughput and total' ' EOF cat >in <<-\EOF && + start 40 throughput 102400 1000 progress 10 throughput 204800 2000 @@ -222,8 +230,9 @@ 
test_expect_success 'progress display with throughput and total' ' progress 30 throughput 409600 4000 progress 40 + stop EOF - test-tool progress --total=40 "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -239,6 +248,7 @@ test_expect_success 'cover up after throughput shortens' ' EOF cat >in <<-\EOF && + start throughput 409600 1000 update progress 1 @@ -251,8 +261,9 @@ test_expect_success 'cover up after throughput shortens' ' throughput 1638400 4000 update progress 4 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -267,6 +278,7 @@ test_expect_success 'cover up after throughput shortens a lot' ' EOF cat >in <<-\EOF && + start throughput 1 1000 update progress 1 @@ -276,8 +288,9 @@ test_expect_success 'cover up after throughput shortens a lot' ' throughput 3145728 3000 update progress 3 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -285,6 +298,7 @@ test_expect_success 'cover up after throughput shortens a lot' ' test_expect_success 'progress generates traces' ' cat >in <<-\EOF && + start 40 throughput 102400 1000 update progress 10 @@ -297,10 +311,11 @@ test_expect_success 'progress generates traces' ' throughput 409600 4000 update progress 40 + stop EOF - GIT_TRACE2_EVENT="$(pwd)/trace.event" test-tool progress --total=40 \ - "Working hard" <in 2>stderr && + GIT_TRACE2_EVENT="$(pwd)/trace.event" test-tool progress \ + <in 2>stderr && # t0212/parse_events.perl intentionally omits regions and data. 
test_region progress "Working hard" trace.event && From e3940c1f60ae5d19984848d484858f17e4892ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:00 +0200 Subject: [PATCH 067/198] progress.c tests: test some invalid usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test what happens when we "stop" without a "start", omit the "stop" after a "start", or try to start two concurrent progress bars. This extends the trace2 tests added in 98a13647408 (trace2: log progress time and throughput, 2020-05-12). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0500-progress-display.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index ca96ac1fa556de..ffa819ca1db95a 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -323,4 +323,37 @@ test_expect_success 'progress generates traces' ' grep "\"key\":\"total_bytes\",\"value\":\"409600\"" trace.event ' +test_expect_success 'progress generates traces: stop / start' ' + cat >in <<-\EOF && + start + stop + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-startstop.event" test-tool progress \ + <in 2>stderr && + test_region progress "Working hard" trace-startstop.event +' + +test_expect_success 'progress generates traces: start without stop' ' + cat >in <<-\EOF && + start + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-start.event" test-tool progress \ + <in 2>stderr && + grep region_enter.*progress trace-start.event && + ! grep region_leave.*progress trace-start.event +' + +test_expect_success 'progress generates traces: stop without start' ' + cat >in <<-\EOF && + stop + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-stop.event" test-tool progress \ + <in 2>stderr && + ! grep region_enter.*progress trace-stop.event && + ! 
grep region_leave.*progress trace-stop.event +' + test_done From 06b6d15050e105d6b881a5f821cbe027c67c9569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:01 +0200 Subject: [PATCH 068/198] progress.c: move signal handler functions lower MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the signal handler functions to just before the start_progress_delay() where they'll be referenced, instead of having them at the top of the file. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 92 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/progress.c b/progress.c index 680c6a8bf93b51..893cb0fe56f321 100644 --- a/progress.c +++ b/progress.c @@ -53,50 +53,6 @@ static volatile sig_atomic_t progress_update; */ int progress_testing; uint64_t progress_test_ns = 0; -void progress_test_force_update(void) -{ - progress_update = 1; -} - - -static void progress_interval(int signum) -{ - progress_update = 1; -} - -static void set_progress_signal(void) -{ - struct sigaction sa; - struct itimerval v; - - if (progress_testing) - return; - - progress_update = 0; - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = progress_interval; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - sigaction(SIGALRM, &sa, NULL); - - v.it_interval.tv_sec = 1; - v.it_interval.tv_usec = 0; - v.it_value = v.it_interval; - setitimer(ITIMER_REAL, &v, NULL); -} - -static void clear_progress_signal(void) -{ - struct itimerval v = {{0,},}; - - if (progress_testing) - return; - - setitimer(ITIMER_REAL, &v, NULL); - signal(SIGALRM, SIG_IGN); - progress_update = 0; -} static int is_foreground_fd(int fd) { @@ -249,6 +205,54 @@ void display_progress(struct progress *progress, uint64_t n) display(progress, n, NULL); } +static void 
progress_interval(int signum) +{ + progress_update = 1; +} + +/* + * The progress_test_force_update() function is intended for testing + * the progress output, i.e. exclusively for 'test-tool progress'. + */ +void progress_test_force_update(void) +{ + progress_update = 1; +} + +static void set_progress_signal(void) +{ + struct sigaction sa; + struct itimerval v; + + if (progress_testing) + return; + + progress_update = 0; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = progress_interval; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sigaction(SIGALRM, &sa, NULL); + + v.it_interval.tv_sec = 1; + v.it_interval.tv_usec = 0; + v.it_value = v.it_interval; + setitimer(ITIMER_REAL, &v, NULL); +} + +static void clear_progress_signal(void) +{ + struct itimerval v = {{0,},}; + + if (progress_testing) + return; + + setitimer(ITIMER_REAL, &v, NULL); + signal(SIGALRM, SIG_IGN); + progress_update = 0; +} + static struct progress *start_progress_delay(const char *title, uint64_t total, unsigned delay, unsigned sparse) { From 7133df11c2f3a20d7511285fba19895adedf475c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:02 +0200 Subject: [PATCH 069/198] progress.c: call progress_interval() from progress_test_force_update() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the progress_test_force_update() function in terms of progress_interval(). For documentation purposes these two functions have the same body, but different names. Let's just define the test function by calling progress_interval() with SIGALRM ourselves. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/progress.c b/progress.c index 893cb0fe56f321..7fcc513717a262 100644 --- a/progress.c +++ b/progress.c @@ -216,7 +216,7 @@ static void progress_interval(int signum) */ void progress_test_force_update(void) { - progress_update = 1; + progress_interval(SIGALRM); } static void set_progress_signal(void) From 0b5066cbef96ca33aa807d90191b0780cbdadcc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:03 +0200 Subject: [PATCH 070/198] progress.c: stop eagerly fflush(stderr) when not a terminal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's the clear intention of the combination of 137a0d0ef56 (Flush progress message buffer in display()., 2007-11-19) and 85cb8906f0e (progress: no progress in background, 2015-04-13) to call fflush(stderr) when we have a stderr in the foreground, but we ended up always calling fflush(stderr) seemingly by omission. Let's not. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/progress.c b/progress.c index 7fcc513717a262..1fade5808de3fe 100644 --- a/progress.c +++ b/progress.c @@ -91,7 +91,8 @@ static void display(struct progress *progress, uint64_t n, const char *done) } if (show_update) { - if (is_foreground_fd(fileno(stderr)) || done) { + int stderr_is_foreground_fd = is_foreground_fd(fileno(stderr)); + if (stderr_is_foreground_fd || done) { const char *eol = done ? done : "\r"; size_t clear_len = counters_sb->len < last_count_len ? 
last_count_len - counters_sb->len + 1 : @@ -115,7 +116,8 @@ static void display(struct progress *progress, uint64_t n, const char *done) fprintf(stderr, "%s: %s%*s", progress->title, counters_sb->buf, (int) clear_len, eol); } - fflush(stderr); + if (stderr_is_foreground_fd) + fflush(stderr); } progress_update = 0; } From 95b87268f6c8fc8c20bcf3940240a6a8ea9a9ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:04 +0200 Subject: [PATCH 071/198] progress.c: add temporary variable from progress struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a temporary "progress" variable for the dereferenced p_progress pointer to a "struct progress *". Before 98a13647408 (trace2: log progress time and throughput, 2020-05-12) we didn't dereference "p_progress" in this function, now that we do it's easier to read the code if we work with a "progress" struct pointer like everywhere else, instead of a pointer to a pointer. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/progress.c b/progress.c index 1fade5808de3fe..1ab7d19deb8cc7 100644 --- a/progress.c +++ b/progress.c @@ -331,15 +331,16 @@ void stop_progress(struct progress **p_progress) finish_if_sparse(*p_progress); if (*p_progress) { + struct progress *progress = *p_progress; trace2_data_intmax("progress", the_repository, "total_objects", (*p_progress)->total); if ((*p_progress)->throughput) trace2_data_intmax("progress", the_repository, "total_bytes", - (*p_progress)->throughput->curr_total); + progress->throughput->curr_total); - trace2_region_leave("progress", (*p_progress)->title, the_repository); + trace2_region_leave("progress", progress->title, the_repository); } stop_progress_msg(p_progress, _("done")); From 92565d0f5b71de79bfb6624866f70f8f7f277e86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:05 +0200 Subject: [PATCH 072/198] pack-bitmap-write.c: add a missing stop_progress() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug that's been here since 7cc8f971085 (pack-objects: implement bitmap writing, 2013-12-21): we did not call stop_progress() if we reached the early exit in this function. This will matter in a subsequent commit where we BUG(...) out if this happens, and matters now e.g. because we don't have a corresponding "region_leave" for the progress trace2 event.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap-write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 88d9e696a546a8..6e110e41ea4da3 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -550,6 +550,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits, if (indexed_commits_nr < 100) { for (i = 0; i < indexed_commits_nr; ++i) push_bitmapped_commit(indexed_commits[i]); + stop_progress(&writer.progress); return; } From 85996a71a468341ab6b8c10c4542894b23316ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 22 Jul 2021 14:55:06 +0200 Subject: [PATCH 073/198] progress.c: add & assert a "global_progress" variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The progress.c code makes a hard assumption that only one progress bar be active at a time (see [1] for a bug where this wasn't the case), but nothing has asserted that that's the case. Let's add a BUG() that'll trigger if two progress bars are active at the same time. There's an alternate test-only approach to doing the same thing[2], but by doing this for all progress bars we'll have a canary to check if we have any unexpected interaction between the "sig_atomic_t progress_update" variable and this global struct. I am then planning on using this scaffolding in the future to fix a limitation in the progress output, namely the current limitation of the progress.c bar code that any update must pro-actively go through the likes of display_progress(). If we e.g. hang forever before the first display_progress(), or in the middle of a loop that would call display_progress() the user will only see either no output, or output frozen at the last display_progress() that would have done an update (e.g. in cases where progress_update was "1" due to an earlier signal). 
This change does not fix that, but sets up the structure for solving that and other related problems by juggling this "global_progress" struct. Later changes will make more use of the "global_progress" than only using it for these assertions. 1. 6f9d5f2fda1 (commit-graph: fix progress of reachable commits, 2020-07-09) 2. https://lore.kernel.org/git/20210620200303.2328957-3-szeder.dev@gmail.com Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 17 +++++++++++++---- t/t0500-progress-display.sh | 11 +++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/progress.c b/progress.c index 1ab7d19deb8cc7..14a023f4b433d8 100644 --- a/progress.c +++ b/progress.c @@ -46,6 +46,7 @@ struct progress { }; static volatile sig_atomic_t progress_update; +static struct progress *global_progress; /* * These are only intended for testing the progress output, i.e. exclusively @@ -221,11 +222,15 @@ void progress_test_force_update(void) progress_interval(SIGALRM); } -static void set_progress_signal(void) +static void set_progress_signal(struct progress *progress) { struct sigaction sa; struct itimerval v; + if (global_progress) + BUG("should have no global_progress in set_progress_signal()"); + global_progress = progress; + if (progress_testing) return; @@ -243,10 +248,14 @@ static void set_progress_signal(void) setitimer(ITIMER_REAL, &v, NULL); } -static void clear_progress_signal(void) +static void clear_progress_signal(struct progress *progress) { struct itimerval v = {{0,},}; + if (!global_progress) + BUG("should have a global_progress in clear_progress_signal()"); + global_progress = NULL; + if (progress_testing) return; @@ -270,7 +279,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, strbuf_init(&progress->counters_sb, 0); progress->title_len = utf8_strwidth(title); progress->split = 0; - set_progress_signal(); + set_progress_signal(progress); 
trace2_region_enter("progress", title, the_repository); return progress; } @@ -374,7 +383,7 @@ void stop_progress_msg(struct progress **p_progress, const char *msg) display(progress, progress->last_value, buf); free(buf); } - clear_progress_signal(); + clear_progress_signal(progress); strbuf_release(&progress->counters_sb); if (progress->throughput) strbuf_release(&progress->throughput->display); diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index ffa819ca1db95a..124d33c96b3f01 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -296,6 +296,17 @@ test_expect_success 'cover up after throughput shortens a lot' ' test_cmp expect out ' +test_expect_success 'BUG: start two concurrent progress bars' ' + cat >in <<-\EOF && + start 0 one + start 0 two + EOF + + test_must_fail test-tool progress \ + <in 2>stderr && + grep -E "^BUG: .*: should have no global_progress in set_progress_signal\(\)$" stderr +' + test_expect_success 'progress generates traces' ' cat >in <<-\EOF && start 40 From e6edfe9778b8767e6627d68b12283472dfc31f75 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:24 -0400 Subject: [PATCH 074/198] pack-bitmap.c: harden 'test_bitmap_walk()' to check type bitmaps The special `--test-bitmap` mode of `git rev-list` is used to compare the result of an object traversal with a bitmap to check its integrity. This mode does not, however, assert that the types of reachable objects are stored correctly. Harden this mode by teaching it to also check that each time an object's bit is marked, the corresponding bit should be set in exactly one of the type bitmaps (whose type matches the object's true type). 
Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/pack-bitmap.c b/pack-bitmap.c index d90e1d9d8cb526..a1425f30541e0a 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1301,10 +1301,52 @@ void count_bitmap_commit_list(struct bitmap_index *bitmap_git, struct bitmap_test_data { struct bitmap_index *bitmap_git; struct bitmap *base; + struct bitmap *commits; + struct bitmap *trees; + struct bitmap *blobs; + struct bitmap *tags; struct progress *prg; size_t seen; }; +static void test_bitmap_type(struct bitmap_test_data *tdata, + struct object *obj, int pos) +{ + enum object_type bitmap_type = OBJ_NONE; + int bitmaps_nr = 0; + + if (bitmap_get(tdata->commits, pos)) { + bitmap_type = OBJ_COMMIT; + bitmaps_nr++; + } + if (bitmap_get(tdata->trees, pos)) { + bitmap_type = OBJ_TREE; + bitmaps_nr++; + } + if (bitmap_get(tdata->blobs, pos)) { + bitmap_type = OBJ_BLOB; + bitmaps_nr++; + } + if (bitmap_get(tdata->tags, pos)) { + bitmap_type = OBJ_TAG; + bitmaps_nr++; + } + + if (bitmap_type == OBJ_NONE) + die("object %s not found in type bitmaps", + oid_to_hex(&obj->oid)); + + if (bitmaps_nr > 1) + die("object %s does not have a unique type", + oid_to_hex(&obj->oid)); + + if (bitmap_type != obj->type) + die("object %s: real type %s, expected: %s", + oid_to_hex(&obj->oid), + type_name(obj->type), + type_name(bitmap_type)); +} + static void test_show_object(struct object *object, const char *name, void *data) { @@ -1314,6 +1356,7 @@ static void test_show_object(struct object *object, const char *name, bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid); if (bitmap_pos < 0) die("Object not in bitmap: %s\n", oid_to_hex(&object->oid)); + test_bitmap_type(tdata, object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); 
display_progress(tdata->prg, ++tdata->seen); @@ -1328,6 +1371,7 @@ static void test_show_commit(struct commit *commit, void *data) &commit->object.oid); if (bitmap_pos < 0) die("Object not in bitmap: %s\n", oid_to_hex(&commit->object.oid)); + test_bitmap_type(tdata, &commit->object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); display_progress(tdata->prg, ++tdata->seen); @@ -1375,6 +1419,10 @@ void test_bitmap_walk(struct rev_info *revs) tdata.bitmap_git = bitmap_git; tdata.base = bitmap_new(); + tdata.commits = ewah_to_bitmap(bitmap_git->commits); + tdata.trees = ewah_to_bitmap(bitmap_git->trees); + tdata.blobs = ewah_to_bitmap(bitmap_git->blobs); + tdata.tags = ewah_to_bitmap(bitmap_git->tags); tdata.prg = start_progress("Verifying bitmap entries", result_popcnt); tdata.seen = 0; From 8abf388fb56f28494951d907eb383bb7a1df35cc Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:27 -0400 Subject: [PATCH 075/198] pack-bitmap-write.c: gracefully fail to write non-closed bitmaps The set of objects covered by a bitmap must be closed under reachability, since it must be the case that there is a valid bit position assigned for every possible reachable object (otherwise the bitmaps would be incomplete). Pack bitmaps are never written from 'git repack' unless repacking all-into-one, and so we never write non-closed bitmaps (except in the case of partial clones where we aren't guaranteed to have all objects). But multi-pack bitmaps change this, since it isn't known whether the set of objects in the MIDX is closed under reachability until walking them. Plumb through a bit that is set when a reachable object isn't found. As soon as a reachable object isn't found in the set of objects to include in the bitmap, bitmap_writer_build() knows that the set is not closed, and so it now fails gracefully. 
A test is added in t0410 to trigger a bitmap write without full reachability closure by removing local copies of some reachable objects from a promisor remote. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/pack-objects.c | 3 +- pack-bitmap-write.c | 76 ++++++++++++++++++++++++++++------------ pack-bitmap.h | 2 +- t/t0410-partial-clone.sh | 9 ++++- 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de00adbb9e0d51..8a523624a16eb2 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1256,7 +1256,8 @@ static void write_pack_file(void) bitmap_writer_show_progress(progress); bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1); - bitmap_writer_build(&to_pack); + if (bitmap_writer_build(&to_pack) < 0) + die(_("failed to write bitmap index")); bitmap_writer_finish(written_list, nr_written, tmpname.buf, write_bitmap_options); write_bitmap_index = 0; diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 88d9e696a546a8..d374f7884b4114 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -125,15 +125,20 @@ static inline void push_bitmapped_commit(struct commit *commit) writer.selected_nr++; } -static uint32_t find_object_pos(const struct object_id *oid) +static uint32_t find_object_pos(const struct object_id *oid, int *found) { struct object_entry *entry = packlist_find(writer.to_pack, oid); if (!entry) { - die("Failed to write bitmap index. Packfile doesn't have full closure " + if (found) + *found = 0; + warning("Failed to write bitmap index. 
Packfile doesn't have full closure " "(object %s is missing)", oid_to_hex(oid)); + return 0; } + if (found) + *found = 1; return oe_in_pack_pos(writer.to_pack, entry); } @@ -331,9 +336,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb) bb->commits_nr = bb->commits_alloc = 0; } -static void fill_bitmap_tree(struct bitmap *bitmap, - struct tree *tree) +static int fill_bitmap_tree(struct bitmap *bitmap, + struct tree *tree) { + int found; uint32_t pos; struct tree_desc desc; struct name_entry entry; @@ -342,9 +348,11 @@ static void fill_bitmap_tree(struct bitmap *bitmap, * If our bit is already set, then there is nothing to do. Both this * tree and all of its children will be set. */ - pos = find_object_pos(&tree->object.oid); + pos = find_object_pos(&tree->object.oid, &found); + if (!found) + return -1; if (bitmap_get(bitmap, pos)) - return; + return 0; bitmap_set(bitmap, pos); if (parse_tree(tree) < 0) @@ -355,11 +363,15 @@ static void fill_bitmap_tree(struct bitmap *bitmap, while (tree_entry(&desc, &entry)) { switch (object_type(entry.mode)) { case OBJ_TREE: - fill_bitmap_tree(bitmap, - lookup_tree(the_repository, &entry.oid)); + if (fill_bitmap_tree(bitmap, + lookup_tree(the_repository, &entry.oid)) < 0) + return -1; break; case OBJ_BLOB: - bitmap_set(bitmap, find_object_pos(&entry.oid)); + pos = find_object_pos(&entry.oid, &found); + if (!found) + return -1; + bitmap_set(bitmap, pos); break; default: /* Gitlink, etc; not reachable */ @@ -368,15 +380,18 @@ static void fill_bitmap_tree(struct bitmap *bitmap, } free_tree_buffer(tree); + return 0; } -static void fill_bitmap_commit(struct bb_commit *ent, - struct commit *commit, - struct prio_queue *queue, - struct prio_queue *tree_queue, - struct bitmap_index *old_bitmap, - const uint32_t *mapping) +static int fill_bitmap_commit(struct bb_commit *ent, + struct commit *commit, + struct prio_queue *queue, + struct prio_queue *tree_queue, + struct bitmap_index *old_bitmap, + const uint32_t *mapping) { + 
int found; + uint32_t pos; if (!ent->bitmap) ent->bitmap = bitmap_new(); @@ -401,11 +416,16 @@ static void fill_bitmap_commit(struct bb_commit *ent, * Mark ourselves and queue our tree. The commit * walk ensures we cover all parents. */ - bitmap_set(ent->bitmap, find_object_pos(&c->object.oid)); + pos = find_object_pos(&c->object.oid, &found); + if (!found) + return -1; + bitmap_set(ent->bitmap, pos); prio_queue_put(tree_queue, get_commit_tree(c)); for (p = c->parents; p; p = p->next) { - int pos = find_object_pos(&p->item->object.oid); + pos = find_object_pos(&p->item->object.oid, &found); + if (!found) + return -1; if (!bitmap_get(ent->bitmap, pos)) { bitmap_set(ent->bitmap, pos); prio_queue_put(queue, p->item); @@ -413,8 +433,12 @@ static void fill_bitmap_commit(struct bb_commit *ent, } } - while (tree_queue->nr) - fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue)); + while (tree_queue->nr) { + if (fill_bitmap_tree(ent->bitmap, + prio_queue_get(tree_queue)) < 0) + return -1; + } + return 0; } static void store_selected(struct bb_commit *ent, struct commit *commit) @@ -432,7 +456,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit) kh_value(writer.bitmaps, hash_pos) = stored; } -void bitmap_writer_build(struct packing_data *to_pack) +int bitmap_writer_build(struct packing_data *to_pack) { struct bitmap_builder bb; size_t i; @@ -441,6 +465,7 @@ void bitmap_writer_build(struct packing_data *to_pack) struct prio_queue tree_queue = { NULL }; struct bitmap_index *old_bitmap; uint32_t *mapping; + int closed = 1; /* until proven otherwise */ writer.bitmaps = kh_init_oid_map(); writer.to_pack = to_pack; @@ -463,8 +488,11 @@ void bitmap_writer_build(struct packing_data *to_pack) struct commit *child; int reused = 0; - fill_bitmap_commit(ent, commit, &queue, &tree_queue, - old_bitmap, mapping); + if (fill_bitmap_commit(ent, commit, &queue, &tree_queue, + old_bitmap, mapping) < 0) { + closed = 0; + break; + } if (ent->selected) { 
store_selected(ent, commit); @@ -499,7 +527,9 @@ void bitmap_writer_build(struct packing_data *to_pack) stop_progress(&writer.progress); - compute_xor_offsets(); + if (closed) + compute_xor_offsets(); + return closed ? 0 : -1; } /** diff --git a/pack-bitmap.h b/pack-bitmap.h index 99d733eb264e9f..020cd8d868f9d3 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -87,7 +87,7 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit); void bitmap_writer_select_commits(struct commit **indexed_commits, unsigned int indexed_commits_nr, int max_bitmaps); -void bitmap_writer_build(struct packing_data *to_pack); +int bitmap_writer_build(struct packing_data *to_pack); void bitmap_writer_finish(struct pack_idx_entry **index, uint32_t index_nr, const char *filename, diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 584a039b851dbb..1667450917f3c9 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -536,7 +536,13 @@ test_expect_success 'gc does not repack promisor objects if there are none' ' repack_and_check () { rm -rf repo2 && cp -r repo repo2 && - git -C repo2 repack $1 -d && + if test x"$1" = "x--must-fail" + then + shift + test_must_fail git -C repo2 repack $1 -d + else + git -C repo2 repack $1 -d + fi && git -C repo2 fsck && git -C repo2 cat-file -e $2 && @@ -561,6 +567,7 @@ test_expect_success 'repack -d does not irreversibly delete promisor objects' ' printf "$THREE\n" | pack_as_from_promisor && delete_object repo "$ONE" && + repack_and_check --must-fail -ab "$TWO" "$THREE" && repack_and_check -a "$TWO" "$THREE" && repack_and_check -A "$TWO" "$THREE" && repack_and_check -l "$TWO" "$THREE" From 96a4c82c75ea42b97fa31a0197b83f3c9a4604d3 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:29 -0400 Subject: [PATCH 076/198] pack-bitmap-write.c: free existing bitmaps When writing a new bitmap, the bitmap writer code attempts to read the existing bitmap (if one is 
present). This is done in order to quickly permute the bits of any bitmaps for commits which appear in the existing bitmap, and were also selected for the new bitmap. But since this code was added in 341fa34887 (pack-bitmap-write: use existing bitmaps, 2020-12-08), the resources associated with opening an existing bitmap were never released. It's fine to ignore this, but it's bad hygiene. It will also cause a problem for the multi-pack-index builtin, which will be responsible not only for writing bitmaps, but also for expiring any old multi-pack bitmaps. If an existing bitmap was reused here, it will also be expired. That will cause a problem on platforms which require file resources to be closed before unlinking them, like Windows. Avoid this by ensuring we close reused bitmaps with free_bitmap_index() before removing them. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap-write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index d374f7884b4114..142fd0adb86e6f 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -520,6 +520,7 @@ int bitmap_writer_build(struct packing_data *to_pack) clear_prio_queue(&queue); clear_prio_queue(&tree_queue); bitmap_builder_clear(&bb); + free_bitmap_index(old_bitmap); free(mapping); trace2_region_leave("pack-bitmap-write", "building_bitmaps_total", From a6e6feff88e4ddade96410ea7f59a7624b2a6981 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:32 -0400 Subject: [PATCH 077/198] Documentation: describe MIDX-based bitmaps Update the technical documentation to describe the multi-pack bitmap format. This patch merely introduces the new format, and describes its high-level ideas. Git does not yet know how to read nor write these multi-pack variants, and so the subsequent patches will: - Introduce code to interpret multi-pack bitmaps, according to this document. 
- Then, introduce code to write multi-pack bitmaps from the 'git multi-pack-index write' sub-command. Finally, the implementation will gain tests in subsequent patches (as opposed to inline with the patch teaching Git how to write multi-pack bitmaps) to avoid a cyclic dependency. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/technical/bitmap-format.txt | 71 ++++++++++++++++---- Documentation/technical/multi-pack-index.txt | 10 +-- 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt index f8c18a0f7aec2b..04b3ec2178574a 100644 --- a/Documentation/technical/bitmap-format.txt +++ b/Documentation/technical/bitmap-format.txt @@ -1,6 +1,44 @@ GIT bitmap v1 format ==================== +== Pack and multi-pack bitmaps + +Bitmaps store reachability information about the set of objects in a packfile, +or a multi-pack index (MIDX). The former is defined obviously, and the latter is +defined as the union of objects in packs contained in the MIDX. + +A bitmap may belong to either one pack, or the repository's multi-pack index (if +it exists). A repository may have at most one bitmap. + +An object is uniquely described by its bit position within a bitmap: + + - If the bitmap belongs to a packfile, the __n__th bit corresponds to + the __n__th object in pack order. For a function `offset` which maps + objects to their byte offset within a pack, pack order is defined as + follows: + + o1 <= o2 <==> offset(o1) <= offset(o2) + + - If the bitmap belongs to a MIDX, the __n__th bit corresponds to the + __n__th object in MIDX order. With an additional function `pack` which + maps objects to the pack they were selected from by the MIDX, MIDX order + is defined as follows: + + o1 <= o2 <==> pack(o1) <= pack(o2) /\ offset(o1) <= offset(o2) + + The ordering between packs is done according to the MIDX's .rev file. 
+ Notably, the preferred pack sorts ahead of all other packs. + +The on-disk representation (described below) of a bitmap is the same regardless +of whether or not that bitmap belongs to a packfile or a MIDX. The only +difference is the interpretation of the bits, which is described above. + +Certain bitmap extensions are supported (see: Appendix B). No extensions are +required for bitmaps corresponding to packfiles. For bitmaps that correspond to +MIDXs, both the bit-cache and rev-cache extensions are required. + +== On-disk format + - A header appears at the beginning: 4-byte signature: {'B', 'I', 'T', 'M'} @@ -14,17 +52,19 @@ GIT bitmap v1 format The following flags are supported: - BITMAP_OPT_FULL_DAG (0x1) REQUIRED - This flag must always be present. It implies that the bitmap - index has been generated for a packfile with full closure - (i.e. where every single object in the packfile can find - its parent links inside the same packfile). This is a - requirement for the bitmap index format, also present in JGit, - that greatly reduces the complexity of the implementation. + This flag must always be present. It implies that the + bitmap index has been generated for a packfile or + multi-pack index (MIDX) with full closure (i.e. where + every single object in the packfile/MIDX can find its + parent links inside the same packfile/MIDX). This is a + requirement for the bitmap index format, also present in + JGit, that greatly reduces the complexity of the + implementation. - BITMAP_OPT_HASH_CACHE (0x4) If present, the end of the bitmap file contains `N` 32-bit name-hash values, one per object in the - pack. The format and meaning of the name-hash is + pack/MIDX. The format and meaning of the name-hash is described below. 4-byte entry count (network byte order) @@ -33,7 +73,8 @@ GIT bitmap v1 format 20-byte checksum - The SHA1 checksum of the pack this bitmap index belongs to. + The SHA1 checksum of the pack/MIDX this bitmap index + belongs to. 
- 4 EWAH bitmaps that act as type indexes @@ -50,7 +91,7 @@ GIT bitmap v1 format - Tags In each bitmap, the `n`th bit is set to true if the `n`th object - in the packfile is of that type. + in the packfile or multi-pack index is of that type. The obvious consequence is that the OR of all 4 bitmaps will result in a full set (all bits set), and the AND of all 4 bitmaps will @@ -62,8 +103,9 @@ GIT bitmap v1 format Each entry contains the following: - 4-byte object position (network byte order) - The position **in the index for the packfile** where the - bitmap for this commit is found. + The position **in the index for the packfile or + multi-pack index** where the bitmap for this commit is + found. - 1-byte XOR-offset The xor offset used to compress this bitmap. For an entry @@ -146,10 +188,11 @@ Name-hash cache --------------- If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains -a cache of 32-bit values, one per object in the pack. The value at +a cache of 32-bit values, one per object in the pack/MIDX. The value at position `i` is the hash of the pathname at which the `i`th object -(counting in index order) in the pack can be found. This can be fed -into the delta heuristics to compare objects with similar pathnames. +(counting in index or multi-pack index order) in the pack/MIDX can be found. +This can be fed into the delta heuristics to compare objects with similar +pathnames. The hash algorithm used is: diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt index fb688976c4c033..1a73c3ee203ea0 100644 --- a/Documentation/technical/multi-pack-index.txt +++ b/Documentation/technical/multi-pack-index.txt @@ -71,14 +71,10 @@ Future Work still reducing the number of binary searches required for object lookups. -- The reachability bitmap is currently paired directly with a single - packfile, using the pack-order as the object order to hopefully - compress the bitmaps well using run-length encoding. 
This could be - extended to pair a reachability bitmap with a multi-pack-index. If - the multi-pack-index is extended to store a "stable object order" +- If the multi-pack-index is extended to store a "stable object order" (a function Order(hash) = integer that is constant for a given hash, - even as the multi-pack-index is updated) then a reachability bitmap - could point to a multi-pack-index and be updated independently. + even as the multi-pack-index is updated) then MIDX bitmaps could be + updated independently of the MIDX. - Packfiles can be marked as "special" using empty files that share the initial name but replace ".pack" with ".keep" or ".promisor". From e523ab28f71c1aea46a0972e16e79b7dbd851491 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:35 -0400 Subject: [PATCH 078/198] midx: clear auxiliary .rev after replacing the MIDX When writing a new multi-pack index, write_midx_internal() attempts to clean up any auxiliary files (currently just the MIDX's `.rev` file, but soon to include a `.bitmap`, too) corresponding to the MIDX it's replacing. This step should happen after the new MIDX is written into place, since doing so beforehand means that the old MIDX could be read without its corresponding .rev file. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- midx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 21d6a05e887283..3759263a22f86d 100644 --- a/midx.c +++ b/midx.c @@ -1076,10 +1076,11 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * if (flags & MIDX_WRITE_REV_INDEX) write_midx_reverse_index(midx_name, midx_hash, &ctx); - clear_midx_files_ext(the_repository, ".rev", midx_hash); commit_lock_file(&lk); + clear_midx_files_ext(the_repository, ".rev", midx_hash); + cleanup: for (i = 0; i < ctx.nr; i++) { if (ctx.info[i].p) { From 85fec428eaaf050537e7657a91a27da77d6e2af0 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:38 -0400 Subject: [PATCH 079/198] midx: reject empty `--preferred-pack`'s The soon-to-be-implemented multi-pack bitmap treats the object in the first bit position specially by assuming that all objects in the pack it was selected from are also represented from that pack in the MIDX. In other words, the pack from which the first object was selected must also have all of its other objects selected from that same pack in the MIDX in case of any duplicates. But this assumption relies on the fact that there is at least one object in that pack to begin with; otherwise the object in the first bit position isn't from a preferred pack, in which case we can no longer assume that all objects in that pack were also selected from the same pack. Guard this assumption by checking the number of objects in the given preferred pack, and failing if the given pack is empty. To make sure we can safely perform this check, open any packs which are contained in an existing MIDX via prepare_midx_pack(). The same is done for new packs via the add_pack_to_midx() callback, but packs picked up from a previous MIDX will not yet have these opened.
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/git-multi-pack-index.txt | 6 +++--- midx.c | 29 ++++++++++++++++++++++++++ t/t5319-multi-pack-index.sh | 17 +++++++++++++++ 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index ffd601bc17b4dc..c9b063d31e1df8 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -37,9 +37,9 @@ write:: -- --preferred-pack=<pack>:: Optionally specify the tie-breaking pack used when - multiple packs contain the same object. If not given, - ties are broken in favor of the pack with the lowest - mtime. + multiple packs contain the same object. `<pack>` must + contain at least one object. If not given, ties are + broken in favor of the pack with the lowest mtime. -- verify:: diff --git a/midx.c b/midx.c index 3759263a22f86d..774eff028fe46c 100644 --- a/midx.c +++ b/midx.c @@ -924,6 +924,25 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]); ctx.info[ctx.nr].p = NULL; ctx.info[ctx.nr].expired = 0; + + if (flags & MIDX_WRITE_REV_INDEX) { + /* + * If generating a reverse index, need to have + * packed_git's loaded to compare their + * mtimes and object count. 
+ */ + if (prepare_midx_pack(the_repository, ctx.m, i)) { + error(_("could not load pack")); + result = 1; + goto cleanup; + } + + if (open_pack_index(ctx.m->packs[i])) + die(_("could not open index for %s"), + ctx.m->packs[i]->pack_name); + ctx.info[ctx.nr].p = ctx.m->packs[i]; + } + ctx.nr++; } } @@ -951,6 +970,16 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * } } + if (ctx.preferred_pack_idx > -1) { + struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p; + if (!preferred->num_objects) { + error(_("cannot select preferred pack %s with no objects"), + preferred->pack_name); + result = 1; + goto cleanup; + } + } + ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr, ctx.preferred_pack_idx); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 5641d158dfc4c6..909f9bf7b8ee79 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -277,6 +277,23 @@ test_expect_success 'midx picks objects from preferred pack' ' ) ' +test_expect_success 'preferred packs must be non-empty' ' + test_when_finished rm -rf preferred.git && + git init preferred.git && + ( + cd preferred.git && + + test_commit base && + git repack -ad && + + empty="$(git pack-objects $objdir/pack/pack </dev/null)" && + + test_must_fail git multi-pack-index write \ + --preferred-pack=pack-$empty.pack 2>err && + grep "with no objects" err + ) +' + test_expect_success 'verify multi-pack-index success' ' git multi-pack-index verify --object-dir=$objdir ' From 60c38a109b815340720c3b85b10c8bea4d54fd5e Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:41 -0400 Subject: [PATCH 080/198] midx: infer preferred pack when not given one In 9218c6a40c (midx: allow marking a pack as preferred, 2021-03-30), the multi-pack index code learned how to select a pack which all duplicate objects are selected from. 
That is, if an object appears in multiple packs, select the copy in the preferred pack before breaking ties according to the other rules like pack mtime and readdir() order. Not specifying a preferred pack can cause serious problems with multi-pack reachability bitmaps, because these bitmaps rely on having at least one pack from which all duplicates are selected. Not having such a pack causes problems with the code in pack-objects to reuse packs verbatim (e.g., that code assumes that a delta object in a chunk of a pack sent verbatim will have its base object sent from the same pack). So why does not marking a pack preferred cause problems here? The reason is roughly as follows: - Ties are broken (when handling duplicate objects) by sorting according to midx_oid_compare(), which sorts objects by OID, preferred-ness, pack mtime, and finally pack ID (more on that later). - The pseudo pack-order (described in Documentation/technical/pack-format.txt under the section "multi-pack-index reverse indexes") is computed by midx_pack_order(), and sorts by pack ID and pack offset, with preferred packs sorting first. - But! Pack IDs come from incrementing the pack count in add_pack_to_midx(), which is a callback to for_each_file_in_pack_dir(), meaning that pack IDs are assigned in readdir() order. When specifying a preferred pack, all of that works fine, because duplicate objects are correctly resolved in favor of the copy in the preferred pack, and the preferred pack sorts first in the object order. "Sorting first" is critical, because the bitmap code relies on finding out which pack holds the first object in the MIDX's pseudo pack-order to determine which pack is preferred.
But if we didn't specify a preferred pack, and the pack which comes first in readdir() order does not also have the lowest timestamp, then it's possible that that pack (the one that sorts first in pseudo-pack order, which the bitmap code will treat as the preferred one) did *not* have all duplicate objects resolved in its favor, resulting in breakage. The fix is simple: pick a (semi-arbitrary, non-empty) preferred pack when none was specified. This forces that pack to have duplicates resolved in its favor, and (critically) to sort first in pseudo-pack order. Unfortunately, testing this behavior portably isn't possible, since it depends on readdir() order which isn't guaranteed by POSIX. (Note that multi-pack reachability bitmaps have yet to be implemented; so in that sense this patch is fixing a bug which does not yet exist. But by having this patch beforehand, we can prevent the bug from ever materializing.) Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- midx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/midx.c b/midx.c index 774eff028fe46c..112197a2849762 100644 --- a/midx.c +++ b/midx.c @@ -959,15 +959,57 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) goto cleanup; - ctx.preferred_pack_idx = -1; if (preferred_pack_name) { + int found = 0; for (i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; + found = 1; break; } } + + if (!found) + warning(_("unknown preferred pack: '%s'"), + preferred_pack_name); + } else if (ctx.nr && (flags & MIDX_WRITE_REV_INDEX)) { + struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p; + ctx.preferred_pack_idx = 0; + + if (packs_to_drop && packs_to_drop->nr) + BUG("cannot write a MIDX bitmap during expiration"); + + /* + * set a 
preferred pack when writing a bitmap to ensure that + * the pack from which the first object is selected in pseudo + * pack-order has all of its objects selected from that pack + * (and not another pack containing a duplicate) + */ + for (i = 1; i < ctx.nr; i++) { + struct packed_git *p = ctx.info[i].p; + + if (!oldest->num_objects || p->mtime < oldest->mtime) { + oldest = p; + ctx.preferred_pack_idx = i; + } + } + + if (!oldest->num_objects) { + /* + * If all packs are empty; unset the preferred index. + * This is acceptable since there will be no duplicate + * objects to resolve, so the preferred value doesn't + * matter. + */ + ctx.preferred_pack_idx = -1; + } + } else { + /* + * otherwise don't mark any pack as preferred to avoid + * interfering with expiration logic below + */ + ctx.preferred_pack_idx = -1; } if (ctx.preferred_pack_idx > -1) { @@ -1048,11 +1090,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * ctx.info, ctx.nr, sizeof(*ctx.info), idx_or_pack_name_cmp); - - if (!preferred) - warning(_("unknown preferred pack: '%s'"), - preferred_pack_name); - else { + if (preferred) { uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), From f714355889ed5085a19f678eb432c87d2e0e8a46 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:43 -0400 Subject: [PATCH 081/198] midx: close linked MIDXs, avoid leaking memory When a repository has at least one alternate, the MIDX belonging to each alternate is accessed through the `next` pointer on the main object store's copy of the MIDX. close_midx() didn't bother to close any of the linked MIDXs. It likewise didn't free the memory pointed to by `m`, leaving uninitialized bytes with live pointers to them left around in the heap. Clean this up by closing linked MIDXs, and freeing up the memory pointed to by each of them. 
When callers call close_midx(), then they can discard the entire linked list of MIDXs and set their pointer to the head of that list to NULL. This isn't strictly required for the upcoming patches, but it makes it much more difficult (though still possible, e.g., by calling `close_midx(m->next)` which leaves `m->next` pointing at uninitialized bytes) to have pointers to uninitialized memory. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- midx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/midx.c b/midx.c index 112197a2849762..5007faff1b73d9 100644 --- a/midx.c +++ b/midx.c @@ -195,6 +195,8 @@ void close_midx(struct multi_pack_index *m) if (!m) return; + close_midx(m->next); + munmap((unsigned char *)m->data, m->data_len); for (i = 0; i < m->num_packs; i++) { @@ -203,6 +205,7 @@ void close_midx(struct multi_pack_index *m) } FREE_AND_NULL(m->packs); FREE_AND_NULL(m->pack_names); + free(m); } int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id) From c50613c46d35deba7bce0156e87e6d447fed0c1e Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:46 -0400 Subject: [PATCH 082/198] midx: avoid opening multiple MIDXs when writing Opening multiple instances of the same MIDX can lead to problems like two separate packed_git structures which represent the same pack being added to the repository's object store. The above scenario can happen because prepare_midx_pack() checks if `m->packs[pack_int_id]` is NULL in order to determine if a pack has been opened and installed in the repository before. But a caller can construct two copies of the same MIDX by calling get_multi_pack_index() and load_multi_pack_index() since the former manipulates the object store directly but the latter is a lower-level routine which allocates a new MIDX for each call.
So if prepare_midx_pack() is called on multiple MIDXs with the same pack_int_id, then that pack will be installed twice in the object store's packed_git pointer. This can lead to problems in, e.g., the pack-bitmap code, which does something like the following (in pack-bitmap.c:open_pack_bitmap()): struct bitmap_index *bitmap_git = ...; for (p = get_all_packs(r); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) ret = 0; } which is a problem if two copies of the same pack exist in the packed_git list because pack-bitmap.c:open_pack_bitmap_1() contains a conditional like the following: if (bitmap_git->pack || bitmap_git->midx) { /* ignore extra bitmap file; we can only handle one */ warning("ignoring extra bitmap file: %s", packfile->pack_name); close(fd); return -1; } Avoid this scenario by not letting write_midx_internal() open a MIDX that isn't also pointed at by the object store. So long as this is the case, other routines should prefer to open MIDXs with get_multi_pack_index() or reprepare_packed_git() instead of creating instances on their own. Because get_multi_pack_index() returns `r->object_store->multi_pack_index` if it is non-NULL, we'll only have one instance of a MIDX open at one time, avoiding these problems. To encourage this, drop the `struct multi_pack_index *` parameter from `write_midx_internal()`, and rely instead on the `object_dir` to find (or initialize) the correct MIDX instance. Likewise, replace the call to `close_midx()` with `close_object_store()`, since we're about to replace the MIDX with a new one and should invalidate the object store's memory of any MIDX that might have existed beforehand.
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- midx.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/midx.c b/midx.c index 5007faff1b73d9..9893795e298bb1 100644 --- a/midx.c +++ b/midx.c @@ -888,7 +888,7 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, static void clear_midx_files_ext(struct repository *r, const char *ext, unsigned char *keep_hash); -static int write_midx_internal(const char *object_dir, struct multi_pack_index *m, +static int write_midx_internal(const char *object_dir, struct string_list *packs_to_drop, const char *preferred_pack_name, unsigned flags) @@ -899,6 +899,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * struct hashfile *f = NULL; struct lock_file lk; struct write_midx_context ctx = { 0 }; + struct multi_pack_index *cur; int pack_name_concat_len = 0; int dropped_packs = 0; int result = 0; @@ -909,10 +910,14 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * die_errno(_("unable to create leading directories of %s"), midx_name); - if (m) - ctx.m = m; - else - ctx.m = load_multi_pack_index(object_dir, 1); + for (cur = get_multi_pack_index(the_repository); cur; cur = cur->next) { + if (!strcmp(object_dir, cur->object_dir)) { + ctx.m = cur; + break; + } + } + if (!ctx.m) + ctx.m = get_local_multi_pack_index(the_repository); ctx.nr = 0; ctx.alloc = ctx.m ? 
ctx.m->num_packs : 16; @@ -1172,8 +1177,7 @@ int write_midx_file(const char *object_dir, const char *preferred_pack_name, unsigned flags) { - return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name, - flags); + return write_midx_internal(object_dir, NULL, preferred_pack_name, flags); } struct clear_midx_data { @@ -1447,8 +1451,10 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla free(count); - if (packs_to_drop.nr) - result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags); + if (packs_to_drop.nr) { + result = write_midx_internal(object_dir, &packs_to_drop, NULL, flags); + m = NULL; + } string_list_clear(&packs_to_drop, 0); return result; @@ -1637,7 +1643,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, goto cleanup; } - result = write_midx_internal(object_dir, m, NULL, NULL, flags); + result = write_midx_internal(object_dir, NULL, NULL, flags); m = NULL; cleanup: From 8544b76cf029c3b032d70ca6e9406faf689685fd Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:49 -0400 Subject: [PATCH 083/198] pack-bitmap.c: introduce 'bitmap_num_objects()' A subsequent patch to support reading MIDX bitmaps will be less noisy after extracting a generic function to return how many objects are contained in a bitmap. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index a1425f30541e0a..1eee7f7fbaa080 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -136,6 +136,11 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) return b; } +static uint32_t bitmap_num_objects(struct bitmap_index *index) +{ + return index->pack->num_objects; +} + static int load_bitmap_header(struct bitmap_index *index) { struct bitmap_disk_header *header = (void *)index->map; @@ -154,7 +159,7 @@ static int load_bitmap_header(struct bitmap_index *index) /* Parse known bitmap format options */ { uint32_t flags = ntohs(header->options); - size_t cache_size = st_mult(index->pack->num_objects, sizeof(uint32_t)); + size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t)); unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz; if ((flags & BITMAP_OPT_FULL_DAG) == 0) @@ -399,7 +404,7 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, if (pos < kh_end(positions)) { int bitmap_pos = kh_value(positions, pos); - return bitmap_pos + bitmap_git->pack->num_objects; + return bitmap_pos + bitmap_num_objects(bitmap_git); } return -1; @@ -451,7 +456,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, bitmap_pos = kh_value(eindex->positions, hash_pos); } - return bitmap_pos + bitmap_git->pack->num_objects; + return bitmap_pos + bitmap_num_objects(bitmap_git); } struct bitmap_show_data { @@ -650,7 +655,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { struct object *obj; - if (!bitmap_get(objects, bitmap_git->pack->num_objects + i)) + if (!bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) continue; obj = eindex->objects[i]; @@ -808,7 +813,7 @@ static void 
filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, * individually. */ for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_git->pack->num_objects; + uint32_t pos = i + bitmap_num_objects(bitmap_git); if (eindex->objects[i]->type == type && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos)) @@ -835,7 +840,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, oi.sizep = &size; - if (pos < pack->num_objects) { + if (pos < bitmap_num_objects(bitmap_git)) { off_t ofs = pack_pos_to_offset(pack, pos); if (packed_object_info(the_repository, pack, ofs, &oi) < 0) { struct object_id oid; @@ -845,7 +850,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, } } else { struct eindex *eindex = &bitmap_git->ext_index; - struct object *obj = eindex->objects[pos - pack->num_objects]; + struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)]; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) die(_("unable to get size of %s"), oid_to_hex(&obj->oid)); } @@ -887,7 +892,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, } for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_git->pack->num_objects; + uint32_t pos = i + bitmap_num_objects(bitmap_git); if (eindex->objects[i]->type == OBJ_BLOB && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos) && @@ -1113,8 +1118,8 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, enum object_type type; unsigned long size; - if (pos >= bitmap_git->pack->num_objects) - return; /* not actually in the pack */ + if (pos >= bitmap_num_objects(bitmap_git)) + return; /* not actually in the pack or MIDX */ offset = header = pack_pos_to_offset(bitmap_git->pack, pos); type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size); @@ -1180,6 +1185,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct pack_window *w_curs = NULL; size_t i = 0; uint32_t offset; + uint32_t 
objects_nr = bitmap_num_objects(bitmap_git); assert(result); @@ -1187,8 +1193,8 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, i++; /* Don't mark objects not in the packfile */ - if (i > bitmap_git->pack->num_objects / BITS_IN_EWORD) - i = bitmap_git->pack->num_objects / BITS_IN_EWORD; + if (i > objects_nr / BITS_IN_EWORD) + i = objects_nr / BITS_IN_EWORD; reuse = bitmap_word_alloc(i); memset(reuse->words, 0xFF, i * sizeof(eword_t)); @@ -1272,7 +1278,7 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { if (eindex->objects[i]->type == type && - bitmap_get(objects, bitmap_git->pack->num_objects + i)) + bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) count++; } @@ -1493,7 +1499,7 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, uint32_t i, num_objects; uint32_t *reposition; - num_objects = bitmap_git->pack->num_objects; + num_objects = bitmap_num_objects(bitmap_git); CALLOC_ARRAY(reposition, num_objects); for (i = 0; i < num_objects; ++i) { @@ -1576,7 +1582,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) { struct bitmap *result = bitmap_git->result; - struct packed_git *pack = bitmap_git->pack; struct eindex *eindex = &bitmap_git->ext_index; off_t total = 0; struct object_info oi = OBJECT_INFO_INIT; @@ -1588,7 +1593,7 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) for (i = 0; i < eindex->count; i++) { struct object *obj = eindex->objects[i]; - if (!bitmap_get(result, pack->num_objects + i)) + if (!bitmap_get(result, bitmap_num_objects(bitmap_git) + i)) continue; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) From e765bea088db73f830197ecf15f246e7e208395d Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:51 -0400 Subject: [PATCH 084/198] pack-bitmap.c: introduce 
'nth_bitmap_object_oid()' A subsequent patch to support reading MIDX bitmaps will be less noisy after extracting a generic function to fetch the nth OID contained in the bitmap. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 1eee7f7fbaa080..dd292e669aa553 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -223,6 +223,13 @@ static inline uint8_t read_u8(const unsigned char *buffer, size_t *pos) #define MAX_XOR_OFFSET 160 +static int nth_bitmap_object_oid(struct bitmap_index *index, + struct object_id *oid, + uint32_t n) +{ + return nth_packed_object_id(oid, index->pack, n); +} + static int load_bitmap_entries_v1(struct bitmap_index *index) { uint32_t i; @@ -242,7 +249,7 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) xor_offset = read_u8(index->map, &index->map_pos); flags = read_u8(index->map, &index->map_pos); - if (nth_packed_object_id(&oid, index->pack, commit_idx_pos) < 0) + if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0) return error("corrupt ewah bitmap: commit index %u out of range", (unsigned)commit_idx_pos); @@ -844,8 +851,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, off_t ofs = pack_pos_to_offset(pack, pos); if (packed_object_info(the_repository, pack, ofs, &oi) < 0) { struct object_id oid; - nth_packed_object_id(&oid, pack, - pack_pos_to_index(pack, pos)); + nth_bitmap_object_oid(bitmap_git, &oid, + pack_pos_to_index(pack, pos)); die(_("unable to get size of %s"), oid_to_hex(&oid)); } } else { From 0c24be7bc7ad2b6af6fe624cf907189fbcac85d3 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:54 -0400 Subject: [PATCH 085/198] pack-bitmap.c: introduce 'bitmap_is_preferred_refname()' In a recent commit, pack-objects learned support for the 'pack.preferBitmapTips' configuration. 
This patch prepares the multi-pack bitmap code to respect this configuration, too. The yet-to-be implemented code will find that it is more efficient to check whether each reference contains a prefix found in the configured set of values rather than doing an additional traversal. Implement a function 'bitmap_is_preferred_refname()' which will perform that check. Its caller will be added in a subsequent patch. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap.c | 16 ++++++++++++++++ pack-bitmap.h | 1 + 2 files changed, 17 insertions(+) diff --git a/pack-bitmap.c b/pack-bitmap.c index dd292e669aa553..2b11cf944c4dd6 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1634,3 +1634,19 @@ const struct string_list *bitmap_preferred_tips(struct repository *r) { return repo_config_get_value_multi(r, "pack.preferbitmaptips"); } + +int bitmap_is_preferred_refname(struct repository *r, const char *refname) +{ + const struct string_list *preferred_tips = bitmap_preferred_tips(r); + struct string_list_item *item; + + if (!preferred_tips) + return 0; + + for_each_string_list_item(item, preferred_tips) { + if (starts_with(refname, item->string)) + return 1; + } + + return 0; +} diff --git a/pack-bitmap.h b/pack-bitmap.h index 020cd8d868f9d3..52ea10de5117ab 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -94,5 +94,6 @@ void bitmap_writer_finish(struct pack_idx_entry **index, uint16_t options); const struct string_list *bitmap_preferred_tips(struct repository *r); +int bitmap_is_preferred_refname(struct repository *r, const char *refname); #endif From bfa5c3ba868f31512f04136505f2208bfef4d171 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:56 -0400 Subject: [PATCH 086/198] pack-bitmap.c: avoid redundant calls to try_partial_reuse try_partial_reuse() is used to mark any bits in the beginning of a bitmap whose objects can be reused verbatim from the pack they came from. 
Currently this function returns void, and signals nothing to the caller when bits could not be reused. But multi-pack bitmaps would benefit from having such a signal, because they may try to pass objects which are in bounds, but from a pack other than the preferred one. Any extra calls are noops because of a conditional in reuse_partial_packfile_from_bitmap(), but those loop iterations can be avoided by letting try_partial_reuse() indicate when it can't accept any more bits for reuse, and then listening to that signal. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 2b11cf944c4dd6..20fa19fba132c8 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1116,22 +1116,26 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, return NULL; } -static void try_partial_reuse(struct bitmap_index *bitmap_git, - size_t pos, - struct bitmap *reuse, - struct pack_window **w_curs) +/* + * -1 means "stop trying further objects"; 0 means we may or may not have + * reused, but you can keep feeding bits. 
+ */ +static int try_partial_reuse(struct bitmap_index *bitmap_git, + size_t pos, + struct bitmap *reuse, + struct pack_window **w_curs) { off_t offset, header; enum object_type type; unsigned long size; if (pos >= bitmap_num_objects(bitmap_git)) - return; /* not actually in the pack or MIDX */ + return -1; /* not actually in the pack or MIDX */ offset = header = pack_pos_to_offset(bitmap_git->pack, pos); type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size); if (type < 0) - return; /* broken packfile, punt */ + return -1; /* broken packfile, punt */ if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; @@ -1148,9 +1152,9 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, base_offset = get_delta_base(bitmap_git->pack, w_curs, &offset, type, header); if (!base_offset) - return; + return 0; if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0) - return; + return 0; /* * We assume delta dependencies always point backwards. This @@ -1162,7 +1166,7 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, * odd parameters. */ if (base_pos >= pos) - return; + return 0; /* * And finally, if we're not sending the base as part of our @@ -1173,13 +1177,14 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, * object_entry code path handle it. */ if (!bitmap_get(reuse, base_pos)) - return; + return 0; } /* * If we got here, then the object is OK to reuse. Mark it. 
*/ bitmap_set(reuse, pos); + return 0; } int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, @@ -1215,10 +1220,23 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, break; offset += ewah_bit_ctz64(word >> offset); - try_partial_reuse(bitmap_git, pos + offset, reuse, &w_curs); + if (try_partial_reuse(bitmap_git, pos + offset, reuse, + &w_curs) < 0) { + /* + * try_partial_reuse indicated we couldn't reuse + * any bits, so there is no point in trying more + * bits in the current word, or any other words + * in result. + * + * Jump out of both loops to avoid future + * unnecessary calls to try_partial_reuse. + */ + goto done; + } } } +done: unuse_pack(&w_curs); *entries = bitmap_popcount(reuse); From df01d79fbe0c953f00e5d28c09f968bdfb2e5c63 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:19:59 -0400 Subject: [PATCH 087/198] pack-bitmap: read multi-pack bitmaps This prepares the code in pack-bitmap to interpret the new multi-pack bitmaps described in Documentation/technical/bitmap-format.txt, which mostly involves converting bit positions to accommodate looking them up in a MIDX. Note that there are currently no writers who write multi-pack bitmaps, and that this will be implemented in the subsequent commit. Note also that get_midx_checksum() and get_midx_filename() are made non-static so they can be called from pack-bitmap.c. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/pack-objects.c | 5 + midx.c | 4 +- midx.h | 2 + pack-bitmap-write.c | 2 +- pack-bitmap.c | 357 ++++++++++++++++++++++++++++++++++++----- pack-bitmap.h | 6 + packfile.c | 2 +- 7 files changed, 336 insertions(+), 42 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8a523624a16eb2..e11d3ac2e5d2a1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1124,6 +1124,11 @@ static void write_reused_pack(struct hashfile *f) break; offset += ewah_bit_ctz64(word >> offset); + /* + * Can use bit positions directly, even for MIDX + * bitmaps. See comment in try_partial_reuse() + * for why. + */ write_reused_pack_one(pos + offset, f, &w_curs); display_progress(progress_state, ++written); } diff --git a/midx.c b/midx.c index 9893795e298bb1..9a971105336a7f 100644 --- a/midx.c +++ b/midx.c @@ -48,12 +48,12 @@ static uint8_t oid_version(void) } } -static const unsigned char *get_midx_checksum(struct multi_pack_index *m) +const unsigned char *get_midx_checksum(struct multi_pack_index *m) { return m->data + m->data_len - the_hash_algo->rawsz; } -static char *get_midx_filename(const char *object_dir) +char *get_midx_filename(const char *object_dir) { return xstrfmt("%s/pack/multi-pack-index", object_dir); } diff --git a/midx.h b/midx.h index 8684cf0fefe81d..1172df1a711489 100644 --- a/midx.h +++ b/midx.h @@ -42,6 +42,8 @@ struct multi_pack_index { #define MIDX_PROGRESS (1 << 0) #define MIDX_WRITE_REV_INDEX (1 << 1) +const unsigned char *get_midx_checksum(struct multi_pack_index *m); +char *get_midx_filename(const char *object_dir); char *get_midx_rev_filename(struct multi_pack_index *m); struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 142fd0adb86e6f..9c55c1531e1f55 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -48,7 +48,7 @@ 
void bitmap_writer_show_progress(int show) } /** - * Build the initial type index for the packfile + * Build the initial type index for the packfile or multi-pack-index */ void bitmap_writer_build_type_index(struct packing_data *to_pack, struct pack_idx_entry **index, diff --git a/pack-bitmap.c b/pack-bitmap.c index 20fa19fba132c8..9b76c0cd276dba 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -13,6 +13,7 @@ #include "repository.h" #include "object-store.h" #include "list-objects-filter-options.h" +#include "midx.h" #include "config.h" /* @@ -35,8 +36,15 @@ struct stored_bitmap { * the active bitmap index is the largest one. */ struct bitmap_index { - /* Packfile to which this bitmap index belongs to */ + /* + * The pack or multi-pack index (MIDX) that this bitmap index belongs + * to. + * + * Exactly one of these must be non-NULL; this specifies the object + * order used to interpret this bitmap. + */ struct packed_git *pack; + struct multi_pack_index *midx; /* * Mark the first `reuse_objects` in the packfile as reused: @@ -71,6 +79,9 @@ struct bitmap_index { /* If not NULL, this is a name-hash cache pointing into map. */ uint32_t *hashes; + /* The checksum of the packfile or MIDX; points into map. */ + const unsigned char *checksum; + /* * Extended index. * @@ -138,6 +149,8 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) static uint32_t bitmap_num_objects(struct bitmap_index *index) { + if (index->midx) + return index->midx->num_objects; return index->pack->num_objects; } @@ -175,6 +188,7 @@ static int load_bitmap_header(struct bitmap_index *index) } index->entry_count = ntohl(header->entry_count); + index->checksum = header->checksum; index->map_pos += header_size; return 0; } @@ -227,6 +241,8 @@ static int nth_bitmap_object_oid(struct bitmap_index *index, struct object_id *oid, uint32_t n) { + if (index->midx) + return nth_midxed_object_oid(oid, index->midx, n) ? 
0 : -1; return nth_packed_object_id(oid, index->pack, n); } @@ -274,7 +290,14 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) return 0; } -static char *pack_bitmap_filename(struct packed_git *p) +char *midx_bitmap_filename(struct multi_pack_index *midx) +{ + return xstrfmt("%s-%s.bitmap", + get_midx_filename(midx->object_dir), + hash_to_hex(get_midx_checksum(midx))); +} + +char *pack_bitmap_filename(struct packed_git *p) { size_t len; @@ -283,6 +306,57 @@ static char *pack_bitmap_filename(struct packed_git *p) return xstrfmt("%.*s.bitmap", (int)len, p->pack_name); } +static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, + struct multi_pack_index *midx) +{ + struct stat st; + char *idx_name = midx_bitmap_filename(midx); + int fd = git_open(idx_name); + + free(idx_name); + + if (fd < 0) + return -1; + + if (fstat(fd, &st)) { + close(fd); + return -1; + } + + if (bitmap_git->pack || bitmap_git->midx) { + /* ignore extra bitmap file; we can only handle one */ + warning("ignoring extra bitmap file: %s", + get_midx_filename(midx->object_dir)); + close(fd); + return -1; + } + + bitmap_git->midx = midx; + bitmap_git->map_size = xsize_t(st.st_size); + bitmap_git->map_pos = 0; + bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ, + MAP_PRIVATE, fd, 0); + close(fd); + + if (load_bitmap_header(bitmap_git) < 0) + goto cleanup; + + if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) + goto cleanup; + + if (load_midx_revindex(bitmap_git->midx) < 0) { + warning(_("multi-pack bitmap is missing required reverse index")); + goto cleanup; + } + return 0; + +cleanup: + munmap(bitmap_git->map, bitmap_git->map_size); + bitmap_git->map_size = 0; + bitmap_git->map = NULL; + return -1; +} + static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile) { int fd; @@ -304,7 +378,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git return -1; } - if (bitmap_git->pack) { + if 
(bitmap_git->pack || bitmap_git->midx) { + /* ignore extra bitmap file; we can only handle one */ warning("ignoring extra bitmap file: %s", packfile->pack_name); close(fd); return -1; @@ -326,13 +401,39 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git return 0; } -static int load_pack_bitmap(struct bitmap_index *bitmap_git) +static int load_reverse_index(struct bitmap_index *bitmap_git) +{ + if (bitmap_is_midx(bitmap_git)) { + uint32_t i; + int ret; + + /* + * The multi-pack-index's .rev file is already loaded via + * open_midx_bitmap_1(). + * + * But we still need to open the individual pack .rev files, + * since we will need to make use of them in pack-objects. + */ + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) + die(_("load_reverse_index: could not open pack")); + ret = load_pack_revindex(bitmap_git->midx->packs[i]); + if (ret) + return ret; + } + return 0; + } + return load_pack_revindex(bitmap_git->pack); +} + +static int load_bitmap(struct bitmap_index *bitmap_git) { assert(bitmap_git->map); bitmap_git->bitmaps = kh_init_oid_map(); bitmap_git->ext_index.positions = kh_init_oid_pos(); - if (load_pack_revindex(bitmap_git->pack)) + + if (load_reverse_index(bitmap_git)) goto failed; if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) || @@ -376,11 +477,47 @@ static int open_pack_bitmap(struct repository *r, return ret; } +static int open_midx_bitmap(struct repository *r, + struct bitmap_index *bitmap_git) +{ + struct multi_pack_index *midx; + + assert(!bitmap_git->map); + + for (midx = get_multi_pack_index(r); midx; midx = midx->next) { + if (!open_midx_bitmap_1(bitmap_git, midx)) + return 0; + } + return -1; +} + +static int open_bitmap(struct repository *r, + struct bitmap_index *bitmap_git) +{ + assert(!bitmap_git->map); + + if (!open_midx_bitmap(r, bitmap_git)) + return 0; + return open_pack_bitmap(r, bitmap_git); +} + struct bitmap_index
*prepare_bitmap_git(struct repository *r) { struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); - if (!open_pack_bitmap(r, bitmap_git) && !load_pack_bitmap(bitmap_git)) + if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git)) + return bitmap_git; + + free_bitmap_index(bitmap_git); + return NULL; +} + +struct bitmap_index *prepare_midx_bitmap_git(struct repository *r, + struct multi_pack_index *midx) +{ + struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); + + if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(bitmap_git)) return bitmap_git; free_bitmap_index(bitmap_git); @@ -430,10 +567,26 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, return pos; } +static int bitmap_position_midx(struct bitmap_index *bitmap_git, + const struct object_id *oid) +{ + uint32_t want, got; + if (!bsearch_midx(oid, bitmap_git->midx, &want)) + return -1; + + if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0) + return -1; + return got; +} + static int bitmap_position(struct bitmap_index *bitmap_git, const struct object_id *oid) { - int pos = bitmap_position_packfile(bitmap_git, oid); + int pos; + if (bitmap_is_midx(bitmap_git)) + pos = bitmap_position_midx(bitmap_git, oid); + else + pos = bitmap_position_packfile(bitmap_git, oid); return (pos >= 0) ? 
pos : bitmap_position_extended(bitmap_git, oid); } @@ -726,6 +879,7 @@ static void show_objects_for_type( continue; for (offset = 0; offset < BITS_IN_EWORD; ++offset) { + struct packed_git *pack; struct object_id oid; uint32_t hash = 0, index_pos; off_t ofs; @@ -735,14 +889,28 @@ static void show_objects_for_type( offset += ewah_bit_ctz64(word >> offset); - index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); - ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); - nth_packed_object_id(&oid, bitmap_git->pack, index_pos); + if (bitmap_is_midx(bitmap_git)) { + struct multi_pack_index *m = bitmap_git->midx; + uint32_t pack_id; + + index_pos = pack_pos_to_midx(m, pos + offset); + ofs = nth_midxed_offset(m, index_pos); + nth_midxed_object_oid(&oid, m, index_pos); + + pack_id = nth_midxed_pack_int_id(m, index_pos); + pack = bitmap_git->midx->packs[pack_id]; + } else { + index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); + ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); + nth_bitmap_object_oid(bitmap_git, &oid, index_pos); + + pack = bitmap_git->pack; + } if (bitmap_git->hashes) hash = get_be32(bitmap_git->hashes + index_pos); - show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs); + show_reach(&oid, object_type, 0, hash, pack, ofs); } } } @@ -754,8 +922,13 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, struct object *object = roots->item; roots = roots->next; - if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0) - return 1; + if (bitmap_is_midx(bitmap_git)) { + if (bsearch_midx(&object->oid, bitmap_git->midx, NULL)) + return 1; + } else { + if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0) + return 1; + } } return 0; @@ -841,14 +1014,26 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, uint32_t pos) { - struct packed_git *pack = bitmap_git->pack; unsigned long size; struct object_info oi = 
OBJECT_INFO_INIT; oi.sizep = &size; if (pos < bitmap_num_objects(bitmap_git)) { - off_t ofs = pack_pos_to_offset(pack, pos); + struct packed_git *pack; + off_t ofs; + + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos); + uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos); + + pack = bitmap_git->midx->packs[pack_id]; + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); + } else { + pack = bitmap_git->pack; + ofs = pack_pos_to_offset(pack, pos); + } + if (packed_object_info(the_repository, pack, ofs, &oi) < 0) { struct object_id oid; nth_bitmap_object_oid(bitmap_git, &oid, @@ -1029,7 +1214,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, /* try to open a bitmapped pack, but don't parse it yet * because we may not need to use it */ CALLOC_ARRAY(bitmap_git, 1); - if (open_pack_bitmap(revs->repo, bitmap_git) < 0) + if (open_bitmap(revs->repo, bitmap_git) < 0) goto cleanup; for (i = 0; i < revs->pending.nr; ++i) { @@ -1073,7 +1258,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, * from disk. this is the point of no return; after this the rev_list * becomes invalidated and we must perform the revwalk through bitmaps */ - if (load_pack_bitmap(bitmap_git) < 0) + if (load_bitmap(bitmap_git) < 0) goto cleanup; object_array_clear(&revs->pending); @@ -1121,19 +1306,43 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, * reused, but you can keep feeding bits. 
*/ static int try_partial_reuse(struct bitmap_index *bitmap_git, + struct packed_git *pack, size_t pos, struct bitmap *reuse, struct pack_window **w_curs) { - off_t offset, header; + off_t offset, delta_obj_offset; enum object_type type; unsigned long size; - if (pos >= bitmap_num_objects(bitmap_git)) - return -1; /* not actually in the pack or MIDX */ + /* + * try_partial_reuse() is called either on (a) objects in the + * bitmapped pack (in the case of a single-pack bitmap) or (b) + * objects in the preferred pack of a multi-pack bitmap. + * Importantly, the latter can pretend as if only a single pack + * exists because: + * + * - The first pack->num_objects bits of a MIDX bitmap are + * reserved for the preferred pack, and + * + * - Ties due to duplicate objects are always resolved in + * favor of the preferred pack. + * + * Therefore we do not need to ever ask the MIDX for its copy of + * an object by OID, since it will always select it from the + * preferred pack. Likewise, the selected copy of the base + * object for any deltas will reside in the same pack. + * + * This means that we can reuse pos when looking up the bit in + * the reuse bitmap, too, since bits corresponding to the + * preferred pack precede all bits from other packs. + */ + + if (pos >= pack->num_objects) + return -1; /* not actually in the pack or MIDX preferred pack */ - offset = header = pack_pos_to_offset(bitmap_git->pack, pos); - type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size); + offset = delta_obj_offset = pack_pos_to_offset(pack, pos); + type = unpack_object_header(pack, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ @@ -1149,11 +1358,11 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git, * and the normal slow path will complain about it in * more detail. 
*/ - base_offset = get_delta_base(bitmap_git->pack, w_curs, - &offset, type, header); + base_offset = get_delta_base(pack, w_curs, &offset, type, + delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0) + if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0) return 0; /* @@ -1187,24 +1396,48 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git, return 0; } +static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) +{ + struct multi_pack_index *m = bitmap_git->midx; + if (!m) + BUG("midx_preferred_pack: requires non-empty MIDX"); + return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); +} + int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct packed_git **packfile_out, uint32_t *entries, struct bitmap **reuse_out) { + struct packed_git *pack; struct bitmap *result = bitmap_git->result; struct bitmap *reuse; struct pack_window *w_curs = NULL; size_t i = 0; uint32_t offset; - uint32_t objects_nr = bitmap_num_objects(bitmap_git); + uint32_t objects_nr; assert(result); + load_reverse_index(bitmap_git); + + if (bitmap_is_midx(bitmap_git)) + pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; + else + pack = bitmap_git->pack; + objects_nr = pack->num_objects; + while (i < result->word_alloc && result->words[i] == (eword_t)~0) i++; - /* Don't mark objects not in the packfile */ + /* + * Don't mark objects not in the packfile or preferred pack. This bitmap + * marks objects eligible for reuse, but the pack-reuse code only + * understands how to reuse a single pack. Since the preferred pack is + * guaranteed to have all bases for its deltas (in a multi-pack bitmap), + * we use it instead of another pack. In single-pack bitmaps, the choice + * is made for us. 
+ */ if (i > objects_nr / BITS_IN_EWORD) i = objects_nr / BITS_IN_EWORD; @@ -1220,8 +1453,8 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, break; offset += ewah_bit_ctz64(word >> offset); - if (try_partial_reuse(bitmap_git, pos + offset, reuse, - &w_curs) < 0) { + if (try_partial_reuse(bitmap_git, pack, pos + offset, + reuse, &w_curs) < 0) { /* * try_partial_reuse indicated we couldn't reuse * any bits, so there is no point in trying more @@ -1250,7 +1483,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = bitmap_git->pack; + *packfile_out = pack; *reuse_out = reuse; return 0; } @@ -1524,6 +1757,12 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, uint32_t i, num_objects; uint32_t *reposition; + if (!bitmap_is_midx(bitmap_git)) + load_reverse_index(bitmap_git); + else if (load_midx_revindex(bitmap_git->midx) < 0) + BUG("rebuild_existing_bitmaps: missing required rev-cache " + "extension"); + num_objects = bitmap_num_objects(bitmap_git); CALLOC_ARRAY(reposition, num_objects); @@ -1531,8 +1770,13 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, struct object_id oid; struct object_entry *oe; - nth_packed_object_id(&oid, bitmap_git->pack, - pack_pos_to_index(bitmap_git->pack, i)); + if (bitmap_is_midx(bitmap_git)) + nth_midxed_object_oid(&oid, + bitmap_git->midx, + pack_pos_to_midx(bitmap_git->midx, i)); + else + nth_packed_object_id(&oid, bitmap_git->pack, + pack_pos_to_index(bitmap_git->pack, i)); oe = packlist_find(mapping, &oid); if (oe) @@ -1558,6 +1802,19 @@ void free_bitmap_index(struct bitmap_index *b) free(b->ext_index.hashes); bitmap_free(b->result); bitmap_free(b->haves); + if (bitmap_is_midx(b)) { + /* + * Multi-pack bitmaps need to have resources associated with + * their on-disk reverse indexes unmapped so that stale .rev and + * .bitmap files can be removed. 
+ * + * Unlike pack-based bitmaps, multi-pack bitmaps can be read and + * written in the same 'git multi-pack-index write --bitmap' + * process. Close resources so they can be removed safely on + * platforms like Windows. + */ + close_midx_revindex(b->midx); + } free(b); } @@ -1572,7 +1829,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, enum object_type object_type) { struct bitmap *result = bitmap_git->result; - struct packed_git *pack = bitmap_git->pack; off_t total = 0; struct ewah_iterator it; eword_t filter; @@ -1589,15 +1845,35 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, continue; for (offset = 0; offset < BITS_IN_EWORD; offset++) { - size_t pos; - if ((word >> offset) == 0) break; offset += ewah_bit_ctz64(word >> offset); - pos = base + offset; - total += pack_pos_to_offset(pack, pos + 1) - - pack_pos_to_offset(pack, pos); + + if (bitmap_is_midx(bitmap_git)) { + uint32_t pack_pos; + uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset); + off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos); + + uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos); + struct packed_git *pack = bitmap_git->midx->packs[pack_id]; + + if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) { + struct object_id oid; + nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos); + + die(_("could not find %s in pack %s at offset %"PRIuMAX), + oid_to_hex(&oid), + pack->pack_name, + (uintmax_t)offset); + } + + total += pack_pos_to_offset(pack, pack_pos + 1) - offset; + } else { + size_t pos = base + offset; + total += pack_pos_to_offset(bitmap_git->pack, pos + 1) - + pack_pos_to_offset(bitmap_git->pack, pos); + } } } @@ -1648,6 +1924,11 @@ off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git, return total; } +int bitmap_is_midx(struct bitmap_index *bitmap_git) +{ + return !!bitmap_git->midx; +} + const struct string_list *bitmap_preferred_tips(struct repository *r) { return 
repo_config_get_value_multi(r, "pack.preferbitmaptips"); diff --git a/pack-bitmap.h b/pack-bitmap.h index 52ea10de5117ab..81664f933f02a0 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -44,6 +44,8 @@ typedef int (*show_reachable_fn)( struct bitmap_index; struct bitmap_index *prepare_bitmap_git(struct repository *r); +struct bitmap_index *prepare_midx_bitmap_git(struct repository *r, + struct multi_pack_index *midx); void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags); void traverse_bitmap_commit_list(struct bitmap_index *, @@ -92,6 +94,10 @@ void bitmap_writer_finish(struct pack_idx_entry **index, uint32_t index_nr, const char *filename, uint16_t options); +char *midx_bitmap_filename(struct multi_pack_index *midx); +char *pack_bitmap_filename(struct packed_git *p); + +int bitmap_is_midx(struct bitmap_index *bitmap_git); const struct string_list *bitmap_preferred_tips(struct repository *r); int bitmap_is_preferred_refname(struct repository *r, const char *refname); diff --git a/packfile.c b/packfile.c index 755aa7aec5efbf..e855b932082bdc 100644 --- a/packfile.c +++ b/packfile.c @@ -860,7 +860,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!strcmp(file_name, "multi-pack-index")) return; if (starts_with(file_name, "multi-pack-index") && - ends_with(file_name, ".rev")) + (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev"))) return; if (ends_with(file_name, ".idx") || ends_with(file_name, ".rev") || From c287aeb9a1a95da846d6750df399af0b4af626c5 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:02 -0400 Subject: [PATCH 088/198] pack-bitmap: write multi-pack bitmaps Write multi-pack bitmaps in the format described by Documentation/technical/bitmap-format.txt, inferring their presence with the absence of '--bitmap'. 
To write a multi-pack bitmap, this patch attempts to reuse as much of the existing machinery from pack-objects as possible. Specifically, the MIDX code prepares a packing_data struct that pretends as if a single packfile has been generated containing all of the objects contained within the MIDX. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/git-multi-pack-index.txt | 12 +- builtin/multi-pack-index.c | 2 + midx.c | 208 ++++++++++++++++++++++++- midx.h | 1 + 4 files changed, 214 insertions(+), 9 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index c9b063d31e1df8..ed52459a9d5bd0 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress] - [--preferred-pack=<pack>] <subcommand> + [--preferred-pack=<pack>] [--[no-]bitmap] <subcommand> DESCRIPTION ----------- @@ -40,6 +40,9 @@ write:: multiple packs contain the same object. `<pack>` must contain at least one object. If not given, ties are broken in favor of the pack with the lowest mtime. + + --[no-]bitmap:: + Control whether or not a multi-pack bitmap is written. -- verify:: @@ -81,6 +84,13 @@ EXAMPLES $ git multi-pack-index write ----------------------------------------------- +* Write a MIDX file for the packfiles in the current .git folder with a +corresponding bitmap. ++ +------------------------------------------------------------- +$ git multi-pack-index write --preferred-pack=<pack> --bitmap +------------------------------------------------------------- + * Write a MIDX file for the packfiles in an alternate object store. 
+ ----------------------------------------------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5d3ea445fdb3a8..bf6fa982e3f29c 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -68,6 +68,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv) OPT_STRING(0, "preferred-pack", &opts.preferred_pack, N_("preferred-pack"), N_("pack for reuse when computing a multi-pack bitmap")), + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_END(), }; diff --git a/midx.c b/midx.c index 9a971105336a7f..9028af2a9d8ade 100644 --- a/midx.c +++ b/midx.c @@ -13,6 +13,10 @@ #include "repository.h" #include "chunk-format.h" #include "pack.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "revision.h" +#include "list-objects.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -888,6 +892,166 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, static void clear_midx_files_ext(struct repository *r, const char *ext, unsigned char *keep_hash); +static void prepare_midx_packing_data(struct packing_data *pdata, + struct write_midx_context *ctx) +{ + uint32_t i; + + memset(pdata, 0, sizeof(struct packing_data)); + prepare_packing_data(the_repository, pdata); + + for (i = 0; i < ctx->entries_nr; i++) { + struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]]; + struct object_entry *to = packlist_alloc(pdata, &from->oid); + + oe_set_in_pack(pdata, to, + ctx->info[ctx->pack_perm[from->pack_int_id]].p); + } +} + +static int add_ref_to_pending(const char *refname, + const struct object_id *oid, + int flag, void *cb_data) +{ + struct rev_info *revs = (struct rev_info*)cb_data; + struct object *object; + + if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", refname); + return 0; + } + + object = parse_object_or_die(oid, refname); + if (object->type != 
OBJ_COMMIT) + return 0; + + add_pending_object(revs, object, ""); + if (bitmap_is_preferred_refname(revs->repo, refname)) + object->flags |= NEEDS_BITMAP; + return 0; +} + +struct bitmap_commit_cb { + struct commit **commits; + size_t commits_nr, commits_alloc; + + struct write_midx_context *ctx; +}; + +static const struct object_id *bitmap_oid_access(size_t index, + const void *_entries) +{ + const struct pack_midx_entry *entries = _entries; + return &entries[index].oid; +} + +static void bitmap_show_commit(struct commit *commit, void *_data) +{ + struct bitmap_commit_cb *data = _data; + int pos = oid_pos(&commit->object.oid, data->ctx->entries, + data->ctx->entries_nr, + bitmap_oid_access); + if (pos < 0) + return; + + ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc); + data->commits[data->commits_nr++] = commit; +} + +static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p, + struct write_midx_context *ctx) +{ + struct rev_info revs; + struct bitmap_commit_cb cb = {0}; + + cb.ctx = ctx; + + repo_init_revisions(the_repository, &revs, NULL); + setup_revisions(0, NULL, &revs, NULL); + for_each_ref(add_ref_to_pending, &revs); + + /* + * Skipping promisor objects here is intentional, since it only excludes + * them from the list of reachable commits that we want to select from + * when computing the selection of MIDX'd commits to receive bitmaps. + * + * Reachability bitmaps do require that their objects be closed under + * reachability, but fetching any objects missing from promisors at this + * point is too late. But, if one of those objects can be reached from + * another object that is included in the bitmap, then we will + * complain later that we don't have reachability closure (and fail + * appropriately).
+ */ + fetch_if_missing = 0; + revs.exclude_promisor_objects = 1; + + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + + traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb); + if (indexed_commits_nr_p) + *indexed_commits_nr_p = cb.commits_nr; + + return cb.commits; +} + +static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, + struct write_midx_context *ctx, + unsigned flags) +{ + struct packing_data pdata; + struct pack_idx_entry **index; + struct commit **commits = NULL; + uint32_t i, commits_nr; + char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); + int ret; + + prepare_midx_packing_data(&pdata, ctx); + + commits = find_commits_for_midx_bitmap(&commits_nr, ctx); + + /* + * Build the MIDX-order index based on pdata.objects (which is already + * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of + * this order). + */ + ALLOC_ARRAY(index, pdata.nr_objects); + for (i = 0; i < pdata.nr_objects; i++) + index[i] = &pdata.objects[i].idx; + + bitmap_writer_show_progress(flags & MIDX_PROGRESS); + bitmap_writer_build_type_index(&pdata, index, pdata.nr_objects); + + /* + * bitmap_writer_finish expects objects in lex order, but pack_order + * gives us exactly that. use it directly instead of re-sorting the + * array. + * + * This changes the order of objects in 'index' between + * bitmap_writer_build_type_index and bitmap_writer_finish. + * + * The same re-ordering takes place in the single-pack bitmap code via + * write_idx_file(), which is called by finish_tmp_packfile(), which + * happens between bitmap_writer_build_type_index() and + * bitmap_writer_finish(). 
+ */ + for (i = 0; i < pdata.nr_objects; i++) + index[ctx->pack_order[i]] = &pdata.objects[i].idx; + + bitmap_writer_select_commits(commits, commits_nr, -1); + ret = bitmap_writer_build(&pdata); + if (ret < 0) + goto cleanup; + + bitmap_writer_set_checksum(midx_hash); + bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, 0); + +cleanup: + free(index); + free(bitmap_name); + return ret; +} + static int write_midx_internal(const char *object_dir, struct string_list *packs_to_drop, const char *preferred_pack_name, @@ -930,7 +1094,7 @@ static int write_midx_internal(const char *object_dir, ctx.info[ctx.nr].orig_pack_int_id = i; ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]); - ctx.info[ctx.nr].p = NULL; + ctx.info[ctx.nr].p = ctx.m->packs[i]; ctx.info[ctx.nr].expired = 0; if (flags & MIDX_WRITE_REV_INDEX) { @@ -964,8 +1128,26 @@ static int write_midx_internal(const char *object_dir, for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx); stop_progress(&ctx.progress); - if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) - goto cleanup; + if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) { + struct bitmap_index *bitmap_git; + int bitmap_exists; + int want_bitmap = flags & MIDX_WRITE_BITMAP; + + bitmap_git = prepare_midx_bitmap_git(the_repository, ctx.m); + bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); + free_bitmap_index(bitmap_git); + + if (bitmap_exists || !want_bitmap) { + /* + * The correct MIDX already exists, and so does a + * corresponding bitmap (or one wasn't requested). 
+ */ + if (!want_bitmap) + clear_midx_files_ext(the_repository, ".bitmap", + NULL); + goto cleanup; + } + } if (preferred_pack_name) { int found = 0; @@ -981,7 +1163,8 @@ static int write_midx_internal(const char *object_dir, if (!found) warning(_("unknown preferred pack: '%s'"), preferred_pack_name); - } else if (ctx.nr && (flags & MIDX_WRITE_REV_INDEX)) { + } else if (ctx.nr && + (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p; ctx.preferred_pack_idx = 0; @@ -1113,9 +1296,6 @@ static int write_midx_internal(const char *object_dir, hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk)); - if (ctx.m) - close_midx(ctx.m); - if (ctx.nr - dropped_packs == 0) { error(_("no pack files to index.")); result = 1; @@ -1146,14 +1326,24 @@ static int write_midx_internal(const char *object_dir, finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX) + if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) ctx.pack_order = midx_pack_order(&ctx); if (flags & MIDX_WRITE_REV_INDEX) write_midx_reverse_index(midx_name, midx_hash, &ctx); + if (flags & MIDX_WRITE_BITMAP) { + if (write_midx_bitmap(midx_name, midx_hash, &ctx, flags) < 0) { + error(_("could not write multi-pack bitmap")); + result = 1; + goto cleanup; + } + } + + close_midx(ctx.m); commit_lock_file(&lk); + clear_midx_files_ext(the_repository, ".bitmap", midx_hash); clear_midx_files_ext(the_repository, ".rev", midx_hash); cleanup: @@ -1170,6 +1360,7 @@ static int write_midx_internal(const char *object_dir, free(ctx.pack_perm); free(ctx.pack_order); free(midx_name); + return result; } @@ -1230,6 +1421,7 @@ void clear_midx_file(struct repository *r) if (remove_path(midx)) die(_("failed to clear multi-pack-index at %s"), midx); + clear_midx_files_ext(r, ".bitmap", NULL); clear_midx_files_ext(r, ".rev", NULL); free(midx); 
diff --git a/midx.h b/midx.h index 1172df1a711489..350f4d0a7b4c20 100644 --- a/midx.h +++ b/midx.h @@ -41,6 +41,7 @@ struct multi_pack_index { #define MIDX_PROGRESS (1 << 0) #define MIDX_WRITE_REV_INDEX (1 << 1) +#define MIDX_WRITE_BITMAP (1 << 2) const unsigned char *get_midx_checksum(struct multi_pack_index *m); char *get_midx_filename(const char *object_dir); From f359671f63ee27715ed049c681df6efb7d7280c7 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:04 -0400 Subject: [PATCH 089/198] t5310: move some tests to lib-bitmap.sh We'll soon be adding a test script that will cover many of the same bitmap concepts as t5310, but for MIDX bitmaps. Let's pull out as many of the applicable tests as we can so we don't have to rewrite them. There should be no functional change to t5310; we still run the same operations in the same order. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/lib-bitmap.sh | 236 ++++++++++++++++++++++++++++++++++++++++ t/t5310-pack-bitmaps.sh | 229 +------------------------------------- 2 files changed, 241 insertions(+), 224 deletions(-) diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index fe3f98be24f302..ecb5d0e05d44ae 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -1,3 +1,6 @@ +# Helpers for scripts testing bitmap functionality; see t5310 for +# example usage. + # Compare a file containing rev-list bitmap traversal output to its non-bitmap # counterpart. You can't just use test_cmp for this, because the two produce # subtly different output: @@ -24,3 +27,236 @@ test_bitmap_traversal () { test_cmp "$1.normalized" "$2.normalized" && rm -f "$1.normalized" "$2.normalized" } + +# To ensure the logic for "maximal commits" is exercised, make +# the repository a bit more complicated.
+# +# other second +# * * +# (99 commits) (99 commits) +# * * +# |\ /| +# | * octo-other octo-second * | +# |/|\_________ ____________/|\| +# | \ \/ __________/ | +# | | ________/\ / | +# * |/ * merge-right * +# | _|__________/ \____________ | +# |/ | \| +# (l1) * * merge-left * (r1) +# | / \________________________ | +# |/ \| +# (l2) * * (r2) +# \___________________________ | +# \| +# * (base) +# +# We only push bits down the first-parent history, which +# makes some of these commits unimportant! +# +# The important part for the maximal commit algorithm is how +# the bitmasks are extended. Assuming starting bit positions +# for second (bit 0) and other (bit 1), the bitmasks at the +# end should be: +# +# second: 1 (maximal, selected) +# other: 01 (maximal, selected) +# (base): 11 (maximal) +# +# This complicated history was important for a previous +# version of the walk that guarantees never walking a +# commit multiple times. That goal might be important +# again, so preserve this complicated case. For now, this +# test will guarantee that the bitmaps are computed +# correctly, even with the repeat calculations. 
+setup_bitmap_history() { + test_expect_success 'setup repo with moderate-sized history' ' + test_commit_bulk --id=file 10 && + git branch -M second && + git checkout -b other HEAD~5 && + test_commit_bulk --id=side 10 && + + # add complicated history setup, including merges and + # ambiguous merge-bases + + git checkout -b merge-left other~2 && + git merge second~2 -m "merge-left" && + + git checkout -b merge-right second~1 && + git merge other~1 -m "merge-right" && + + git checkout -b octo-second second && + git merge merge-left merge-right -m "octopus-second" && + + git checkout -b octo-other other && + git merge merge-left merge-right -m "octopus-other" && + + git checkout other && + git merge octo-other -m "pull octopus" && + + git checkout second && + git merge octo-second -m "pull octopus" && + + # Remove these branches so they are not selected + # as bitmap tips + git branch -D merge-left && + git branch -D merge-right && + git branch -D octo-other && + git branch -D octo-second && + + # add padding to make these merges less interesting + # and avoid having them selected for bitmaps + test_commit_bulk --id=file 100 && + git checkout other && + test_commit_bulk --id=side 100 && + git checkout second && + + bitmaptip=$(git rev-parse second) && + blob=$(echo tagged-blob | git hash-object -w --stdin) && + git tag tagged-blob $blob + ' +} + +rev_list_tests_head () { + test_expect_success "counting commits via bitmap ($state, $branch)" ' + git rev-list --count $branch >expect && + git rev-list --use-bitmap-index --count $branch >actual && + test_cmp expect actual + ' + + test_expect_success "counting partial commits via bitmap ($state, $branch)" ' + git rev-list --count $branch~5..$branch >expect && + git rev-list --use-bitmap-index --count $branch~5..$branch >actual && + test_cmp expect actual + ' + + test_expect_success "counting commits with limit ($state, $branch)" ' + git rev-list --count -n 1 $branch >expect && + git rev-list --use-bitmap-index --count -n 1 
$branch >actual && + test_cmp expect actual + ' + + test_expect_success "counting non-linear history ($state, $branch)" ' + git rev-list --count other...second >expect && + git rev-list --use-bitmap-index --count other...second >actual && + test_cmp expect actual + ' + + test_expect_success "counting commits with limiting ($state, $branch)" ' + git rev-list --count $branch -- 1.t >expect && + git rev-list --use-bitmap-index --count $branch -- 1.t >actual && + test_cmp expect actual + ' + + test_expect_success "counting objects via bitmap ($state, $branch)" ' + git rev-list --count --objects $branch >expect && + git rev-list --use-bitmap-index --count --objects $branch >actual && + test_cmp expect actual + ' + + test_expect_success "enumerate commits ($state, $branch)" ' + git rev-list --use-bitmap-index $branch >actual && + git rev-list $branch >expect && + test_bitmap_traversal --no-confirm-bitmaps expect actual + ' + + test_expect_success "enumerate --objects ($state, $branch)" ' + git rev-list --objects --use-bitmap-index $branch >actual && + git rev-list --objects $branch >expect && + test_bitmap_traversal expect actual + ' + + test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" ' + git rev-list --objects --use-bitmap-index $branch tagged-blob >actual && + grep $blob actual + ' +} + +rev_list_tests () { + state=$1 + + for branch in "second" "other" + do + rev_list_tests_head + done +} + +basic_bitmap_tests () { + tip="$1" + test_expect_success 'rev-list --test-bitmap verifies bitmaps' " + git rev-list --test-bitmap "${tip:-HEAD}" + " + + rev_list_tests 'full bitmap' + + test_expect_success 'clone from bitmapped repository' ' + rm -fr clone.git && + git clone --no-local --bare . 
clone.git && + git rev-parse HEAD >expect && + git --git-dir=clone.git rev-parse HEAD >actual && + test_cmp expect actual + ' + + test_expect_success 'partial clone from bitmapped repository' ' + test_config uploadpack.allowfilter true && + rm -fr partial-clone.git && + git clone --no-local --bare --filter=blob:none . partial-clone.git && + ( + cd partial-clone.git && + pack=$(echo objects/pack/*.pack) && + git verify-pack -v "$pack" >have && + awk "/blob/ { print \$1 }" <have >blobs && + # we expect this single blob because of the direct ref + git rev-parse refs/tags/tagged-blob >expect && + test_cmp expect blobs + ) + ' + + test_expect_success 'setup further non-bitmapped commits' ' + test_commit_bulk --id=further 10 + ' + + rev_list_tests 'partial bitmap' + + test_expect_success 'fetch (partial bitmap)' ' + git --git-dir=clone.git fetch origin second:second && + git rev-parse HEAD >expect && + git --git-dir=clone.git rev-parse HEAD >actual && + test_cmp expect actual + ' + + test_expect_success 'enumerating progress counts pack-reused objects' ' + count=$(git rev-list --objects --all --count) && + git repack -adb && + + # check first with only reused objects; confirm that our + # progress showed the right number, and also that we did + # pack-reuse as expected. Check only the final "done" + # line of the meter (there may be an arbitrary number of + # intermediate lines ending with CR). 
+ GIT_PROGRESS_DELAY=0 \ + git pack-objects --all --stdout --progress \ + </dev/null >/dev/null 2>stderr && + grep "Enumerating objects: $count, done" stderr && + grep "pack-reused $count" stderr && + + # now the same but with one non-reused object + git commit --allow-empty -m "an extra commit object" && + GIT_PROGRESS_DELAY=0 \ + git pack-objects --all --stdout --progress \ + </dev/null >/dev/null 2>stderr && + grep "Enumerating objects: $((count+1)), done" stderr && + grep "pack-reused $count" stderr + ' +} + +# have_delta <obj> <expected_base> +# +# Note that because this relies on cat-file, it might find _any_ copy of an +# object in the repository. The caller is responsible for making sure +# there's only one (e.g., via "repack -ad", or having just fetched a copy). +have_delta () { + echo $2 >expect && + echo $1 | git cat-file --batch-check="%(deltabase)" >actual && + test_cmp expect actual +} diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index b02838750e49a3..4318f84d534516 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -25,93 +25,10 @@ has_any () { grep -Ff "$1" "$2" } -# To ensure the logic for "maximal commits" is exercised, make -# the repository a bit more complicated. -# -# other second -# * * -# (99 commits) (99 commits) -# * * -# |\ /| -# | * octo-other octo-second * | -# |/|\_________ ____________/|\| -# | \ \/ __________/ | -# | | ________/\ / | -# * |/ * merge-right * -# | _|__________/ \____________ | -# |/ | \| -# (l1) * * merge-left * (r1) -# | / \________________________ | -# |/ \| -# (l2) * * (r2) -# \___________________________ | -# \| -# * (base) -# -# We only push bits down the first-parent history, which -# makes some of these commits unimportant! -# -# The important part for the maximal commit algorithm is how -# the bitmasks are extended. 
Assuming starting bit positions -# for second (bit 0) and other (bit 1), the bitmasks at the -# end should be: -# -# second: 1 (maximal, selected) -# other: 01 (maximal, selected) -# (base): 11 (maximal) -# -# This complicated history was important for a previous -# version of the walk that guarantees never walking a -# commit multiple times. That goal might be important -# again, so preserve this complicated case. For now, this -# test will guarantee that the bitmaps are computed -# correctly, even with the repeat calculations. - -test_expect_success 'setup repo with moderate-sized history' ' - test_commit_bulk --id=file 10 && - git branch -M second && - git checkout -b other HEAD~5 && - test_commit_bulk --id=side 10 && - - # add complicated history setup, including merges and - # ambiguous merge-bases - - git checkout -b merge-left other~2 && - git merge second~2 -m "merge-left" && - - git checkout -b merge-right second~1 && - git merge other~1 -m "merge-right" && - - git checkout -b octo-second second && - git merge merge-left merge-right -m "octopus-second" && - - git checkout -b octo-other other && - git merge merge-left merge-right -m "octopus-other" && - - git checkout other && - git merge octo-other -m "pull octopus" && - - git checkout second && - git merge octo-second -m "pull octopus" && - - # Remove these branches so they are not selected - # as bitmap tips - git branch -D merge-left && - git branch -D merge-right && - git branch -D octo-other && - git branch -D octo-second && - - # add padding to make these merges less interesting - # and avoid having them selected for bitmaps - test_commit_bulk --id=file 100 && - git checkout other && - test_commit_bulk --id=side 100 && - git checkout second && - - bitmaptip=$(git rev-parse second) && - blob=$(echo tagged-blob | git hash-object -w --stdin) && - git tag tagged-blob $blob && - git config repack.writebitmaps true +setup_bitmap_history + +test_expect_success 'setup writing bitmaps during repack' ' + git 
config repack.writeBitmaps true ' test_expect_success 'full repack creates bitmaps' ' @@ -123,109 +40,7 @@ test_expect_success 'full repack creates bitmaps' ' grep "\"key\":\"num_maximal_commits\",\"value\":\"107\"" trace ' -test_expect_success 'rev-list --test-bitmap verifies bitmaps' ' - git rev-list --test-bitmap HEAD -' - -rev_list_tests_head () { - test_expect_success "counting commits via bitmap ($state, $branch)" ' - git rev-list --count $branch >expect && - git rev-list --use-bitmap-index --count $branch >actual && - test_cmp expect actual - ' - - test_expect_success "counting partial commits via bitmap ($state, $branch)" ' - git rev-list --count $branch~5..$branch >expect && - git rev-list --use-bitmap-index --count $branch~5..$branch >actual && - test_cmp expect actual - ' - - test_expect_success "counting commits with limit ($state, $branch)" ' - git rev-list --count -n 1 $branch >expect && - git rev-list --use-bitmap-index --count -n 1 $branch >actual && - test_cmp expect actual - ' - - test_expect_success "counting non-linear history ($state, $branch)" ' - git rev-list --count other...second >expect && - git rev-list --use-bitmap-index --count other...second >actual && - test_cmp expect actual - ' - - test_expect_success "counting commits with limiting ($state, $branch)" ' - git rev-list --count $branch -- 1.t >expect && - git rev-list --use-bitmap-index --count $branch -- 1.t >actual && - test_cmp expect actual - ' - - test_expect_success "counting objects via bitmap ($state, $branch)" ' - git rev-list --count --objects $branch >expect && - git rev-list --use-bitmap-index --count --objects $branch >actual && - test_cmp expect actual - ' - - test_expect_success "enumerate commits ($state, $branch)" ' - git rev-list --use-bitmap-index $branch >actual && - git rev-list $branch >expect && - test_bitmap_traversal --no-confirm-bitmaps expect actual - ' - - test_expect_success "enumerate --objects ($state, $branch)" ' - git rev-list --objects 
--use-bitmap-index $branch >actual && - git rev-list --objects $branch >expect && - test_bitmap_traversal expect actual - ' - - test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" ' - git rev-list --objects --use-bitmap-index $branch tagged-blob >actual && - grep $blob actual - ' -} - -rev_list_tests () { - state=$1 - - for branch in "second" "other" - do - rev_list_tests_head - done -} - -rev_list_tests 'full bitmap' - -test_expect_success 'clone from bitmapped repository' ' - git clone --no-local --bare . clone.git && - git rev-parse HEAD >expect && - git --git-dir=clone.git rev-parse HEAD >actual && - test_cmp expect actual -' - -test_expect_success 'partial clone from bitmapped repository' ' - test_config uploadpack.allowfilter true && - git clone --no-local --bare --filter=blob:none . partial-clone.git && - ( - cd partial-clone.git && - pack=$(echo objects/pack/*.pack) && - git verify-pack -v "$pack" >have && - awk "/blob/ { print \$1 }" <have >blobs && - # we expect this single blob because of the direct ref - git rev-parse refs/tags/tagged-blob >expect && - test_cmp expect blobs - ) -' - -test_expect_success 'setup further non-bitmapped commits' ' - test_commit_bulk --id=further 10 -' - -rev_list_tests 'partial bitmap' - -test_expect_success 'fetch (partial bitmap)' ' - git --git-dir=clone.git fetch origin second:second && - git rev-parse HEAD >expect && - git --git-dir=clone.git rev-parse HEAD >actual && - test_cmp expect actual -' +basic_bitmap_tests test_expect_success 'incremental repack fails when bitmaps are requested' ' test_commit more-1 && @@ -461,40 +276,6 @@ test_expect_success 'truncated bitmap fails gracefully (cache)' ' test_i18ngrep corrupted.bitmap.index stderr ' -test_expect_success 'enumerating progress counts pack-reused objects' ' - count=$(git rev-list --objects --all --count) && - git repack -adb && - - # check first with only reused objects; confirm that our progress - # showed the right number, and also 
that we did pack-reuse as expected. - # Check only the final "done" line of the meter (there may be an - # arbitrary number of intermediate lines ending with CR). - GIT_PROGRESS_DELAY=0 \ - git pack-objects --all --stdout --progress \ - </dev/null >/dev/null 2>stderr && - grep "Enumerating objects: $count, done" stderr && - grep "pack-reused $count" stderr && - - # now the same but with one non-reused object - git commit --allow-empty -m "an extra commit object" && - GIT_PROGRESS_DELAY=0 \ - git pack-objects --all --stdout --progress \ - </dev/null >/dev/null 2>stderr && - grep "Enumerating objects: $((count+1)), done" stderr && - grep "pack-reused $count" stderr -' - -# have_delta <obj> <expected_base> -# -# Note that because this relies on cat-file, it might find _any_ copy of an -# object in the repository. The caller is responsible for making sure -# there's only one (e.g., via "repack -ad", or having just fetched a copy). -have_delta () { - echo $2 >expect && - echo $1 | git cat-file --batch-check="%(deltabase)" >actual && - test_cmp expect actual -} - # Create a state of history with these properties: # # - refs that allow a client to fetch some new history, while sharing some old From 9efd417210f87441ec2ad4ffaffcc2c8b21dcb2f Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:07 -0400 Subject: [PATCH 090/198] t/helper/test-read-midx.c: add --checksum mode Subsequent tests will want to check for the existence of a multi-pack bitmap which matches the multi-pack-index stored in the pack directory. The multi-pack bitmap includes the hex checksum of the MIDX it corresponds to in its filename (for example, '$packdir/multi-pack-index-<checksum>.bitmap'). As a result, some tests want a way to learn what '<checksum>' is. This helper addresses that need by printing the checksum of the repository's multi-pack-index. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/helper/test-read-midx.c | 16 +++++++++++++++- t/lib-bitmap.sh | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 7c2eb11a8e70ff..cb0d27049a07e1 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -60,12 +60,26 @@ static int read_midx_file(const char *object_dir, int show_objects) return 0; } +static int read_midx_checksum(const char *object_dir) +{ + struct multi_pack_index *m; + + setup_git_directory(); + m = load_multi_pack_index(object_dir, 1); + if (!m) + return 1; + printf("%s\n", hash_to_hex(get_midx_checksum(m))); + return 0; +} + int cmd__read_midx(int argc, const char **argv) { if (!(argc == 2 || argc == 3)) - usage("read-midx [--show-objects] <object-dir>"); + usage("read-midx [--show-objects|--checksum] <object-dir>"); if (!strcmp(argv[1], "--show-objects")) return read_midx_file(argv[2], 1); + else if (!strcmp(argv[1], "--checksum")) + return read_midx_checksum(argv[2]); return read_midx_file(argv[1], 0); } diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index ecb5d0e05d44ae..09cd036f4d50de 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -260,3 +260,7 @@ have_delta () { echo $1 | git cat-file --batch-check="%(deltabase)" >actual && test_cmp expect actual } + +midx_checksum () { + test-tool read-midx --checksum "${1:-.git/objects}" +} From 884c7c77949fbbaba338cb7cce12032543ed6dd5 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:10 -0400 Subject: [PATCH 091/198] t5326: test multi-pack bitmap behavior This patch introduces a new test, t5326, which tests the basic functionality of multi-pack bitmaps. Some trivial behavior is tested, such as: - Whether bitmaps can be generated with more than one pack. - Whether clones can be served with all objects in the bitmap. 
- Whether follow-up fetches can be served with some objects outside of the server's bitmap These use lib-bitmap's tests (which in turn were pulled from t5310), and we cover cases where the MIDX represents both a single pack and multiple packs. In addition, some non-trivial and MIDX-specific behavior is tested, too, including: - Whether multi-pack bitmaps behave correctly with respect to the pack-reuse machinery when the base for some object is selected from a different pack than the delta. - Whether multi-pack bitmaps correctly respect the pack.preferBitmapTips configuration. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5326-multi-pack-bitmaps.sh | 277 ++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100755 t/t5326-multi-pack-bitmaps.sh diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh new file mode 100755 index 00000000000000..c1b7d633e2338c --- /dev/null +++ b/t/t5326-multi-pack-bitmaps.sh @@ -0,0 +1,277 @@ +#!/bin/sh + +test_description='exercise basic multi-pack bitmap functionality' +. ./test-lib.sh +. "${TEST_DIRECTORY}/lib-bitmap.sh" + +# We'll be writing our own midx and bitmaps, so avoid getting confused by the +# automatic ones. 
+GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + +objdir=.git/objects +midx=$objdir/pack/multi-pack-index + +# midx_pack_source <obj> +midx_pack_source () { + test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2 +} + +setup_bitmap_history + +test_expect_success 'enable core.multiPackIndex' ' + git config core.multiPackIndex true +' + +test_expect_success 'create single-pack midx with bitmaps' ' + git repack -ad && + git multi-pack-index write --bitmap && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap +' + +basic_bitmap_tests + +test_expect_success 'create new additional packs' ' + for i in $(test_seq 1 16) + do + test_commit "$i" && + git repack -d + done && + + git checkout -b other2 HEAD~8 && + for i in $(test_seq 1 8) + do + test_commit "side-$i" && + git repack -d + done && + git checkout second +' + +test_expect_success 'create multi-pack midx with bitmaps' ' + git multi-pack-index write --bitmap && + + ls $objdir/pack/pack-*.pack >packs && + test_line_count = 25 packs && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap +' + +basic_bitmap_tests + +test_expect_success '--no-bitmap is respected when bitmaps exist' ' + git multi-pack-index write --bitmap && + + test_commit respect--no-bitmap && + GIT_TEST_MULTI_PACK_INDEX=0 git repack -d && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + + git multi-pack-index write --no-bitmap && + + test_path_is_file $midx && + test_path_is_missing $midx-$(midx_checksum $objdir).bitmap +' + +test_expect_success 'setup midx with base from later pack' ' + # Write a and b so that "a" is a delta on top of base "b", since Git + # prefers to delete contents out of a base rather than add to a shorter + # object. 
+ test_seq 1 128 >a && + test_seq 1 130 >b && + + git add a b && + git commit -m "initial commit" && + + a=$(git rev-parse HEAD:a) && + b=$(git rev-parse HEAD:b) && + + # In the first pack, "a" is stored as a delta to "b". + p1=$(git pack-objects .git/objects/pack/pack <<-EOF + $a + $b + EOF + ) && + + # In the second pack, "a" is missing, and "b" is not a delta nor base to + # any other object. + p2=$(git pack-objects .git/objects/pack/pack <<-EOF + $b + $(git rev-parse HEAD) + $(git rev-parse HEAD^{tree}) + EOF + ) && + + git prune-packed && + # Use the second pack as the preferred source, so that "b" occurs + # earlier in the MIDX object order, rendering "a" unusable for pack + # reuse. + git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx && + + have_delta $a $b && + test $(midx_pack_source $a) != $(midx_pack_source $b) +' + +rev_list_tests 'full bitmap with backwards delta' + +test_expect_success 'clone with bitmaps enabled' ' + git clone --no-local --bare . clone-reverse-delta.git && + test_when_finished "rm -fr clone-reverse-delta.git" && + + git rev-parse HEAD >expect && + git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual && + test_cmp expect actual +' + +bitmap_reuse_tests() { + from=$1 + to=$2 + + test_expect_success "setup pack reuse tests ($from -> $to)" ' + rm -fr repo && + git init repo && + ( + cd repo && + test_commit_bulk 16 && + git tag old-tip && + + git config core.multiPackIndex true && + if test "MIDX" = "$from" + then + GIT_TEST_MULTI_PACK_INDEX=0 git repack -Ad && + git multi-pack-index write --bitmap + else + GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb + fi + ) + ' + + test_expect_success "build bitmap from existing ($from -> $to)" ' + ( + cd repo && + test_commit_bulk --id=further 16 && + git tag new-tip && + + if test "MIDX" = "$to" + then + GIT_TEST_MULTI_PACK_INDEX=0 git repack -d && + git multi-pack-index write --bitmap + else + GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb + fi + ) + ' + + test_expect_success 
"verify resulting bitmaps ($from -> $to)" ' + ( + cd repo && + git for-each-ref && + git rev-list --test-bitmap refs/tags/old-tip && + git rev-list --test-bitmap refs/tags/new-tip + ) + ' +} + +bitmap_reuse_tests 'pack' 'MIDX' +bitmap_reuse_tests 'MIDX' 'pack' +bitmap_reuse_tests 'MIDX' 'MIDX' + +test_expect_success 'missing object closure fails gracefully' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit loose && + test_commit packed && + + # Do not pass "--revs"; we want a pack without the "loose" + # commit. + git pack-objects $objdir/pack/pack <<-EOF && + $(git rev-parse packed) + EOF + + test_must_fail git multi-pack-index write --bitmap 2>err && + grep "doesn.t have full closure" err && + test_path_is_missing $midx + ) +' + +test_expect_success 'setup partial bitmaps' ' + test_commit packed && + git repack && + test_commit loose && + git multi-pack-index write --bitmap 2>err && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap +' + +basic_bitmap_tests HEAD~ + +test_expect_success 'removing a MIDX clears stale bitmaps' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit base && + git repack && + git multi-pack-index write --bitmap && + + # Write a MIDX and bitmap; remove the MIDX but leave the bitmap. + stale_bitmap=$midx-$(midx_checksum $objdir).bitmap && + rm $midx && + + # Then write a new MIDX. 
+ test_commit new && + git repack && + git multi-pack-index write --bitmap && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_missing $stale_bitmap + ) +' + +test_expect_success 'pack.preferBitmapTips' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit_bulk --message="%s" 103 && + + git log --format="%H" >commits.raw && + sort <commits.raw >commits && + + git log --format="create refs/tags/%s %H" HEAD >refs && + git update-ref --stdin <refs && + + git multi-pack-index write --bitmap && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + + test-tool bitmap list-commits | sort >bitmaps && + comm -13 bitmaps commits >before && + test_line_count = 1 before && + + perl -ne "printf(\"create refs/tags/include/%d \", $.); print" \ + <before | git update-ref --stdin && + + rm -fr $midx-$(midx_checksum $objdir).bitmap && + rm -fr $midx-$(midx_checksum $objdir).rev && + rm -fr $midx && + + git -c pack.preferBitmapTips=refs/tags/include \ + multi-pack-index write --bitmap && + test-tool bitmap list-commits | sort >bitmaps && + comm -13 bitmaps commits >after && + + ! test_cmp before after + ) +' + +test_done From 96b5fb98267775c7d0fa209e8a61dddfb2c857ba Mon Sep 17 00:00:00 2001 From: Jeff King <peff@peff.net> Date: Tue, 27 Jul 2021 17:20:12 -0400 Subject: [PATCH 092/198] t0410: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP Generating a MIDX bitmap causes tests which repack in a partial clone to fail because they are missing objects. Missing objects is an expected component of tests in t0410, so disable this knob altogether. Graceful degradation when writing a bitmap with missing objects is tested in t5326. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0410-partial-clone.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 1667450917f3c9..4fd8e83da1f5d2 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -4,6 +4,9 @@ test_description='partial clone' . ./test-lib.sh +# missing promisor objects cause repacks which write bitmaps to fail +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + delete_object () { rm $1/.git/objects/$(echo $2 | sed -e 's|^..|&/|') } From 491850d49ff9f2fbe0eb6b2b439cda5bc4c7960e Mon Sep 17 00:00:00 2001 From: Jeff King <peff@peff.net> Date: Tue, 27 Jul 2021 17:20:15 -0400 Subject: [PATCH 093/198] t5310: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP Generating a MIDX bitmap confuses many of the tests in t5310, which expect to control whether and how bitmaps are written. Since the relevant MIDX-bitmap tests here are covered already in t5326, let's just disable the flag for the whole t5310 script. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5310-pack-bitmaps.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 4318f84d534516..673baa5c3ccc8a 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -8,6 +8,10 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . "$TEST_DIRECTORY"/lib-bundle.sh . "$TEST_DIRECTORY"/lib-bitmap.sh +# t5310 deals only with single-pack bitmaps, so don't write MIDX bitmaps in +# their place. 
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + objpath () { echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')" } From b301a8737e1179e46f0cf8e1a73c528a2f91f549 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:18 -0400 Subject: [PATCH 094/198] t5319: don't write MIDX bitmaps in t5319 This test is specifically about generating a midx still respecting a pack-based bitmap file. Generating a MIDX bitmap would confuse the test. Let's override the 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' variable to make sure we don't do so. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5319-multi-pack-index.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 909f9bf7b8ee79..a30095c9d761e2 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -491,7 +491,8 @@ test_expect_success 'repack preserves multi-pack-index when creating packs' ' compare_results_with_midx "after repack" test_expect_success 'multi-pack-index and pack-bitmap' ' - git -c repack.writeBitmaps=true repack -ad && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -c repack.writeBitmaps=true repack -ad && git multi-pack-index write && git rev-list --test-bitmap HEAD ' From 2b872a8859f1f3988cf84ef730bc98e364d9f180 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:20 -0400 Subject: [PATCH 095/198] t7700: update to work with MIDX bitmap test knob A number of these tests are focused only on pack-based bitmaps and need to be updated to disable 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' where necessary. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7700-repack.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 25b235c0630385..98eda3bfeb56c4 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -63,13 +63,14 @@ test_expect_success 'objects in packs marked .keep are not repacked' ' test_expect_success 'writing bitmaps via command-line can duplicate .keep objects' ' # build on $oid, $packid, and .keep state from previous - git repack -Adbl && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 git repack -Adbl && test_has_duplicate_object true ' test_expect_success 'writing bitmaps via config can duplicate .keep objects' ' # build on $oid, $packid, and .keep state from previous - git -c repack.writebitmaps=true repack -Adl && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -c repack.writebitmaps=true repack -Adl && test_has_duplicate_object true ' @@ -189,7 +190,9 @@ test_expect_success 'repack --keep-pack' ' test_expect_success 'bitmaps are created by default in bare repos' ' git clone --bare .git bare.git && - git -C bare.git repack -ad && + rm -f bare.git/objects/pack/*.bitmap && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -C bare.git repack -ad && bitmap=$(ls bare.git/objects/pack/*.bitmap) && test_path_is_file "$bitmap" ' @@ -200,7 +203,8 @@ test_expect_success 'incremental repack does not complain' ' ' test_expect_success 'bitmaps can be disabled on bare repos' ' - git -c repack.writeBitmaps=false -C bare.git repack -ad && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -c repack.writeBitmaps=false -C bare.git repack -ad && bitmap=$(ls bare.git/objects/pack/*.bitmap || :) && test -z "$bitmap" ' @@ -211,7 +215,8 @@ test_expect_success 'no bitmaps created if .keep files present' ' keep=${pack%.pack}.keep && test_when_finished "rm -f \"\$keep\"" && >"$keep" && - git -C bare.git repack -ad 2>stderr && + 
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -C bare.git repack -ad 2>stderr && test_must_be_empty stderr && find bare.git/objects/pack/ -type f -name "*.bitmap" >actual && test_must_be_empty actual @@ -222,7 +227,8 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' ' blob=$(test-tool genrandom big $((1024*1024)) | git -C bare.git hash-object -w --stdin) && git -C bare.git update-ref refs/tags/big $blob && - git -C bare.git repack -ad 2>stderr && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -C bare.git repack -ad 2>stderr && test_must_be_empty stderr && find bare.git/objects/pack -type f -name "*.bitmap" >actual && test_must_be_empty actual From 1786e92e75fe2f161cd48fa11616961d519cb17c Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:23 -0400 Subject: [PATCH 096/198] midx: respect 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' Introduce a new 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' environment variable to also write a multi-pack bitmap when 'GIT_TEST_MULTI_PACK_INDEX' is set. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/repack.c | 12 ++++++++++-- ci/run-build-and-tests.sh | 1 + midx.h | 2 ++ t/README | 4 ++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5f9bc74adc05af..82ab6682726f7a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -515,6 +515,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()) write_bitmaps = 0; + } else if (write_bitmaps && + git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0) && + git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) { + write_bitmaps = 0; } if (pack_kept_objects < 0) pack_kept_objects = write_bitmaps > 0; @@ -725,8 +729,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix) update_server_info(0); remove_temporary_files(); - if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) - write_midx_file(get_object_directory(), NULL, 0); + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { + unsigned flags = 0; + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) + flags |= MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX; + write_midx_file(get_object_directory(), NULL, flags); + } string_list_clear(&names, 0); string_list_clear(&rollback, 0); diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 3ce81ffee941b2..7ee9ba9325fb3a 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -23,6 +23,7 @@ linux-gcc) export GIT_TEST_COMMIT_GRAPH=1 export GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=1 export GIT_TEST_MULTI_PACK_INDEX=1 + export GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=1 export GIT_TEST_ADD_I_USE_BUILTIN=1 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_WRITE_REV_INDEX=1 diff --git a/midx.h b/midx.h index 350f4d0a7b4c20..aa3da557bb0ae9 100644 --- a/midx.h +++ b/midx.h @@ -8,6 +8,8 @@ struct pack_entry; struct repository; #define 
GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" +#define GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP \ + "GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP" struct multi_pack_index { struct multi_pack_index *next; diff --git a/t/README b/t/README index 1a2072b2c8a282..1311b8e17ad65a 100644 --- a/t/README +++ b/t/README @@ -425,6 +425,10 @@ GIT_TEST_MULTI_PACK_INDEX=<boolean>, when true, forces the multi-pack- index to be written after every 'git repack' command, and overrides the 'core.multiPackIndex' setting to true. +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=<boolean>, when true, sets the +'--bitmap' option on all invocations of 'git multi-pack-index write', +and ignores pack-objects' '--write-bitmap-index'. + GIT_TEST_SIDEBAND_ALL=<boolean>, when true, overrides the 'uploadpack.allowSidebandAll' setting to true, and when false, forces fetch-pack to not request sideband-all (even if the server advertises From 2195bea25eb6e6ef181a1274209ddf1ffe92fca0 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:26 -0400 Subject: [PATCH 097/198] p5310: extract full and partial bitmap tests A new p5326 introduced by the next patch will want these same tests, interjecting its own setup in between. Move them out so that both perf tests can reuse them. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/perf/lib-bitmap.sh | 69 ++++++++++++++++++++++++++++++++++++ t/perf/p5310-pack-bitmaps.sh | 65 ++------------------------------- 2 files changed, 72 insertions(+), 62 deletions(-) create mode 100644 t/perf/lib-bitmap.sh diff --git a/t/perf/lib-bitmap.sh b/t/perf/lib-bitmap.sh new file mode 100644 index 00000000000000..63d3bc7cece599 --- /dev/null +++ b/t/perf/lib-bitmap.sh @@ -0,0 +1,69 @@ +# Helper functions for testing bitmap performance; see p5310. 
+ +test_full_bitmap () { + test_perf 'simulated clone' ' + git pack-objects --stdout --all </dev/null >/dev/null + ' + + test_perf 'simulated fetch' ' + have=$(git rev-list HEAD~100 -1) && + { + echo HEAD && + echo ^$have + } | git pack-objects --revs --stdout >/dev/null + ' + + test_perf 'pack to file (bitmap)' ' + git pack-objects --use-bitmap-index --all pack1b </dev/null >/dev/null + ' + + test_perf 'rev-list (commits)' ' + git rev-list --all --use-bitmap-index >/dev/null + ' + + test_perf 'rev-list (objects)' ' + git rev-list --all --use-bitmap-index --objects >/dev/null + ' + + test_perf 'rev-list with tag negated via --not --all (objects)' ' + git rev-list perf-tag --not --all --use-bitmap-index --objects >/dev/null + ' + + test_perf 'rev-list with negative tag (objects)' ' + git rev-list HEAD --not perf-tag --use-bitmap-index --objects >/dev/null + ' + + test_perf 'rev-list count with blob:none' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=blob:none >/dev/null + ' + + test_perf 'rev-list count with blob:limit=1k' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=blob:limit=1k >/dev/null + ' + + test_perf 'rev-list count with tree:0' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=tree:0 >/dev/null + ' + + test_perf 'simulated partial clone' ' + git pack-objects --stdout --all --filter=blob:none </dev/null >/dev/null + ' +} + +test_partial_bitmap () { + test_perf 'clone (partial bitmap)' ' + git pack-objects --stdout --all </dev/null >/dev/null + ' + + test_perf 'pack to file (partial bitmap)' ' + git pack-objects --use-bitmap-index --all pack2b </dev/null >/dev/null + ' + + test_perf 'rev-list with tree filter (partial bitmap)' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=tree:0 >/dev/null + ' +} diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 452be01056c6b4..7ad4f237bc37ff 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ 
b/t/perf/p5310-pack-bitmaps.sh @@ -2,6 +2,7 @@ test_description='Tests pack performance using bitmaps' . ./perf-lib.sh +. "${TEST_DIRECTORY}/perf/lib-bitmap.sh" test_perf_large_repo @@ -25,56 +26,7 @@ test_perf 'repack to disk' ' git repack -ad ' -test_perf 'simulated clone' ' - git pack-objects --stdout --all </dev/null >/dev/null -' - -test_perf 'simulated fetch' ' - have=$(git rev-list HEAD~100 -1) && - { - echo HEAD && - echo ^$have - } | git pack-objects --revs --stdout >/dev/null -' - -test_perf 'pack to file (bitmap)' ' - git pack-objects --use-bitmap-index --all pack1b </dev/null >/dev/null -' - -test_perf 'rev-list (commits)' ' - git rev-list --all --use-bitmap-index >/dev/null -' - -test_perf 'rev-list (objects)' ' - git rev-list --all --use-bitmap-index --objects >/dev/null -' - -test_perf 'rev-list with tag negated via --not --all (objects)' ' - git rev-list perf-tag --not --all --use-bitmap-index --objects >/dev/null -' - -test_perf 'rev-list with negative tag (objects)' ' - git rev-list HEAD --not perf-tag --use-bitmap-index --objects >/dev/null -' - -test_perf 'rev-list count with blob:none' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=blob:none >/dev/null -' - -test_perf 'rev-list count with blob:limit=1k' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=blob:limit=1k >/dev/null -' - -test_perf 'rev-list count with tree:0' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=tree:0 >/dev/null -' - -test_perf 'simulated partial clone' ' - git pack-objects --stdout --all --filter=blob:none </dev/null >/dev/null -' +test_full_bitmap test_expect_success 'create partial bitmap state' ' # pick a commit to represent the repo tip in the past @@ -97,17 +49,6 @@ test_expect_success 'create partial bitmap state' ' git update-ref HEAD $orig_tip ' -test_perf 'clone (partial bitmap)' ' - git pack-objects --stdout --all </dev/null >/dev/null -' - -test_perf 'pack to file (partial bitmap)' ' - 
git pack-objects --use-bitmap-index --all pack2b </dev/null >/dev/null -' - -test_perf 'rev-list with tree filter (partial bitmap)' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=tree:0 >/dev/null -' +test_partial_bitmap test_done From 2f168e6f496cfeb40dbffbce1c62af1bcd2bfdbd Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Tue, 27 Jul 2021 17:20:28 -0400 Subject: [PATCH 098/198] p5326: perf tests for MIDX bitmaps These new performance tests demonstrate effectively the same behavior as p5310, but use a multi-pack bitmap instead of a single-pack one. Notably, p5326 does not create a MIDX bitmap with multiple packs. This is so we can measure a direct comparison between it and p5310. Any difference between the two is measuring just the overhead of using MIDX bitmaps. Here are the results of p5310 and p5326 together, measured at the same time and on the same machine (using a Xeon W-2255 CPU): Test HEAD ------------------------------------------------------------------------ 5310.2: repack to disk 96.78(93.39+11.33) 5310.3: simulated clone 9.98(9.79+0.19) 5310.4: simulated fetch 1.75(4.26+0.19) 5310.5: pack to file (bitmap) 28.20(27.87+8.70) 5310.6: rev-list (commits) 0.41(0.36+0.05) 5310.7: rev-list (objects) 1.61(1.54+0.07) 5310.8: rev-list count with blob:none 0.25(0.21+0.04) 5310.9: rev-list count with blob:limit=1k 2.65(2.54+0.10) 5310.10: rev-list count with tree:0 0.23(0.19+0.04) 5310.11: simulated partial clone 4.34(4.21+0.12) 5310.13: clone (partial bitmap) 11.05(12.21+0.48) 5310.14: pack to file (partial bitmap) 31.25(34.22+3.70) 5310.15: rev-list with tree filter (partial bitmap) 0.26(0.22+0.04) versus the same tests (this time using a multi-pack index): Test HEAD ------------------------------------------------------------------------ 5326.2: setup multi-pack index 78.99(75.29+11.58) 5326.3: simulated clone 11.78(11.56+0.22) 5326.4: simulated fetch 1.70(4.49+0.13) 5326.5: pack to file (bitmap) 28.02(27.72+8.76) 
5326.6: rev-list (commits) 0.42(0.36+0.06) 5326.7: rev-list (objects) 1.65(1.58+0.06) 5326.8: rev-list count with blob:none 0.26(0.21+0.05) 5326.9: rev-list count with blob:limit=1k 2.97(2.86+0.10) 5326.10: rev-list count with tree:0 0.25(0.20+0.04) 5326.11: simulated partial clone 5.65(5.49+0.16) 5326.13: clone (partial bitmap) 12.22(13.43+0.38) 5326.14: pack to file (partial bitmap) 30.05(31.57+7.25) 5326.15: rev-list with tree filter (partial bitmap) 0.24(0.20+0.04) There is slight overhead in "simulated clone", "simulated partial clone", and "clone (partial bitmap)". Unsurprisingly, that overhead is due to using the MIDX's reverse index to map between bit positions and MIDX positions. This can be reproduced by running "git repack -adb" along with "git multi-pack-index write --bitmap" in a large-ish repository. Then run: $ perf record -o pack.perf git -c core.multiPackIndex=false \ pack-objects --all --stdout >/dev/null </dev/null $ perf record -o midx.perf git -c core.multiPackIndex=true \ pack-objects --all --stdout >/dev/null </dev/null and compare the two with "perf diff -c delta -o 1 pack.perf midx.perf". The most notable results are below (the next largest positive delta is +0.14%): # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... .................. .......................... # +5.86% git [.] nth_midxed_offset +5.24% git [.] nth_midxed_pack_int_id 3.45% +0.97% git [.] offset_to_pack_pos 3.30% +0.57% git [.] pack_pos_to_offset +0.30% git [.] 
pack_pos_to_midx Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/perf/p5326-multi-pack-bitmaps.sh | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 t/perf/p5326-multi-pack-bitmaps.sh diff --git a/t/perf/p5326-multi-pack-bitmaps.sh b/t/perf/p5326-multi-pack-bitmaps.sh new file mode 100755 index 00000000000000..5845109ac787ac --- /dev/null +++ b/t/perf/p5326-multi-pack-bitmaps.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +test_description='Tests performance using midx bitmaps' +. ./perf-lib.sh +. "${TEST_DIRECTORY}/perf/lib-bitmap.sh" + +test_perf_large_repo + +test_expect_success 'enable multi-pack index' ' + git config core.multiPackIndex true +' + +test_perf 'setup multi-pack index' ' + git repack -ad && + git multi-pack-index write --bitmap +' + +test_full_bitmap + +test_expect_success 'create partial bitmap state' ' + # pick a commit to represent the repo tip in the past + cutoff=$(git rev-list HEAD~100 -1) && + orig_tip=$(git rev-parse HEAD) && + + # now pretend we have just one tip + rm -rf .git/logs .git/refs/* .git/packed-refs && + git update-ref HEAD $cutoff && + + # and then repack, which will leave us with a nice + # big bitmap pack of the "old" history, and all of + # the new history will be loose, as if it had been pushed + # up incrementally and exploded via unpack-objects + git repack -Ad && + git multi-pack-index write --bitmap && + + # and now restore our original tip, as if the pushes + # had happened + git update-ref HEAD $orig_tip +' + +test_partial_bitmap + +test_done From 8dc56c3c62fc8537764a1284baae485d288995c7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Wed, 28 Jul 2021 10:55:07 -0700 Subject: [PATCH 099/198] ll-merge: teach ll_binary_merge() a trivial three-way merge The low-level binary merge code assumed that the caller will not feed trivial merges that would have been resolved at the tree level; because of this, ll_binary_merge() 
assumes the ancestor is different from either side, always failing the merge in conflict unless -Xours or -Xtheirs is in effect. But "git apply --3way" codepath could ask us to perform three-way merge between two binaries A and B using A as the ancestor version. The current code always fails such an application, but when given a binary patch that turns A into B and asked to apply it to A, there is no reason to fail such a request---we can trivially tell that the result must be B. Arguably, this fix may belong to one level higher at ll_merge() function, which dispatches to lower-level merge drivers, possibly even before it renormalizes the three input buffers. But let's first see how this goes. Signed-off-by: Jerry Zhang <jerry@skydio.com> [jc: stolen new tests from Jerry's patch] Signed-off-by: Junio C Hamano <gitster@pobox.com> --- ll-merge.c | 56 +++++++++++++++++++++++++++------------ t/t4108-apply-threeway.sh | 45 +++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 17 deletions(-) diff --git a/ll-merge.c b/ll-merge.c index 261657578c756c..301e244971697e 100644 --- a/ll-merge.c +++ b/ll-merge.c @@ -46,6 +46,13 @@ void reset_merge_attributes(void) merge_attributes = NULL; } +static int same_mmfile(mmfile_t *a, mmfile_t *b) +{ + if (a->size != b->size) + return 0; + return !memcmp(a->ptr, b->ptr, a->size); +} + /* * Built-in low-levels */ @@ -58,9 +65,18 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused, const struct ll_merge_options *opts, int marker_size) { + int status; mmfile_t *stolen; assert(opts); + /* + * With -Xtheirs or -Xours, we have cleanly merged; + * otherwise we got a conflict, unless 3way trivially + * resolves. + */ + status = (opts->variant == XDL_MERGE_FAVOR_OURS || + opts->variant == XDL_MERGE_FAVOR_THEIRS) ? 0 : 1; + /* * The tentative merge result is the common ancestor for an * internal merge. 
For the final merge, it is "ours" by @@ -68,18 +84,30 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused, */ if (opts->virtual_ancestor) { stolen = orig; + status = 0; } else { - switch (opts->variant) { - default: - warning("Cannot merge binary files: %s (%s vs. %s)", - path, name1, name2); - /* fallthru */ - case XDL_MERGE_FAVOR_OURS: - stolen = src1; - break; - case XDL_MERGE_FAVOR_THEIRS: + if (same_mmfile(orig, src1)) { stolen = src2; - break; + status = 0; + } else if (same_mmfile(orig, src2)) { + stolen = src1; + status = 0; + } else if (same_mmfile(src1, src2)) { + stolen = src1; + status = 0; + } else { + switch (opts->variant) { + default: + warning("Cannot merge binary files: %s (%s vs. %s)", + path, name1, name2); + /* fallthru */ + case XDL_MERGE_FAVOR_OURS: + stolen = src1; + break; + case XDL_MERGE_FAVOR_THEIRS: + stolen = src2; + break; + } } } @@ -87,13 +115,7 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused, result->size = stolen->size; stolen->ptr = NULL; - /* - * With -Xtheirs or -Xours, we have cleanly merged; - * otherwise we got a conflict. - */ - return opts->variant == XDL_MERGE_FAVOR_OURS || - opts->variant == XDL_MERGE_FAVOR_THEIRS ? - 0 : 1; + return status; } static int ll_xdl_merge(const struct ll_merge_driver *drv_unused, diff --git a/t/t4108-apply-threeway.sh b/t/t4108-apply-threeway.sh index 65147efdea9a00..cc3aa3314a3448 100755 --- a/t/t4108-apply-threeway.sh +++ b/t/t4108-apply-threeway.sh @@ -230,4 +230,49 @@ test_expect_success 'apply with --3way --cached and conflicts' ' test_cmp expect.diff actual.diff ' +test_expect_success 'apply binary file patch' ' + git reset --hard main && + cp "$TEST_DIRECTORY/test-binary-1.png" bin.png && + git add bin.png && + git commit -m "add binary file" && + + cp "$TEST_DIRECTORY/test-binary-2.png" bin.png && + + git diff --binary >bin.diff && + git reset --hard && + + # Apply must succeed. 
+ git apply bin.diff +' + +test_expect_success 'apply binary file patch with 3way' ' + git reset --hard main && + cp "$TEST_DIRECTORY/test-binary-1.png" bin.png && + git add bin.png && + git commit -m "add binary file" && + + cp "$TEST_DIRECTORY/test-binary-2.png" bin.png && + + git diff --binary >bin.diff && + git reset --hard && + + # Apply must succeed. + git apply --3way --index bin.diff +' + +test_expect_success 'apply full-index patch with 3way' ' + git reset --hard main && + cp "$TEST_DIRECTORY/test-binary-1.png" bin.png && + git add bin.png && + git commit -m "add binary file" && + + cp "$TEST_DIRECTORY/test-binary-2.png" bin.png && + + git diff --full-index >bin.diff && + git reset --hard && + + # Apply must succeed. + git apply --3way --index bin.diff +' + test_done From 1578215dab1650b1793b154935d1e8a9bdcec24e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 20 Jul 2021 12:24:07 +0200 Subject: [PATCH 100/198] refs/files: remove unused REF_DELETING in lock_ref_oid_basic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lock_ref_oid_basic() function has gradually been replaced by most callers no longer performing a low-level "acquire lock, update and release", and instead using the ref transaction API. So there are only 4 remaining callers of lock_ref_oid_basic(). None of those callers pass REF_DELETING anymore, the last caller went away in 92b1551b1d (refs: resolve symbolic refs first, 2016-04-25). Before that we'd refactored and moved this code in: - 8df4e511387 (struct ref_update: move "have_old" into "flags", 2015-02-17) - 7bd9bcf372d (refs: split filesystem-based refs code into a new file, 2015-11-09) - 165056b2fc (lock_ref_for_update(): new function, 2016-04-24) We then finally stopped using it in 92b1551b1d (noted above). So let's remove the handling of this parameter. 
By itself this change doesn't benefit us much, but it's the start of even more removal of unused code in and around this function in subsequent commits. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 677b7e4cdd2d05..326f0224218beb 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -934,8 +934,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, if (mustexist) resolve_flags |= RESOLVE_REF_READING; - if (flags & REF_DELETING) - resolve_flags |= RESOLVE_REF_ALLOW_BAD_NAME; files_ref_path(refs, &ref_file, refname); resolved = !!refs_resolve_ref_unsafe(&refs->base, From 9e31fdd015bea033a7210f77b3e892e997588484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 20 Jul 2021 12:24:08 +0200 Subject: [PATCH 101/198] refs/files: remove unused "extras/skip" in lock_ref_oid_basic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lock_ref_oid_basic() function has gradually been replaced by use of the file transaction API, there are only 4 remaining callers of it. None of those callers pass non-NULL "extras" and "skip" parameters, the last such caller went away in 92b1551b1d4 (refs: resolve symbolic refs first, 2016-04-25), so let's remove the parameters. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 326f0224218beb..a59823d667ecff 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -915,8 +915,6 @@ static int create_reflock(const char *path, void *cb) static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, const char *refname, const struct object_id *old_oid, - const struct string_list *extras, - const struct string_list *skip, unsigned int flags, int *type, struct strbuf *err) { @@ -950,7 +948,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, last_errno = errno; if (!refs_verify_refname_available( &refs->base, - refname, extras, skip, err)) + refname, NULL, NULL, err)) strbuf_addf(err, "there are still refs under '%s'", refname); goto error_return; @@ -963,7 +961,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, last_errno = errno; if (last_errno != ENOTDIR || !refs_verify_refname_available(&refs->base, refname, - extras, skip, err)) + NULL, NULL, err)) strbuf_addf(err, "unable to resolve reference '%s': %s", refname, strerror(last_errno)); @@ -978,7 +976,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, */ if (is_null_oid(&lock->old_oid) && refs_verify_refname_available(refs->packed_ref_store, refname, - extras, skip, err)) { + NULL, NULL, err)) { last_errno = ENOTDIR; goto error_return; } @@ -1413,8 +1411,8 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store, logmoved = log; - lock = lock_ref_oid_basic(refs, newrefname, NULL, NULL, NULL, - REF_NO_DEREF, NULL, &err); + lock = lock_ref_oid_basic(refs, newrefname, NULL, REF_NO_DEREF, NULL, + &err); if (!lock) { if (copy) error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf); @@ -1436,7 +1434,7 @@ 
static int files_copy_or_rename_ref(struct ref_store *ref_store, goto out; rollback: - lock = lock_ref_oid_basic(refs, oldrefname, NULL, NULL, NULL, + lock = lock_ref_oid_basic(refs, oldrefname, NULL, REF_NO_DEREF, NULL, &err); if (!lock) { error("unable to lock %s for rollback: %s", oldrefname, err.buf); @@ -1845,7 +1843,7 @@ static int files_create_symref(struct ref_store *ref_store, int ret; lock = lock_ref_oid_basic(refs, refname, NULL, - NULL, NULL, REF_NO_DEREF, NULL, + REF_NO_DEREF, NULL, &err); if (!lock) { error("%s", err.buf); @@ -3064,7 +3062,7 @@ static int files_reflog_expire(struct ref_store *ref_store, * reference if --updateref was specified: */ lock = lock_ref_oid_basic(refs, refname, oid, - NULL, NULL, REF_NO_DEREF, + REF_NO_DEREF, &type, &err); if (!lock) { error("cannot lock ref '%s': %s", refname, err.buf); From d7373ed033d019db864d58ed675e4eb2fd649ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 20 Jul 2021 12:24:09 +0200 Subject: [PATCH 102/198] refs/files: remove unused "skip" in lock_raw_ref() too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused "skip" parameter to lock_raw_ref(), it was never used. We do use it when passing "skip" to the refs_rename_ref_available() function in files_copy_or_rename_ref(), but not here. This is part of a larger series that modifies lock_ref_oid_basic() extensively, there will be no more modifications of this function in this series, but since the preceding commit removed this unused parameter from lock_ref_oid_basic(), let's do it here too for consistency. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index a59823d667ecff..af332fa8fe4fe3 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -531,7 +531,6 @@ static void unlock_ref(struct ref_lock *lock) static int lock_raw_ref(struct files_ref_store *refs, const char *refname, int mustexist, const struct string_list *extras, - const struct string_list *skip, struct ref_lock **lock_p, struct strbuf *referent, unsigned int *type, @@ -568,7 +567,7 @@ static int lock_raw_ref(struct files_ref_store *refs, * reason to expect this error to be transitory. */ if (refs_verify_refname_available(&refs->base, refname, - extras, skip, err)) { + extras, NULL, err)) { if (mustexist) { /* * To the user the relevant error is @@ -673,7 +672,7 @@ static int lock_raw_ref(struct files_ref_store *refs, REMOVE_DIR_EMPTY_ONLY)) { if (refs_verify_refname_available( &refs->base, refname, - extras, skip, err)) { + extras, NULL, err)) { /* * The error message set by * verify_refname_available() is OK. 
@@ -710,7 +709,7 @@ static int lock_raw_ref(struct files_ref_store *refs, */ if (refs_verify_refname_available( refs->packed_ref_store, refname, - extras, skip, err)) + extras, NULL, err)) goto error_return; } @@ -2412,7 +2411,7 @@ static int lock_ref_for_update(struct files_ref_store *refs, } ret = lock_raw_ref(refs, update->refname, mustexist, - affected_refnames, NULL, + affected_refnames, &lock, &referent, &update->type, err); if (ret) { From ada77feb5fdaf2c96e939ebcb88b6a0ed9b1cc6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 20 Jul 2021 12:24:10 +0200 Subject: [PATCH 103/198] refs/debug: re-indent argument list for "prepare" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-indent this argument list that's been mis-indented since it was added in 34c319970d1 (refs/debug: trace into reflog expiry too, 2021-04-23). This makes a subsequent change smaller. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refs/debug.c b/refs/debug.c index 7db4abccc341b0..449ac3e6cc8e68 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -364,8 +364,8 @@ struct debug_reflog_expiry_should_prune { }; static void debug_reflog_expiry_prepare(const char *refname, - const struct object_id *oid, - void *cb_data) + const struct object_id *oid, + void *cb_data) { struct debug_reflog_expiry_should_prune *prune = cb_data; trace_printf_key(&trace_refs, "reflog_expire_prepare: %s\n", refname); From ab3bfeb93eb099a768224a2dc28be3ef254a7f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 27 Jul 2021 01:44:22 +0200 Subject: [PATCH 104/198] refs: make repo_dwim_log() accept a NULL oid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change 
the repo_dwim_log() function initially added as dwim_log() in eb3a48221fd (log --reflog: use dwim_log, 2007-02-09) to accept a NULL oid parameter. The refs_resolve_ref_unsafe() function it invokes already deals with it, but it didn't. This allows for a bit more clarity in a reflog-walk.c codepath added in f2eba66d4d1 (Enable HEAD@{...} and make it independent from the current branch, 2007-02-03). We'll shortly use this in builtin/reflog.c as well. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- reflog-walk.c | 3 +-- refs.c | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/reflog-walk.c b/reflog-walk.c index e9cd3283694dec..8ac4b284b6b685 100644 --- a/reflog-walk.c +++ b/reflog-walk.c @@ -158,10 +158,9 @@ int add_reflog_for_walk(struct reflog_walk_info *info, } reflogs = read_complete_reflog(branch); if (!reflogs || reflogs->nr == 0) { - struct object_id oid; char *b; int ret = dwim_log(branch, strlen(branch), - &oid, &b); + NULL, &b); if (ret > 1) free(b); else if (ret == 1) { diff --git a/refs.c b/refs.c index 8b9f7c3a80a0f6..d963543675992a 100644 --- a/refs.c +++ b/refs.c @@ -698,7 +698,7 @@ int repo_dwim_log(struct repository *r, const char *str, int len, strbuf_addf(&path, *p, len, str); ref = refs_resolve_ref_unsafe(refs, path.buf, RESOLVE_REF_READING, - &hash, NULL); + oid ? 
&hash : NULL, NULL); if (!ref) continue; if (refs_reflog_exists(refs, path.buf)) @@ -710,7 +710,8 @@ int repo_dwim_log(struct repository *r, const char *str, int len, continue; if (!logs_found++) { *log = xstrdup(it); - oidcpy(oid, &hash); + if (oid) + oidcpy(oid, &hash); } if (!warn_ambiguous_refs) break; From 74271b968f29ebf01fd595e1e8dae97da1fa02ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 27 Jul 2021 01:44:23 +0200 Subject: [PATCH 105/198] refs/files: add a comment about refs_reflog_exists() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a comment about why it is that we need to check for the existence of a reflog we're deleting after we've successfully acquired the lock in files_reflog_expire(). As noted in [1] the lock protocol for reflogs is somewhat intuitive. The early-exit code that the comment applies to dates all the way back to 4264dc15e19 (git reflog expire, 2006-12-19). 1. https://lore.kernel.org/git/54DCDA42.2060800@alum.mit.edu/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/refs/files-backend.c b/refs/files-backend.c index af332fa8fe4fe3..54153064166d9b 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3068,6 +3068,19 @@ static int files_reflog_expire(struct ref_store *ref_store, strbuf_release(&err); return -1; } + + /* + * When refs are deleted, their reflog is deleted before the + * ref itself is deleted. This is because there is no separate + * lock for reflog; instead we take a lock on the ref with + * lock_ref_oid_basic(). + * + * If a race happens and the reflog doesn't exist after we've + * acquired the lock that's OK. We've got nothing more to do; + * We were asked to delete the reflog, but someone else + * deleted it!
The caller doesn't care that we deleted it, + * just that it is deleted. So we can return successfully. + */ if (!refs_reflog_exists(ref_store, refname)) { unlock_ref(lock); return 0; From b3736601d0cc489757eac92545e75f9e75341638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 20 Jul 2021 12:33:24 +0200 Subject: [PATCH 106/198] refs file backend: move raceproof_create_file() here MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the raceproof_create_file() API added to cache.h and object-file.c in 177978f56ad (raceproof_create_file(): new function, 2017-01-06) to its only user, refs/files-backend.c. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- cache.h | 43 ----------------- object-file.c | 68 --------------------------- refs/files-backend.c | 109 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 111 deletions(-) diff --git a/cache.h b/cache.h index ba04ff8bd36b36..eb4dfe6381f548 100644 --- a/cache.h +++ b/cache.h @@ -1202,49 +1202,6 @@ enum scld_error safe_create_leading_directories(char *path); enum scld_error safe_create_leading_directories_const(const char *path); enum scld_error safe_create_leading_directories_no_share(char *path); -/* - * Callback function for raceproof_create_file(). This function is - * expected to do something that makes dirname(path) permanent despite - * the fact that other processes might be cleaning up empty - * directories at the same time. Usually it will create a file named - * path, but alternatively it could create another file in that - * directory, or even chdir() into that directory. The function should - * return 0 if the action was completed successfully. On error, it - * should return a nonzero result and set errno. 
- * raceproof_create_file() treats two errno values specially: - * - * - ENOENT -- dirname(path) does not exist. In this case, - * raceproof_create_file() tries creating dirname(path) - * (and any parent directories, if necessary) and calls - * the function again. - * - * - EISDIR -- the file already exists and is a directory. In this - * case, raceproof_create_file() removes the directory if - * it is empty (and recursively any empty directories that - * it contains) and calls the function again. - * - * Any other errno causes raceproof_create_file() to fail with the - * callback's return value and errno. - * - * Obviously, this function should be OK with being called again if it - * fails with ENOENT or EISDIR. In other scenarios it will not be - * called again. - */ -typedef int create_file_fn(const char *path, void *cb); - -/* - * Create a file in dirname(path) by calling fn, creating leading - * directories if necessary. Retry a few times in case we are racing - * with another process that is trying to clean up the directory that - * contains path. See the documentation for create_file_fn for more - * details. - * - * Return the value and set the errno that resulted from the most - * recent call of fn. fn is always called at least once, and will be - * called more than once if it returns ENOENT or EISDIR. - */ -int raceproof_create_file(const char *path, create_file_fn fn, void *cb); - int mkdir_in_gitdir(const char *path); char *expand_user_path(const char *path, int real_home); const char *enter_repo(const char *path, int strict); diff --git a/object-file.c b/object-file.c index ecca5a8da00f14..231a02997baced 100644 --- a/object-file.c +++ b/object-file.c @@ -414,74 +414,6 @@ enum scld_error safe_create_leading_directories_const(const char *path) return result; } -int raceproof_create_file(const char *path, create_file_fn fn, void *cb) -{ - /* - * The number of times we will try to remove empty directories - * in the way of path. 
This is only 1 because if another - * process is racily creating directories that conflict with - * us, we don't want to fight against them. - */ - int remove_directories_remaining = 1; - - /* - * The number of times that we will try to create the - * directories containing path. We are willing to attempt this - * more than once, because another process could be trying to - * clean up empty directories at the same time as we are - * trying to create them. - */ - int create_directories_remaining = 3; - - /* A scratch copy of path, filled lazily if we need it: */ - struct strbuf path_copy = STRBUF_INIT; - - int ret, save_errno; - - /* Sanity check: */ - assert(*path); - -retry_fn: - ret = fn(path, cb); - save_errno = errno; - if (!ret) - goto out; - - if (errno == EISDIR && remove_directories_remaining-- > 0) { - /* - * A directory is in the way. Maybe it is empty; try - * to remove it: - */ - if (!path_copy.len) - strbuf_addstr(&path_copy, path); - - if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY)) - goto retry_fn; - } else if (errno == ENOENT && create_directories_remaining-- > 0) { - /* - * Maybe the containing directory didn't exist, or - * maybe it was just deleted by a process that is - * racing with us to clean up empty directories. 
Try - * to create it: - */ - enum scld_error scld_result; - - if (!path_copy.len) - strbuf_addstr(&path_copy, path); - - do { - scld_result = safe_create_leading_directories(path_copy.buf); - if (scld_result == SCLD_OK) - goto retry_fn; - } while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0); - } - -out: - strbuf_release(&path_copy); - errno = save_errno; - return ret; -} - static void fill_loose_path(struct strbuf *buf, const struct object_id *oid) { int i; diff --git a/refs/files-backend.c b/refs/files-backend.c index 5d12003471e586..913868b85d33fe 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -852,6 +852,115 @@ static struct ref_iterator *files_ref_iterator_begin( return ref_iterator; } +/* + * Callback function for raceproof_create_file(). This function is + * expected to do something that makes dirname(path) permanent despite + * the fact that other processes might be cleaning up empty + * directories at the same time. Usually it will create a file named + * path, but alternatively it could create another file in that + * directory, or even chdir() into that directory. The function should + * return 0 if the action was completed successfully. On error, it + * should return a nonzero result and set errno. + * raceproof_create_file() treats two errno values specially: + * + * - ENOENT -- dirname(path) does not exist. In this case, + * raceproof_create_file() tries creating dirname(path) + * (and any parent directories, if necessary) and calls + * the function again. + * + * - EISDIR -- the file already exists and is a directory. In this + * case, raceproof_create_file() removes the directory if + * it is empty (and recursively any empty directories that + * it contains) and calls the function again. + * + * Any other errno causes raceproof_create_file() to fail with the + * callback's return value and errno. + * + * Obviously, this function should be OK with being called again if it + * fails with ENOENT or EISDIR. 
In other scenarios it will not be + * called again. + */ +typedef int create_file_fn(const char *path, void *cb); + +/* + * Create a file in dirname(path) by calling fn, creating leading + * directories if necessary. Retry a few times in case we are racing + * with another process that is trying to clean up the directory that + * contains path. See the documentation for create_file_fn for more + * details. + * + * Return the value and set the errno that resulted from the most + * recent call of fn. fn is always called at least once, and will be + * called more than once if it returns ENOENT or EISDIR. + */ +static int raceproof_create_file(const char *path, create_file_fn fn, void *cb) +{ + /* + * The number of times we will try to remove empty directories + * in the way of path. This is only 1 because if another + * process is racily creating directories that conflict with + * us, we don't want to fight against them. + */ + int remove_directories_remaining = 1; + + /* + * The number of times that we will try to create the + * directories containing path. We are willing to attempt this + * more than once, because another process could be trying to + * clean up empty directories at the same time as we are + * trying to create them. + */ + int create_directories_remaining = 3; + + /* A scratch copy of path, filled lazily if we need it: */ + struct strbuf path_copy = STRBUF_INIT; + + int ret, save_errno; + + /* Sanity check: */ + assert(*path); + +retry_fn: + ret = fn(path, cb); + save_errno = errno; + if (!ret) + goto out; + + if (errno == EISDIR && remove_directories_remaining-- > 0) { + /* + * A directory is in the way. 
Maybe it is empty; try + * to remove it: + */ + if (!path_copy.len) + strbuf_addstr(&path_copy, path); + + if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY)) + goto retry_fn; + } else if (errno == ENOENT && create_directories_remaining-- > 0) { + /* + * Maybe the containing directory didn't exist, or + * maybe it was just deleted by a process that is + * racing with us to clean up empty directories. Try + * to create it: + */ + enum scld_error scld_result; + + if (!path_copy.len) + strbuf_addstr(&path_copy, path); + + do { + scld_result = safe_create_leading_directories(path_copy.buf); + if (scld_result == SCLD_OK) + goto retry_fn; + } while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0); + } + +out: + strbuf_release(&path_copy); + errno = save_errno; + return ret; +} + static int remove_empty_directories(struct strbuf *path) { /* From 466d936b9db984588689cf881ec49354928b1ceb Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Tue, 20 Jul 2021 12:33:25 +0200 Subject: [PATCH 107/198] refs: remove EINVAL errno output from specification of read_raw_ref_fn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit does not change code; it documents the fact that an alternate ref backend does not need to return EINVAL from read_raw_ref_fn to function properly. This is correct, because refs_read_raw_ref is only called from; * resolve_ref_unsafe(), which does not care for the EINVAL errno result. * refs_verify_refname_available(), which does not inspect errno. * files-backend.c, where errno is overwritten on failure. * packed-backend.c (is_packed_transaction_needed), which calls it for the packed ref backend, which never emits EINVAL. A grep for EINVAL */*c reveals that no code checks errno against EINVAL after reading references. In addition, the refs.h file does not mention errno at all. 
A grep over resolve_ref_unsafe() turned up the following callers that inspect errno: * sequencer.c::print_commit_summary, which uses it for die_errno * lock_ref_oid_basic(), which only treats EISDIR and ENOTDIR specially. The files ref backend does use EINVAL. The files backend does not call into the generic API (refs_read_raw), but into the files-specific function (files_read_raw_ref), which we are not changing in this commit. As the errno sideband is unintuitive and error-prone, remove EINVAL value, as a step towards getting rid of the errno sideband altogether. Spotted by Ævar Arnfjörð Bjarmason <avarab@gmail.com>. Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/refs-internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 3155708345fcbc..dc0d826c3acc83 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -622,9 +622,9 @@ typedef int reflog_expire_fn(struct ref_store *ref_store, * * Return 0 on success. If the ref doesn't exist, set errno to ENOENT * and return -1. If the ref exists but is neither a symbolic ref nor - * an object ID, it is broken; set REF_ISBROKEN in type, set errno to - * EINVAL, and return -1. If there is another error reading the ref, - * set errno appropriately and return -1. + * an object ID, it is broken; set REF_ISBROKEN in type, and return -1 + * (errno should not be ENOENT) If there is another error reading the + * ref, set errno appropriately and return -1. * * Backend-specific flags might be set in type as well, regardless of * outcome. 
From ffe2bc99bb77674b69ee7212e597963f2abf150d Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Tue, 20 Jul 2021 12:33:26 +0200 Subject: [PATCH 108/198] refs/files-backend: stop setting errno from lock_ref_oid_basic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refs/files-backend.c::lock_ref_oid_basic() tries to signal how it failed to its callers using errno. It is safe to stop setting errno here, because the callers of this file-scope static function are * files_copy_or_rename_ref() * files_create_symref() * files_reflog_expire() None of them looks at errno after seeing a negative return from lock_ref_oid_basic() to make any decision, and no caller of these three functions looks at errno after they signal a failure by returning a negative value. In particular, * files_copy_or_rename_ref() - here, calls are followed by error() (which performs I/O) or write_ref_to_lockfile() (which calls parse_object() which may perform I/O) * files_create_symref() - here, calls are followed by error() or create_symref_locked() (which performs I/O and does not inspect errno) * files_reflog_expire() - here, calls are followed by error() or refs_reflog_exists() (which calls a function in a vtable that is not documented to use and/or preserve errno) Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 913868b85d33fe..635e7653c29f64 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -982,7 +982,6 @@ static int create_reflock(const char *path, void *cb) /* * Locks a ref returning the lock on success and NULL on failure. - * On failure errno is set to something meaningful. 
*/ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, const char *refname, @@ -991,7 +990,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, { struct strbuf ref_file = STRBUF_INIT; struct ref_lock *lock; - int last_errno = 0; files_assert_main_repository(refs, "lock_ref_oid_basic"); assert(err); @@ -1002,11 +1000,10 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, if (!refs_resolve_ref_unsafe(&refs->base, refname, RESOLVE_REF_NO_RECURSE, &lock->old_oid, type)) { - last_errno = errno; if (!refs_verify_refname_available(&refs->base, refname, NULL, NULL, err)) strbuf_addf(err, "unable to resolve reference '%s': %s", - refname, strerror(last_errno)); + refname, strerror(errno)); goto error_return; } @@ -1019,15 +1016,12 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, */ if (is_null_oid(&lock->old_oid) && refs_verify_refname_available(refs->packed_ref_store, refname, - NULL, NULL, err)) { - last_errno = ENOTDIR; + NULL, NULL, err)) goto error_return; - } lock->ref_name = xstrdup(refname); if (raceproof_create_file(ref_file.buf, create_reflock, &lock->lk)) { - last_errno = errno; unable_to_lock_message(ref_file.buf, errno, err); goto error_return; } @@ -1044,7 +1038,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, out: strbuf_release(&ref_file); - errno = last_errno; return lock; } From 8bb2a971949c50787809f14ccf1d2a5d5324f4e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 27 Jul 2021 01:44:24 +0200 Subject: [PATCH 109/198] reflog expire: don't lock reflogs using previously seen OID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During reflog expiry, the cmd_reflog_expire() function first iterates over all reflogs in logs/*, and then one-by-one acquires the lock for each one and expires it. 
This behavior has been with us since this command was implemented in 4264dc15e1 ("git reflog expire", 2006-12-19). Change this to stop calling lock_ref_oid_basic() with the OID we saw when we looped over the logs, instead have it pass the OID it managed to lock. This mostly mitigates a race condition where e.g. "git gc" will fail in a concurrently updated repository because the branch moved since "git reflog expire --all" was started. I.e. with: error: cannot lock ref '<refname>': ref '<refname>' is at <OID-A> but expected <OID-B> This behavior of passing in an "oid" was needed for an edge-case that I've untangled in this and preceding commits though, namely that we needed this OID because we'd: 1. Look up the reflog name/OID via dwim_log() 2. With that OID, lock the reflog 3. Later in builtin/reflog.c we use the OID we looked up as input to lookup_commit_reference_gently(), assured that it's equal to the OID we got from dwim_log(). We can be sure that this change is safe to make because between dwim_log (step #1) and lock_ref_oid_basic (step #2) there was no other logic relevant to the OID or expiry run in the cmd_reflog_expire() caller. We can thus treat that code as a black box, before and after this change it would get an OID that's been locked, the only difference is that now we mostly won't be failing to get the lock due to the TOCTOU race[0]. That failure was purely an implementation detail in how the "current OID" was looked up, it was divorced from the locking mechanism. What do we mean by "mostly"? It mostly mitigates it because we'll still run into cases where the ref is locked and being updated as we want to expire it, and other git processes wanting to update the refs will in turn race with us as we expire the reflog. That remaining race can in turn be mitigated with the core.filesRefLockTimeout setting, see 4ff0f01cb7 ("refs: retry acquiring reference locks for 100ms", 2017-08-21).
In practice if that value is high enough we'll probably never have ref updates or reflog expiry failing, since the clients involved will retry for far longer than the time any of those operations could take. See [1] for an initial report of how this impacted "git gc" and a large discussion about this change in early 2019. In particular the patch looked good to Michael Haggerty, see his reply [2]. That message seems to not have made it to the ML archive, its content is quoted in full in my [3]. I'm leaving behind now-unused code in the refs API etc. that takes the now-NULL "unused_oid" argument, and other code that can be simplified now that we never have an OID in that context, that'll be cleaned up in subsequent commits, but for now let's narrowly focus on fixing the "git gc" issue. As the modified assert() shows we always pass a NULL oid to reflog_expire() now. Unfortunately this sort of probabilistic contention is hard to turn into a test. I've tested this by running the following three subshells in concurrent terminals: ( rm -rf /tmp/git && git init /tmp/git && while true do head -c 10 /dev/urandom | hexdump >/tmp/git/out && git -C /tmp/git add out && git -C /tmp/git commit -m"out" done ) ( rm -rf /tmp/git-clone && git clone file:///tmp/git /tmp/git-clone && while git -C /tmp/git-clone pull do date done ) ( while git -C /tmp/git-clone reflog expire --all do date done ) Before this change the "reflog expire" would fail really quickly with the "but expected" error noted above. After this change both the "pull" and "reflog expire" will run for a while, but eventually fail because I get unlucky with core.filesRefLockTimeout (the "reflog expire" is in a really tight loop). As noted above that can in turn be mitigated with higher values of core.filesRefLockTimeout than the 100ms default.
As noted in the commentary added in the preceding commit there's also the case of branches being racily deleted, that can be tested by adding this to the above: ( while git -C /tmp/git-clone branch topic master && git -C /tmp/git-clone branch -D topic do date done ) With core.filesRefLockTimeout set to 10 seconds (it can probably be a lot lower) I managed to run all four of these concurrently for about an hour, and accumulated ~125k commits, auto-gc's and all, and didn't have a single failure. The loops visibly stall while waiting for the lock, but that's expected and desired behavior. 0. https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use 1. https://lore.kernel.org/git/87tvg7brlm.fsf@evledraar.gmail.com/ 2. http://lore.kernel.org/git/b870a17d-2103-41b8-3cbc-7389d5fff33a@alum.mit.edu 3. https://lore.kernel.org/git/87pnqkco8v.fsf@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/reflog.c | 13 ++++++------- refs.h | 2 +- refs/files-backend.c | 7 +++++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/builtin/reflog.c b/builtin/reflog.c index 09541d1c80483c..61795f22d53f3d 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -629,8 +629,9 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix) free_worktrees(worktrees); for (i = 0; i < collected.nr; i++) { struct collected_reflog *e = collected.e[i]; + set_reflog_expiry_param(&cb.cmd, explicit_expiry, e->reflog); - status |= reflog_expire(e->reflog, &e->oid, flags, + status |= reflog_expire(e->reflog, NULL, flags, reflog_expiry_prepare, should_expire_reflog_ent, reflog_expiry_cleanup, @@ -642,13 +643,12 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix) for (; i < argc; i++) { char *ref; - struct object_id oid; - if (!dwim_log(argv[i], strlen(argv[i]), &oid, &ref)) { + if (!dwim_log(argv[i], strlen(argv[i]), NULL, &ref)) { status |= error(_("%s 
points nowhere!"), argv[i]); continue; } set_reflog_expiry_param(&cb.cmd, explicit_expiry, ref); - status |= reflog_expire(ref, &oid, flags, + status |= reflog_expire(ref, NULL, flags, reflog_expiry_prepare, should_expire_reflog_ent, reflog_expiry_cleanup, @@ -700,7 +700,6 @@ static int cmd_reflog_delete(int argc, const char **argv, const char *prefix) for ( ; i < argc; i++) { const char *spec = strstr(argv[i], "@{"); - struct object_id oid; char *ep, *ref; int recno; @@ -709,7 +708,7 @@ static int cmd_reflog_delete(int argc, const char **argv, const char *prefix) continue; } - if (!dwim_log(argv[i], spec - argv[i], &oid, &ref)) { + if (!dwim_log(argv[i], spec - argv[i], NULL, &ref)) { status |= error(_("no reflog for '%s'"), argv[i]); continue; } @@ -724,7 +723,7 @@ static int cmd_reflog_delete(int argc, const char **argv, const char *prefix) cb.cmd.expire_total = 0; } - status |= reflog_expire(ref, &oid, flags, + status |= reflog_expire(ref, NULL, flags, reflog_expiry_prepare, should_expire_reflog_ent, reflog_expiry_cleanup, diff --git a/refs.h b/refs.h index 48970dfc7e0f0d..ddbf15f1c2119d 100644 --- a/refs.h +++ b/refs.h @@ -796,7 +796,7 @@ enum expire_reflog_flags { * expiration policy that is desired. * * reflog_expiry_prepare_fn -- Called once after the reference is - * locked. + * locked. Called with the OID of the locked reference. * * reflog_expiry_should_prune_fn -- Called once for each entry in the * existing reflog. 
It should return true iff that entry should be diff --git a/refs/files-backend.c b/refs/files-backend.c index 54153064166d9b..ccdf45504985b3 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3032,7 +3032,7 @@ static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid, } static int files_reflog_expire(struct ref_store *ref_store, - const char *refname, const struct object_id *oid, + const char *refname, const struct object_id *unused_oid, unsigned int flags, reflog_expiry_prepare_fn prepare_fn, reflog_expiry_should_prune_fn should_prune_fn, @@ -3049,6 +3049,7 @@ static int files_reflog_expire(struct ref_store *ref_store, int status = 0; int type; struct strbuf err = STRBUF_INIT; + const struct object_id *oid; memset(&cb, 0, sizeof(cb)); cb.flags = flags; @@ -3060,7 +3061,7 @@ static int files_reflog_expire(struct ref_store *ref_store, * reference itself, plus we might need to update the * reference if --updateref was specified: */ - lock = lock_ref_oid_basic(refs, refname, oid, + lock = lock_ref_oid_basic(refs, refname, NULL, REF_NO_DEREF, &type, &err); if (!lock) { @@ -3068,6 +3069,7 @@ static int files_reflog_expire(struct ref_store *ref_store, strbuf_release(&err); return -1; } + oid = &lock->old_oid; /* * When refs are deleted, their reflog is deleted before the @@ -3111,6 +3113,7 @@ static int files_reflog_expire(struct ref_store *ref_store, } } + assert(!unused_oid); (*prepare_fn)(refname, oid, cb.policy_cb); refs_for_each_reflog_ent(ref_store, refname, expire_reflog_ent, &cb); (*cleanup_fn)(cb.policy_cb); From 5794236350422402e16f34a161b82cc1004b7a4d Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Tue, 20 Jul 2021 12:33:27 +0200 Subject: [PATCH 110/198] refs: make errno output explicit for read_raw_ref_fn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it explicit how alternative ref backends should report errors in read_raw_ref_fn. 
read_raw_ref_fn needs to supply a credible errno for a number of cases. These are primarily: 1) The files backend calls read_raw_ref from lock_raw_ref, and uses the resulting error codes to create/remove directories as needed. 2) ENOENT should be translated into a zero OID, optionally with REF_ISBROKEN set, returning the last successfully resolved symref. This is necessary so read_raw_ref("HEAD") on an empty repo returns refs/heads/main (or the default branch du-jour), and we know on which branch to create the first commit. Make this information flow explicit by adding a failure_errno to the signature of read_raw_ref. All errnos from the files backend are still propagated unchanged, even though inspection suggests only ENOTDIR, EISDIR and ENOENT are relevant. Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs.c | 2 +- refs/debug.c | 4 ++-- refs/files-backend.c | 29 +++++++++++++++-------------- refs/packed-backend.c | 8 ++++---- refs/refs-internal.h | 20 ++++++++++++-------- 5 files changed, 34 insertions(+), 29 deletions(-) diff --git a/refs.c b/refs.c index d963543675992a..fc6c0ddffaed41 100644 --- a/refs.c +++ b/refs.c @@ -1682,7 +1682,7 @@ int refs_read_raw_ref(struct ref_store *ref_store, } return ref_store->be->read_raw_ref(ref_store, refname, oid, referent, - type); + type, &errno); } /* This function needs to return a meaningful errno on failure */ diff --git a/refs/debug.c b/refs/debug.c index 449ac3e6cc8e68..70a78b516abaf2 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -238,7 +238,7 @@ debug_ref_iterator_begin(struct ref_store *ref_store, const char *prefix, static int debug_read_raw_ref(struct ref_store *ref_store, const char *refname, struct object_id *oid, struct strbuf *referent, - unsigned int *type) + unsigned int *type, int *failure_errno) { struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; int res = 0; @@ -246,7 
+246,7 @@ static int debug_read_raw_ref(struct ref_store *ref_store, const char *refname, oidcpy(oid, null_oid()); errno = 0; res = drefs->refs->be->read_raw_ref(drefs->refs, refname, oid, referent, - type); + type, failure_errno); if (res == 0) { trace_printf_key(&trace_refs, "read_raw_ref: %s: %s (=> %s) type %x: %d\n", diff --git a/refs/files-backend.c b/refs/files-backend.c index 635e7653c29f64..ee37c2e6c99806 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -341,9 +341,9 @@ static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs) return refs->loose; } -static int files_read_raw_ref(struct ref_store *ref_store, - const char *refname, struct object_id *oid, - struct strbuf *referent, unsigned int *type) +static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno) { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); @@ -354,7 +354,6 @@ static int files_read_raw_ref(struct ref_store *ref_store, struct stat st; int fd; int ret = -1; - int save_errno; int remaining_retries = 3; *type = 0; @@ -459,10 +458,9 @@ static int files_read_raw_ref(struct ref_store *ref_store, ret = parse_loose_ref_contents(buf, oid, referent, type); out: - save_errno = errno; + *failure_errno = errno; strbuf_release(&sb_path); strbuf_release(&sb_contents); - errno = save_errno; return ret; } @@ -540,6 +538,7 @@ static int lock_raw_ref(struct files_ref_store *refs, struct strbuf ref_file = STRBUF_INIT; int attempts_remaining = 3; int ret = TRANSACTION_GENERIC_ERROR; + int failure_errno; assert(err); files_assert_main_repository(refs, "lock_raw_ref"); @@ -610,7 +609,9 @@ static int lock_raw_ref(struct files_ref_store *refs, if (hold_lock_file_for_update_timeout( &lock->lk, ref_file.buf, LOCK_NO_DEREF, get_files_ref_lock_timeout_ms()) < 0) { - if (errno == ENOENT && --attempts_remaining > 0) { + int myerr = errno; 
+ errno = 0; + if (myerr == ENOENT && --attempts_remaining > 0) { /* * Maybe somebody just deleted one of the * directories leading to ref_file. Try @@ -618,7 +619,7 @@ static int lock_raw_ref(struct files_ref_store *refs, */ goto retry; } else { - unable_to_lock_message(ref_file.buf, errno, err); + unable_to_lock_message(ref_file.buf, myerr, err); goto error_return; } } @@ -628,9 +629,9 @@ static int lock_raw_ref(struct files_ref_store *refs, * fear that its value will change. */ - if (files_read_raw_ref(&refs->base, refname, - &lock->old_oid, referent, type)) { - if (errno == ENOENT) { + if (files_read_raw_ref(&refs->base, refname, &lock->old_oid, referent, + type, &failure_errno)) { + if (failure_errno == ENOENT) { if (mustexist) { /* Garden variety missing reference. */ strbuf_addf(err, "unable to resolve reference '%s'", @@ -654,7 +655,7 @@ static int lock_raw_ref(struct files_ref_store *refs, * reference named "refs/foo/bar/baz". */ } - } else if (errno == EISDIR) { + } else if (failure_errno == EISDIR) { /* * There is a directory in the way. It might have * contained references that have been deleted. 
If @@ -692,13 +693,13 @@ static int lock_raw_ref(struct files_ref_store *refs, goto error_return; } } - } else if (errno == EINVAL && (*type & REF_ISBROKEN)) { + } else if (failure_errno == EINVAL && (*type & REF_ISBROKEN)) { strbuf_addf(err, "unable to resolve reference '%s': " "reference broken", refname); goto error_return; } else { strbuf_addf(err, "unable to resolve reference '%s': %s", - refname, strerror(errno)); + refname, strerror(failure_errno)); goto error_return; } diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 24a360b719ff25..159ac7762401b8 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -724,9 +724,9 @@ static struct snapshot *get_snapshot(struct packed_ref_store *refs) return refs->snapshot; } -static int packed_read_raw_ref(struct ref_store *ref_store, - const char *refname, struct object_id *oid, - struct strbuf *referent, unsigned int *type) +static int packed_read_raw_ref(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno) { struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); @@ -739,7 +739,7 @@ static int packed_read_raw_ref(struct ref_store *ref_store, if (!rec) { /* refname is not a packed reference. */ - errno = ENOENT; + *failure_errno = ENOENT; return -1; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index dc0d826c3acc83..33a31d5c2366c5 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -620,11 +620,15 @@ typedef int reflog_expire_fn(struct ref_store *ref_store, * properly-formatted or even safe reference name. NEITHER INPUT NOR * OUTPUT REFERENCE NAMES ARE VALIDATED WITHIN THIS FUNCTION. * - * Return 0 on success. If the ref doesn't exist, set errno to ENOENT - * and return -1. 
If the ref exists but is neither a symbolic ref nor - * an object ID, it is broken; set REF_ISBROKEN in type, and return -1 - * (errno should not be ENOENT) If there is another error reading the - * ref, set errno appropriately and return -1. + * Return 0 on success, or -1 on failure. If the ref exists but is neither a + * symbolic ref nor an object ID, it is broken. In this case set REF_ISBROKEN in + * type, and return -1 (failure_errno should not be ENOENT) + * + * failure_errno provides errno codes that are interpreted beyond error + * reporting. The following error codes have special meaning: + * * ENOENT: the ref doesn't exist + * * EISDIR: ref name is a directory + * * ENOTDIR: ref prefix is not a directory * * Backend-specific flags might be set in type as well, regardless of * outcome. @@ -638,9 +642,9 @@ typedef int reflog_expire_fn(struct ref_store *ref_store, * - in all other cases, referent will be untouched, and therefore * refname will still be valid and unchanged. */ -typedef int read_raw_ref_fn(struct ref_store *ref_store, - const char *refname, struct object_id *oid, - struct strbuf *referent, unsigned int *type); +typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno); struct ref_storage_be { struct ref_storage_be *next; From 066c64af8603953234c191e64c5b50bd226ad198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 27 Jul 2021 01:44:25 +0200 Subject: [PATCH 111/198] refs/files: remove unused "oid" in lock_ref_oid_basic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the preceding commit the last caller that passed a non-NULL OID was changed to pass NULL to lock_ref_oid_basic(). 
As noted in preceding commits use of this API has been going away (we should use ref transactions, or lock_raw_ref()), so we're unlikely to gain new callers that want to pass the "oid". So let's remove it, doing so means we can remove the "mustexist" condition, and therefore anything except the "flags = RESOLVE_REF_NO_RECURSE" case. Furthermore, since the verify_lock() function we called did most of its work when the "oid" was passed (as "old_oid") we can inline the trivial part of it that remains in its only remaining caller. With a NULL "oid" passed it was equivalent to calling refs_read_ref_full() followed by oidclr(). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 72 ++++++++------------------------------------ 1 file changed, 12 insertions(+), 60 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index ccdf45504985b3..98d9aa1601fa7b 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -852,42 +852,6 @@ static struct ref_iterator *files_ref_iterator_begin( return ref_iterator; } -/* - * Verify that the reference locked by lock has the value old_oid - * (unless it is NULL). Fail if the reference doesn't exist and - * mustexist is set. Return 0 on success. On error, write an error - * message to err, set errno, and return a negative value. - */ -static int verify_lock(struct ref_store *ref_store, struct ref_lock *lock, - const struct object_id *old_oid, int mustexist, - struct strbuf *err) -{ - assert(err); - - if (refs_read_ref_full(ref_store, lock->ref_name, - mustexist ? 
RESOLVE_REF_READING : 0, - &lock->old_oid, NULL)) { - if (old_oid) { - int save_errno = errno; - strbuf_addf(err, "can't verify ref '%s'", lock->ref_name); - errno = save_errno; - return -1; - } else { - oidclr(&lock->old_oid); - return 0; - } - } - if (old_oid && !oideq(&lock->old_oid, old_oid)) { - strbuf_addf(err, "ref '%s' is at %s but expected %s", - lock->ref_name, - oid_to_hex(&lock->old_oid), - oid_to_hex(old_oid)); - errno = EBUSY; - return -1; - } - return 0; -} - static int remove_empty_directories(struct strbuf *path) { /* @@ -913,15 +877,12 @@ static int create_reflock(const char *path, void *cb) */ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, const char *refname, - const struct object_id *old_oid, unsigned int flags, int *type, struct strbuf *err) { struct strbuf ref_file = STRBUF_INIT; struct ref_lock *lock; int last_errno = 0; - int mustexist = (old_oid && !is_null_oid(old_oid)); - int resolve_flags = RESOLVE_REF_NO_RECURSE; int resolved; files_assert_main_repository(refs, "lock_ref_oid_basic"); @@ -929,12 +890,9 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, CALLOC_ARRAY(lock, 1); - if (mustexist) - resolve_flags |= RESOLVE_REF_READING; - files_ref_path(refs, &ref_file, refname); - resolved = !!refs_resolve_ref_unsafe(&refs->base, - refname, resolve_flags, + resolved = !!refs_resolve_ref_unsafe(&refs->base, refname, + RESOLVE_REF_NO_RECURSE, &lock->old_oid, type); if (!resolved && errno == EISDIR) { /* @@ -952,8 +910,8 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, refname); goto error_return; } - resolved = !!refs_resolve_ref_unsafe(&refs->base, - refname, resolve_flags, + resolved = !!refs_resolve_ref_unsafe(&refs->base, refname, + RESOLVE_REF_NO_RECURSE, &lock->old_oid, type); } if (!resolved) { @@ -988,10 +946,10 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, goto error_return; } - if (verify_lock(&refs->base, lock, old_oid, 
mustexist, err)) { - last_errno = errno; - goto error_return; - } + if (refs_read_ref_full(&refs->base, lock->ref_name, + 0, + &lock->old_oid, NULL)) + oidclr(&lock->old_oid); goto out; error_return: @@ -1410,8 +1368,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store, logmoved = log; - lock = lock_ref_oid_basic(refs, newrefname, NULL, REF_NO_DEREF, NULL, - &err); + lock = lock_ref_oid_basic(refs, newrefname, REF_NO_DEREF, NULL, &err); if (!lock) { if (copy) error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf); @@ -1433,8 +1390,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store, goto out; rollback: - lock = lock_ref_oid_basic(refs, oldrefname, NULL, - REF_NO_DEREF, NULL, &err); + lock = lock_ref_oid_basic(refs, oldrefname, REF_NO_DEREF, NULL, &err); if (!lock) { error("unable to lock %s for rollback: %s", oldrefname, err.buf); strbuf_release(&err); @@ -1841,9 +1797,7 @@ static int files_create_symref(struct ref_store *ref_store, struct ref_lock *lock; int ret; - lock = lock_ref_oid_basic(refs, refname, NULL, - REF_NO_DEREF, NULL, - &err); + lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, NULL, &err); if (!lock) { error("%s", err.buf); strbuf_release(&err); @@ -3061,9 +3015,7 @@ static int files_reflog_expire(struct ref_store *ref_store, * reference itself, plus we might need to update the * reference if --updateref was specified: */ - lock = lock_ref_oid_basic(refs, refname, NULL, - REF_NO_DEREF, - &type, &err); + lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, &type, &err); if (!lock) { error("cannot lock ref '%s': %s", refname, err.buf); strbuf_release(&err); From 0dac16d8eb9c2bb9ed394639327cd9c5cd8f5235 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Tue, 20 Jul 2021 12:33:28 +0200 Subject: [PATCH 112/198] refs: add failure_errno to refs_read_raw_ref() signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lets us use 
the explicit errno output parameter in refs_resolve_ref_unsafe. Some of our callers explicitly do not care about the errno; rather than understanding NULL, let's have them declare that they don't care by passing in an "ignore_errno". There are only three of them, and using that pattern will make it more obvious that they want to throw away data. Let's also add a comment to one of the callers about why we'd like to ignore the errno. Let's not extend that to refs_resolve_ref_unsafe() itself for now: it has a large set of legacy callers, so we're faking up the old "errno" behavior for it. We can convert those callers to refs_resolve_ref_unsafe_with_errno() later. We are leaving out the refs_read_special_head() in refs_read_raw_ref() for now; as noted in the next commit, moving it to "failure_errno" will require some special consideration. We're intentionally mis-indenting the argument list of the new refs_resolve_ref_unsafe_with_errno(): it will be non-static in a subsequent commit, and doing it this way makes that diff smaller. 
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs.c | 61 ++++++++++++++++++++++++++++++------------- refs/files-backend.c | 10 ++++--- refs/packed-backend.c | 7 ++--- refs/refs-internal.h | 6 ++--- 4 files changed, 56 insertions(+), 28 deletions(-) diff --git a/refs.c b/refs.c index fc6c0ddffaed41..728285c922000e 100644 --- a/refs.c +++ b/refs.c @@ -1672,30 +1672,33 @@ static int refs_read_special_head(struct ref_store *ref_store, return result; } -int refs_read_raw_ref(struct ref_store *ref_store, - const char *refname, struct object_id *oid, - struct strbuf *referent, unsigned int *type) +int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno) { + assert(failure_errno); if (!strcmp(refname, "FETCH_HEAD") || !strcmp(refname, "MERGE_HEAD")) { return refs_read_special_head(ref_store, refname, oid, referent, type); } return ref_store->be->read_raw_ref(ref_store, refname, oid, referent, - type, &errno); + type, failure_errno); } -/* This function needs to return a meaningful errno on failure */ -const char *refs_resolve_ref_unsafe(struct ref_store *refs, - const char *refname, - int resolve_flags, - struct object_id *oid, int *flags) +static const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs, + const char *refname, + int resolve_flags, + struct object_id *oid, + int *flags, int *failure_errno) { static struct strbuf sb_refname = STRBUF_INIT; struct object_id unused_oid; int unused_flags; int symref_count; + assert(failure_errno); + if (!oid) oid = &unused_oid; if (!flags) @@ -1706,7 +1709,7 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || !refname_is_safe(refname)) { - errno = EINVAL; + *failure_errno 
= EINVAL; return NULL; } @@ -1724,8 +1727,8 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, for (symref_count = 0; symref_count < SYMREF_MAXDEPTH; symref_count++) { unsigned int read_flags = 0; - if (refs_read_raw_ref(refs, refname, - oid, &sb_refname, &read_flags)) { + if (refs_read_raw_ref(refs, refname, oid, &sb_refname, + &read_flags, failure_errno)) { *flags |= read_flags; /* In reading mode, refs must eventually resolve */ @@ -1737,9 +1740,9 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, * may show errors besides ENOENT if there are * similarly-named refs. */ - if (errno != ENOENT && - errno != EISDIR && - errno != ENOTDIR) + if (*failure_errno != ENOENT && + *failure_errno != EISDIR && + *failure_errno != ENOTDIR) return NULL; oidclr(oid); @@ -1766,7 +1769,7 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) { if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) || !refname_is_safe(refname)) { - errno = EINVAL; + *failure_errno = EINVAL; return NULL; } @@ -1774,10 +1777,24 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, } } - errno = ELOOP; + *failure_errno = ELOOP; return NULL; } +const char *refs_resolve_ref_unsafe(struct ref_store *refs, const char *refname, + int resolve_flags, struct object_id *oid, + int *flags) +{ + int failure_errno = 0; + const char *refn; + refn = refs_resolve_ref_unsafe_with_errno(refs, refname, resolve_flags, + oid, flags, &failure_errno); + if (!refn) + /* For unmigrated legacy callers */ + errno = failure_errno; + return refn; +} + /* backend functions */ int refs_init_db(struct strbuf *err) { @@ -2228,6 +2245,13 @@ int refs_verify_refname_available(struct ref_store *refs, strbuf_grow(&dirname, strlen(refname) + 1); for (slash = strchr(refname, '/'); slash; slash = strchr(slash + 1, '/')) { + /* + * Just saying "Is a directory" when we e.g. 
can't + * lock some multi-level ref isn't very informative, + * the user won't be told *what* is a directory, so + * let's not use strerror() below. + */ + int ignore_errno; /* Expand dirname to the new prefix, not including the trailing slash: */ strbuf_add(&dirname, refname + dirname.len, slash - refname - dirname.len); @@ -2239,7 +2263,8 @@ int refs_verify_refname_available(struct ref_store *refs, if (skip && string_list_has_string(skip, dirname.buf)) continue; - if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent, &type)) { + if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent, + &type, &ignore_errno)) { strbuf_addf(err, _("'%s' exists; cannot create '%s'"), dirname.buf, refname); goto cleanup; diff --git a/refs/files-backend.c b/refs/files-backend.c index ee37c2e6c99806..7191f036ba70f5 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -381,10 +381,11 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, goto out; if (lstat(path, &st) < 0) { + int ignore_errno; if (errno != ENOENT) goto out; - if (refs_read_raw_ref(refs->packed_ref_store, refname, - oid, referent, type)) { + if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, + referent, type, &ignore_errno)) { errno = ENOENT; goto out; } @@ -418,13 +419,14 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, /* Is it a directory? 
*/ if (S_ISDIR(st.st_mode)) { + int ignore_errno; /* * Even though there is a directory where the loose * ref is supposed to be, there could still be a * packed ref: */ - if (refs_read_raw_ref(refs->packed_ref_store, refname, - oid, referent, type)) { + if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, + referent, type, &ignore_errno)) { errno = EISDIR; goto out; } diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 159ac7762401b8..923b9ad89de314 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1347,6 +1347,7 @@ int is_packed_transaction_needed(struct ref_store *ref_store, ret = 0; for (i = 0; i < transaction->nr; i++) { struct ref_update *update = transaction->updates[i]; + int failure_errno; unsigned int type; struct object_id oid; @@ -1357,9 +1358,9 @@ int is_packed_transaction_needed(struct ref_store *ref_store, */ continue; - if (!refs_read_raw_ref(ref_store, update->refname, - &oid, &referent, &type) || - errno != ENOENT) { + if (!refs_read_raw_ref(ref_store, update->refname, &oid, + &referent, &type, &failure_errno) || + failure_errno != ENOENT) { /* * We have to actually delete that reference * -> this transaction is needed. 
diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 33a31d5c2366c5..7beb38f79ccd81 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -149,9 +149,9 @@ struct ref_update { const char refname[FLEX_ARRAY]; }; -int refs_read_raw_ref(struct ref_store *ref_store, - const char *refname, struct object_id *oid, - struct strbuf *referent, unsigned int *type); +int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, + struct object_id *oid, struct strbuf *referent, + unsigned int *type, int *failure_errno); /* * Write an error to `err` and return a nonzero value iff the same From ac7337ee0d2cd84fcbde733ce9ffcbc1dcc768b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 27 Jul 2021 01:44:26 +0200 Subject: [PATCH 113/198] refs/files: remove unused "errno == EISDIR" code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we lock a reference like "foo" we need to handle the case where "foo" exists, but is an empty directory. That's what this code added in bc7127ef0f (ref locking: allow 'foo' when 'foo/bar' used to exist but not anymore., 2006-09-30) seems like it should be dealing with. Except it doesn't, and we never take this branch. The reason is that when bc7127ef0f was written this looked like: ref = resolve_ref([...]); if (!ref && errno == EISDIR) { [...] And in resolve_ref() we had this code: fd = open(path, O_RDONLY); if (fd < 0) return NULL; I.e. we would attempt to read "foo" with open(), which would fail with EISDIR and we'd return NULL. We'd then take this branch, call remove_empty_directories() and continue. Since a1c1d8170d (refs_resolve_ref_unsafe: handle d/f conflicts for writes, 2017-10-06) we don't. E.g. in the case of files_copy_or_rename_ref() our callstack will look something like: [...] 
-> files_copy_or_rename_ref() -> lock_ref_oid_basic() -> refs_resolve_ref_unsafe() At that point the first (now only) refs_resolve_ref_unsafe() call in lock_ref_oid_basic() would do the equivalent of this in the resulting call to refs_read_raw_ref() in refs_resolve_ref_unsafe(): /* Via refs_read_raw_ref() */ fd = open(path, O_RDONLY); if (fd < 0) /* get errno == EISDIR */ /* later, in refs_resolve_ref_unsafe() */ if ([...] && errno != EISDIR) return NULL; [...] /* returns the refs/heads/foo to the caller, even though it's a directory */ return refname; I.e. even though we got an "errno == EISDIR" we won't take this branch, since in cases of EISDIR "resolved" is always non-NULL. I.e. we pretend at this point as though everything's OK and there is no "foo" directory. We then proceed with the entire ref update and don't call remove_empty_directories() until we call commit_ref_update(). See 5387c0d883 (commit_ref(): if there is an empty dir in the way, delete it, 2016-05-05) for the addition of that code, and a1c1d8170db (refs_resolve_ref_unsafe: handle d/f conflicts for writes, 2017-10-06) for the commit that changed the original codepath added in bc7127ef0f to use this "EISDIR" handling. Further historical commentary: Before the two preceding commits the caller in files_reflog_expire() was the only one out of our 4 callers that would pass non-NULL as an oid. We would then set a (now gone) "resolve_flags" to "RESOLVE_REF_READING" and just before that "errno != EISDIR" check do: if (resolve_flags & RESOLVE_REF_READING) return NULL; There may have been some case where this ended up mattering and we couldn't safely make this change before we removed the "oid" parameter, but I don't think there was, see [1] for some discussion on that. In any case, now that we've removed the "oid" parameter in a preceding commit we can be sure that this code is redundant, so let's remove it. 1. 
http://lore.kernel.org/git/871r801yp6.fsf@evledraar.gmail.com Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 98d9aa1601fa7b..28cd8853f5266b 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -883,7 +883,6 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, struct strbuf ref_file = STRBUF_INIT; struct ref_lock *lock; int last_errno = 0; - int resolved; files_assert_main_repository(refs, "lock_ref_oid_basic"); assert(err); @@ -891,30 +890,9 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, CALLOC_ARRAY(lock, 1); files_ref_path(refs, &ref_file, refname); - resolved = !!refs_resolve_ref_unsafe(&refs->base, refname, - RESOLVE_REF_NO_RECURSE, - &lock->old_oid, type); - if (!resolved && errno == EISDIR) { - /* - * we are trying to lock foo but we used to - * have foo/bar which now does not exist; - * it is normal for the empty directory 'foo' - * to remain. 
- */ - if (remove_empty_directories(&ref_file)) { - last_errno = errno; - if (!refs_verify_refname_available( - &refs->base, - refname, NULL, NULL, err)) - strbuf_addf(err, "there are still refs under '%s'", - refname); - goto error_return; - } - resolved = !!refs_resolve_ref_unsafe(&refs->base, refname, - RESOLVE_REF_NO_RECURSE, - &lock->old_oid, type); - } - if (!resolved) { + if (!refs_resolve_ref_unsafe(&refs->base, refname, + RESOLVE_REF_NO_RECURSE, + &lock->old_oid, type)) { last_errno = errno; if (last_errno != ENOTDIR || !refs_verify_refname_available(&refs->base, refname, From a073125011b776bd7bd1deedfd0ec898915f6763 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Tue, 20 Jul 2021 12:33:29 +0200 Subject: [PATCH 114/198] refs: explicitly return failure_errno from parse_loose_ref_contents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EINVAL error from parse_loose_ref_contents is used in files-backend to create a custom error message. In untangling this we discovered a tricky edge case. The refs_read_special_head() function was relying on parse_loose_ref_contents() setting EINVAL. By converting it to use "saved_errno" we can migrate away from "errno" in this part of the code entirely, and do away with an existing "save_errno" pattern, its only purpose was to not clobber the "errno" we previously needed at the end of files_read_raw_ref(). Let's assert that we can do that by not having files_read_raw_ref() itself operate on *failure_errno in addition to passing it on. Instead we'll assert that if we return non-zero we actually do set errno, thus assuring ourselves and callers that they can trust the resulting "failure_errno". 
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs.c | 8 +++++--- refs/files-backend.c | 30 +++++++++++++++++++----------- refs/refs-internal.h | 6 ++++-- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/refs.c b/refs.c index 728285c922000e..b31dbdd0fa2be5 100644 --- a/refs.c +++ b/refs.c @@ -1654,7 +1654,8 @@ int for_each_fullref_in_prefixes(const char *namespace, static int refs_read_special_head(struct ref_store *ref_store, const char *refname, struct object_id *oid, - struct strbuf *referent, unsigned int *type) + struct strbuf *referent, unsigned int *type, + int *failure_errno) { struct strbuf full_path = STRBUF_INIT; struct strbuf content = STRBUF_INIT; @@ -1664,7 +1665,8 @@ static int refs_read_special_head(struct ref_store *ref_store, if (strbuf_read_file(&content, full_path.buf, 0) < 0) goto done; - result = parse_loose_ref_contents(content.buf, oid, referent, type); + result = parse_loose_ref_contents(content.buf, oid, referent, type, + failure_errno); done: strbuf_release(&full_path); @@ -1679,7 +1681,7 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, assert(failure_errno); if (!strcmp(refname, "FETCH_HEAD") || !strcmp(refname, "MERGE_HEAD")) { return refs_read_special_head(ref_store, refname, oid, referent, - type); + type, failure_errno); } return ref_store->be->read_raw_ref(ref_store, refname, oid, referent, diff --git a/refs/files-backend.c b/refs/files-backend.c index 7191f036ba70f5..be9e2c548272f3 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -355,6 +355,7 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, int fd; int ret = -1; int remaining_retries = 3; + int myerr = 0; *type = 0; strbuf_reset(&sb_path); @@ -382,11 +383,13 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, if (lstat(path, &st) < 0) { int 
ignore_errno; - if (errno != ENOENT) + myerr = errno; + errno = 0; + if (myerr != ENOENT) goto out; if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, referent, type, &ignore_errno)) { - errno = ENOENT; + myerr = ENOENT; goto out; } ret = 0; @@ -397,7 +400,9 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, if (S_ISLNK(st.st_mode)) { strbuf_reset(&sb_contents); if (strbuf_readlink(&sb_contents, path, st.st_size) < 0) { - if (errno == ENOENT || errno == EINVAL) + myerr = errno; + errno = 0; + if (myerr == ENOENT || myerr == EINVAL) /* inconsistent with lstat; retry */ goto stat_ref; else @@ -427,7 +432,7 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, */ if (refs_read_raw_ref(refs->packed_ref_store, refname, oid, referent, type, &ignore_errno)) { - errno = EISDIR; + myerr = EISDIR; goto out; } ret = 0; @@ -440,7 +445,8 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, */ fd = open(path, O_RDONLY); if (fd < 0) { - if (errno == ENOENT && !S_ISLNK(st.st_mode)) + myerr = errno; + if (myerr == ENOENT && !S_ISLNK(st.st_mode)) /* inconsistent with lstat; retry */ goto stat_ref; else @@ -448,26 +454,28 @@ static int files_read_raw_ref(struct ref_store *ref_store, const char *refname, } strbuf_reset(&sb_contents); if (strbuf_read(&sb_contents, fd, 256) < 0) { - int save_errno = errno; close(fd); - errno = save_errno; goto out; } close(fd); strbuf_rtrim(&sb_contents); buf = sb_contents.buf; - ret = parse_loose_ref_contents(buf, oid, referent, type); + ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr); out: - *failure_errno = errno; + if (ret && !myerr) + BUG("returning non-zero %d, should have set myerr!", ret); + *failure_errno = myerr; + strbuf_release(&sb_path); strbuf_release(&sb_contents); return ret; } int parse_loose_ref_contents(const char *buf, struct object_id *oid, - struct strbuf *referent, unsigned int *type) + struct strbuf *referent, 
unsigned int *type, + int *failure_errno) { const char *p; if (skip_prefix(buf, "ref:", &buf)) { @@ -486,7 +494,7 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid, if (parse_oid_hex(buf, oid, &p) || (*p != '\0' && !isspace(*p))) { *type |= REF_ISBROKEN; - errno = EINVAL; + *failure_errno = EINVAL; return -1; } return 0; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 7beb38f79ccd81..9aa4af8183692d 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -692,10 +692,12 @@ struct ref_store { }; /* - * Parse contents of a loose ref file. + * Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for + * invalid contents. */ int parse_loose_ref_contents(const char *buf, struct object_id *oid, - struct strbuf *referent, unsigned int *type); + struct strbuf *referent, unsigned int *type, + int *failure_errno); /* * Fill in the generic part of refs and add it to our collection of From af5d6e2c1fbd2d09424ea0898ad6c5e35b7545be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 27 Jul 2021 01:44:27 +0200 Subject: [PATCH 115/198] refs/files: remove unused "errno != ENOTDIR" condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a follow-up to the preceding commit where we removed the adjacent "errno == EISDIR" condition in the same function, remove the "last_errno != ENOTDIR" condition here. It's not possible for us to hit this condition added in 5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F conflicts, 2015-05-11). 
Since a1c1d8170db (refs_resolve_ref_unsafe: handle d/f conflicts for writes, 2017-10-06) we've explicitly caught these in refs_resolve_ref_unsafe() before returning NULL: if (errno != ENOENT && errno != EISDIR && errno != ENOTDIR) return NULL; We'd then always return the refname from refs_resolve_ref_unsafe() even if we were in a broken state as explained in the preceding commit. The elided context here is a call to refs_resolve_ref_unsafe(). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 28cd8853f5266b..5d12003471e586 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -894,8 +894,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, RESOLVE_REF_NO_RECURSE, &lock->old_oid, type)) { last_errno = errno; - if (last_errno != ENOTDIR || - !refs_verify_refname_available(&refs->base, refname, + if (!refs_verify_refname_available(&refs->base, refname, NULL, NULL, err)) strbuf_addf(err, "unable to resolve reference '%s': %s", refname, strerror(last_errno)); From 9f6b82ec2801fdde1d09abc6b651c48aba381567 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Tue, 20 Jul 2021 12:33:30 +0200 Subject: [PATCH 116/198] refs: make errno output explicit for refs_resolve_ref_unsafe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces refs_resolve_ref_unsafe_with_errno(), which makes the API contract for the errno output explicit. The implementation still relies on the global errno variable to ensure no side effects of this refactoring. 
lock_ref_oid_basic() in files-backend.c is the only caller of refs_resolve_ref() that needs error information to make logic decisions, so update that caller Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs.c | 2 +- refs.h | 11 +++++++++++ refs/files-backend.c | 10 ++++++---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/refs.c b/refs.c index b31dbdd0fa2be5..9497b51e0a1aab 100644 --- a/refs.c +++ b/refs.c @@ -1688,7 +1688,7 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, type, failure_errno); } -static const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs, +const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs, const char *refname, int resolve_flags, struct object_id *oid, diff --git a/refs.h b/refs.h index ddbf15f1c2119d..9a5b4763ee6a78 100644 --- a/refs.h +++ b/refs.h @@ -68,6 +68,17 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, int resolve_flags, struct object_id *oid, int *flags); +/** + * refs_resolve_ref_unsafe_with_errno() is like + * refs_resolve_ref_unsafe(), but provide access to errno code that + * lead to a failure. We guarantee that errno is set to a meaningful + * value on non-zero return. 
+ */ +const char *refs_resolve_ref_unsafe_with_errno(struct ref_store *refs, + const char *refname, + int resolve_flags, + struct object_id *oid, + int *flags, int *failure_errno); const char *resolve_ref_unsafe(const char *refname, int resolve_flags, struct object_id *oid, int *flags); diff --git a/refs/files-backend.c b/refs/files-backend.c index be9e2c548272f3..598a487db9e07e 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1001,6 +1001,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, { struct strbuf ref_file = STRBUF_INIT; struct ref_lock *lock; + int resolve_errno = 0; files_assert_main_repository(refs, "lock_ref_oid_basic"); assert(err); @@ -1008,13 +1009,14 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, CALLOC_ARRAY(lock, 1); files_ref_path(refs, &ref_file, refname); - if (!refs_resolve_ref_unsafe(&refs->base, refname, - RESOLVE_REF_NO_RECURSE, - &lock->old_oid, type)) { + if (!refs_resolve_ref_unsafe_with_errno(&refs->base, refname, + RESOLVE_REF_NO_RECURSE, + &lock->old_oid, type, + &resolve_errno)) { if (!refs_verify_refname_available(&refs->base, refname, NULL, NULL, err)) strbuf_addf(err, "unable to resolve reference '%s': %s", - refname, strerror(errno)); + refname, strerror(resolve_errno)); goto error_return; } From 326f261b5e79c372ff5138d30d4d814591b604f0 Mon Sep 17 00:00:00 2001 From: ZheNing Hu <adlternative@gmail.com> Date: Tue, 3 Aug 2021 01:16:21 +0000 Subject: [PATCH 117/198] cherry-pick: fix bug when used with GIT_CHERRY_PICK_HELP GIT_CHERRY_PICK_HELP is an environment variable, as the implementation detail of some porcelain in git to help realize the rebasing steps. E.g. `git rebase -p` set GIT_CHERRY_PICK_HELP value in `git-rebase--preserve-merges.sh`, `git rebase --merge` set GIT_CHERRY_PICK_HELP value in run_specific_rebase(). 
But if we set the value of GIT_CHERRY_PICK_HELP when using `git cherry-pick`, CHERRY_PICK_HEAD will be deleted, and we will then get an error when we try to use `git cherry-pick --continue` or another cherry-pick command. Introduce a new "hidden" option `--delete-cherry-pick-head` for git cherry-pick which indicates that CHERRY_PICK_HEAD will be deleted when a conflict occurs; it is provided for some porcelain commands of git like `git-rebase--preserve-merges.sh`. After `git rebase -p` is completely abolished, this option should be removed. At the same time, add the flag `delete_cherry_pick_head` to `struct replay_opts`. We can decide whether to delete CHERRY_PICK_HEAD by setting and checking this flag bit. Then we split print_advice() into two parts: firstly, print_advice() will only be responsible for outputting content; secondly, we check whether the `delete_cherry_pick_head` flag is set and, if so, delete CHERRY_PICK_HEAD. In this way, the steps of printing advice and deleting CHERRY_PICK_HEAD are decoupled. Finally, let `git-rebase--preserve-merges.sh` use the `--delete-cherry-pick-head` option when it executes git cherry-pick, and set the `delete_cherry_pick_head` flag in get_replay_opts() when we are using `git rebase --merge`, which fixes this breakage. It is worth mentioning that we now use advise() to print the content of GIT_CHERRY_PICK_HELP in print_advice(), so each line of output will start with "hint: ".
Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by Hariom Verma <hariom18599@gmail.com>: Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Hepled-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/rebase.c | 1 + builtin/revert.c | 2 ++ git-rebase--preserve-merges.sh | 2 +- sequencer.c | 28 +++++++++++++--------------- sequencer.h | 1 + t/t3507-cherry-pick-conflict.sh | 31 +++++++++++++++++++++---------- 6 files changed, 39 insertions(+), 26 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 12f093121d9ed3..08ba437c6a0a33 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -152,6 +152,7 @@ static struct replay_opts get_replay_opts(const struct rebase_options *opts) oidcpy(&replay.squash_onto, opts->squash_onto); replay.have_squash_onto = 1; } + replay.delete_cherry_pick_head = 1; return replay; } diff --git a/builtin/revert.c b/builtin/revert.c index 237f2f18d4c035..15a4b6fe4eeba5 100644 --- a/builtin/revert.c +++ b/builtin/revert.c @@ -127,6 +127,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts) OPT_BOOL(0, "allow-empty", &opts->allow_empty, N_("preserve initially empty commits")), OPT_BOOL(0, "allow-empty-message", &opts->allow_empty_message, N_("allow commits with empty messages")), OPT_BOOL(0, "keep-redundant-commits", &opts->keep_redundant_commits, N_("keep redundant, empty commits")), + OPT_BOOL_F(0, "delete-cherry-pick-head", &opts->delete_cherry_pick_head, + N_("delete CHERRY_PICK_HEAD when conflict occurs"), PARSE_OPT_HIDDEN), OPT_END(), }; options = parse_options_concat(options, cp_extra); diff --git a/git-rebase--preserve-merges.sh b/git-rebase--preserve-merges.sh index b9c71d2a71bdd3..eaa8f9de2c5a16 100644 --- a/git-rebase--preserve-merges.sh +++ b/git-rebase--preserve-merges.sh @@ -444,7 +444,7 @@ pick_one_preserving_merges () { output eval git cherry-pick 
$allow_rerere_autoupdate \ $allow_empty_message \ ${gpg_sign_opt:+$(git rev-parse --sq-quote "$gpg_sign_opt")} \ - "$strategy_args" "$@" || + "$strategy_args" --delete-cherry-pick-head "$@" || die_with_patch $sha1 "$(eval_gettext "Could not pick \$sha1")" ;; esac diff --git a/sequencer.c b/sequencer.c index 0bec01cf38e817..83cf6a5da3c24e 100644 --- a/sequencer.c +++ b/sequencer.c @@ -397,24 +397,13 @@ static void free_message(struct commit *commit, struct commit_message *msg) unuse_commit_buffer(commit, msg->message); } -static void print_advice(struct repository *r, int show_hint, - struct replay_opts *opts) +static void print_advice(struct replay_opts *opts, int show_hint) { char *msg = getenv("GIT_CHERRY_PICK_HELP"); if (msg) { - fprintf(stderr, "%s\n", msg); - /* - * A conflict has occurred but the porcelain - * (typically rebase --interactive) wants to take care - * of the commit itself so remove CHERRY_PICK_HEAD - */ - refs_delete_ref(get_main_ref_store(r), "", "CHERRY_PICK_HEAD", - NULL, 0); - return; - } - - if (show_hint) { + advise("%s\n", msg); + } else if (show_hint) { if (opts->no_commit) advise(_("after resolving the conflicts, mark the corrected paths\n" "with 'git add <paths>' or 'git rm <paths>'")); @@ -2265,7 +2254,16 @@ static int do_pick_commit(struct repository *r, ? _("could not revert %s... %s") : _("could not apply %s... 
%s"), short_commit_name(commit), msg.subject); - print_advice(r, res == 1, opts); + print_advice(opts, res == 1); + if (opts->delete_cherry_pick_head) { + /* + * A conflict has occurred but the porcelain + * (typically rebase --interactive) wants to take care + * of the commit itself so remove CHERRY_PICK_HEAD + */ + refs_delete_ref(get_main_ref_store(r), "", "CHERRY_PICK_HEAD", + NULL, 0); + } repo_rerere(r, opts->allow_rerere_auto); goto leave; } diff --git a/sequencer.h b/sequencer.h index d57d8ea23d7a22..76fb4af56fd464 100644 --- a/sequencer.h +++ b/sequencer.h @@ -49,6 +49,7 @@ struct replay_opts { int reschedule_failed_exec; int committer_date_is_author_date; int ignore_date; + int delete_cherry_pick_head; int mainline; diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh index 014001b8f325c0..af5678d981ab59 100755 --- a/t/t3507-cherry-pick-conflict.sh +++ b/t/t3507-cherry-pick-conflict.sh @@ -76,12 +76,33 @@ test_expect_success 'advice from failed cherry-pick --no-commit' " test_cmp expected actual " +test_expect_success 'advice from failed cherry-pick with GIT_CHERRY_PICK_HELP' " + pristine_detach initial && + ( + picked=\$(git rev-parse --short picked) && + cat <<-EOF >expected && + error: could not apply \$picked... 
picked + hint: and then do something else + EOF + GIT_CHERRY_PICK_HELP='and then do something else' && + export GIT_CHERRY_PICK_HELP && + test_must_fail git cherry-pick picked 2>actual && + test_cmp expected actual + ) +" + test_expect_success 'failed cherry-pick sets CHERRY_PICK_HEAD' ' pristine_detach initial && test_must_fail git cherry-pick picked && test_cmp_rev picked CHERRY_PICK_HEAD ' +test_expect_success 'failed cherry-pick with --delete-cherry-pick-head does not set CHERRY_PICK_HEAD' ' + pristine_detach initial && + test_must_fail git cherry-pick --delete-cherry-pick-head picked && + test_must_fail git rev-parse --verify CHERRY_PICK_HEAD +' + test_expect_success 'successful cherry-pick does not set CHERRY_PICK_HEAD' ' pristine_detach initial && git cherry-pick base && @@ -109,16 +130,6 @@ test_expect_success \ test_must_fail git rev-parse --verify CHERRY_PICK_HEAD ' -test_expect_success 'GIT_CHERRY_PICK_HELP suppresses CHERRY_PICK_HEAD' ' - pristine_detach initial && - ( - GIT_CHERRY_PICK_HELP="and then do something else" && - export GIT_CHERRY_PICK_HELP && - test_must_fail git cherry-pick picked - ) && - test_must_fail git rev-parse --verify CHERRY_PICK_HEAD -' - test_expect_success 'git reset clears CHERRY_PICK_HEAD' ' pristine_detach initial && From 86215558032cea84dcc14da1fa58286c333d58f8 Mon Sep 17 00:00:00 2001 From: ZheNing Hu <adlternative@gmail.com> Date: Tue, 3 Aug 2021 01:16:22 +0000 Subject: [PATCH 118/198] cherry-pick: use better advice message In the past, git cherry-pick would print such advice when there was a conflict: hint: after resolving the conflicts, mark the corrected paths hint: with 'git add <paths>' or 'git rm <paths>' hint: and commit the result with 'git commit' But in fact, when we want to cherry-pick multiple commits, we should not use "git commit" after resolving conflicts, which will make Git generate some errors. 
We should recommend users to use `git cherry-pick --continue`, `git cherry-pick --abort`, just like git rebase does. This is the improved advice: hint: Resolve all conflicts manually, mark them as resolved with hint: "git add/rm <conflicted_files>", then run hint: "git cherry-pick --continue". hint: You can instead skip this commit: run "git cherry-pick --skip". hint: To abort and get back to the state before "git cherry-pick", hint: run "git cherry-pick --abort". Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by Hariom Verma <hariom18599@gmail.com>: Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Hepled-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- sequencer.c | 10 +++++++++- t/t3507-cherry-pick-conflict.sh | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/sequencer.c b/sequencer.c index 83cf6a5da3c24e..bf7dbea81ddffb 100644 --- a/sequencer.c +++ b/sequencer.c @@ -404,7 +404,15 @@ static void print_advice(struct replay_opts *opts, int show_hint) if (msg) { advise("%s\n", msg); } else if (show_hint) { - if (opts->no_commit) + if (opts->action == REPLAY_PICK) { + advise(_("Resolve all conflicts manually, mark them as resolved with\n" + "\"git add/rm <conflicted_files>\", then run\n" + "\"git cherry-pick --continue\".\n" + "You can instead skip this commit: run \"git cherry-pick --skip\".\n" + "To abort and get back to the state before \"git cherry-pick\",\n" + "run \"git cherry-pick --abort\".")); + + } else if (opts->no_commit) advise(_("after resolving the conflicts, mark the corrected paths\n" "with 'git add <paths>' or 'git rm <paths>'")); else diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh index af5678d981ab59..e953b54e54d346 100755 --- a/t/t3507-cherry-pick-conflict.sh +++ b/t/t3507-cherry-pick-conflict.sh @@ -53,9 +53,12 @@ test_expect_success 'advice from failed 
cherry-pick' " picked=\$(git rev-parse --short picked) && cat <<-EOF >expected && error: could not apply \$picked... picked - hint: after resolving the conflicts, mark the corrected paths - hint: with 'git add <paths>' or 'git rm <paths>' - hint: and commit the result with 'git commit' + hint: Resolve all conflicts manually, mark them as resolved with + hint: \"git add/rm <conflicted_files>\", then run + hint: \"git cherry-pick --continue\". + hint: You can instead skip this commit: run \"git cherry-pick --skip\". + hint: To abort and get back to the state before \"git cherry-pick\", + hint: run \"git cherry-pick --abort\". EOF test_must_fail git cherry-pick picked 2>actual && @@ -68,8 +71,12 @@ test_expect_success 'advice from failed cherry-pick --no-commit' " picked=\$(git rev-parse --short picked) && cat <<-EOF >expected && error: could not apply \$picked... picked - hint: after resolving the conflicts, mark the corrected paths - hint: with 'git add <paths>' or 'git rm <paths>' + hint: Resolve all conflicts manually, mark them as resolved with + hint: \"git add/rm <conflicted_files>\", then run + hint: \"git cherry-pick --continue\". + hint: You can instead skip this commit: run \"git cherry-pick --skip\". + hint: To abort and get back to the state before \"git cherry-pick\", + hint: run \"git cherry-pick --abort\". EOF test_must_fail git cherry-pick --no-commit picked 2>actual && From c2223851649c88a4723b79e6c2fe87efe6feffc1 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:50 +0000 Subject: [PATCH 119/198] ssh signing: preliminary refactoring and clean-up Openssh v8.2p1 added some new options to ssh-keygen for signature creation and verification. These allow us to use ssh keys for git signatures easily. In our corporate environment we use PIV x509 Certs on Yubikeys for email signing/encryption and ssh keys which I think is quite common (at least for the email part). 
This way we can establish the correct trust for the SSH Keys without setting up a separate GPG Infrastructure (which is still quite painful for users) or implementing x509 signing support for git (which lacks good forwarding mechanisms). Using ssh agent forwarding makes this feature easily usable in todays development environments where code is often checked out in remote VMs / containers. In such a setup the keyring & revocationKeyring can be centrally generated from the x509 CA information and distributed to the users. To be able to implement new signing formats this commit: - makes the sigc structure more generic by renaming "gpg_output" to "output" - introduces function pointers in the gpg_format structure to call format specific signing and verification functions - moves format detection from verify_signed_buffer into the check_signature api function and calls the format specific verify - renames and wraps sign_buffer to handle format specific signing logic as well Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- fmt-merge-msg.c | 6 +-- gpg-interface.c | 104 +++++++++++++++++++++++++++++------------------- gpg-interface.h | 2 +- log-tree.c | 8 ++-- pretty.c | 4 +- 5 files changed, 74 insertions(+), 50 deletions(-) diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c index 0f66818e0f839f..fb300bb4b67861 100644 --- a/fmt-merge-msg.c +++ b/fmt-merge-msg.c @@ -526,11 +526,11 @@ static void fmt_merge_msg_sigs(struct strbuf *out) buf = payload.buf; len = payload.len; if (check_signature(payload.buf, payload.len, sig.buf, - sig.len, &sigc) && - !sigc.gpg_output) + sig.len, &sigc) && + !sigc.output) strbuf_addstr(&sig, "gpg verification failed.\n"); else - strbuf_addstr(&sig, sigc.gpg_output); + strbuf_addstr(&sig, sigc.output); } signature_check_clear(&sigc); diff --git a/gpg-interface.c b/gpg-interface.c index 127aecfc2b071f..db54b05416257d 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -15,6 +15,12 @@ struct 
gpg_format { const char *program; const char **verify_args; const char **sigs; + int (*verify_signed_buffer)(struct signature_check *sigc, + struct gpg_format *fmt, const char *payload, + size_t payload_size, const char *signature, + size_t signature_size); + int (*sign_buffer)(struct strbuf *buffer, struct strbuf *signature, + const char *signing_key); }; static const char *openpgp_verify_args[] = { @@ -35,14 +41,29 @@ static const char *x509_sigs[] = { NULL }; +static int verify_gpg_signed_buffer(struct signature_check *sigc, + struct gpg_format *fmt, const char *payload, + size_t payload_size, const char *signature, + size_t signature_size); +static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, + const char *signing_key); + static struct gpg_format gpg_format[] = { - { .name = "openpgp", .program = "gpg", - .verify_args = openpgp_verify_args, - .sigs = openpgp_sigs + { + .name = "openpgp", + .program = "gpg", + .verify_args = openpgp_verify_args, + .sigs = openpgp_sigs, + .verify_signed_buffer = verify_gpg_signed_buffer, + .sign_buffer = sign_buffer_gpg, }, - { .name = "x509", .program = "gpgsm", - .verify_args = x509_verify_args, - .sigs = x509_sigs + { + .name = "x509", + .program = "gpgsm", + .verify_args = x509_verify_args, + .sigs = x509_sigs, + .verify_signed_buffer = verify_gpg_signed_buffer, + .sign_buffer = sign_buffer_gpg, }, }; @@ -72,7 +93,7 @@ static struct gpg_format *get_format_by_sig(const char *sig) void signature_check_clear(struct signature_check *sigc) { FREE_AND_NULL(sigc->payload); - FREE_AND_NULL(sigc->gpg_output); + FREE_AND_NULL(sigc->output); FREE_AND_NULL(sigc->gpg_status); FREE_AND_NULL(sigc->signer); FREE_AND_NULL(sigc->key); @@ -257,16 +278,16 @@ static void parse_gpg_output(struct signature_check *sigc) FREE_AND_NULL(sigc->key); } -static int verify_signed_buffer(const char *payload, size_t payload_size, - const char *signature, size_t signature_size, - struct strbuf *gpg_output, - struct strbuf *gpg_status) 
+static int verify_gpg_signed_buffer(struct signature_check *sigc, + struct gpg_format *fmt, const char *payload, + size_t payload_size, const char *signature, + size_t signature_size) { struct child_process gpg = CHILD_PROCESS_INIT; - struct gpg_format *fmt; struct tempfile *temp; int ret; - struct strbuf buf = STRBUF_INIT; + struct strbuf gpg_stdout = STRBUF_INIT; + struct strbuf gpg_stderr = STRBUF_INIT; temp = mks_tempfile_t(".git_vtag_tmpXXXXXX"); if (!temp) @@ -279,10 +300,6 @@ static int verify_signed_buffer(const char *payload, size_t payload_size, return -1; } - fmt = get_format_by_sig(signature); - if (!fmt) - BUG("bad signature '%s'", signature); - strvec_push(&gpg.args, fmt->program); strvec_pushv(&gpg.args, fmt->verify_args); strvec_pushl(&gpg.args, @@ -290,18 +307,22 @@ static int verify_signed_buffer(const char *payload, size_t payload_size, "--verify", temp->filename.buf, "-", NULL); - if (!gpg_status) - gpg_status = &buf; - sigchain_push(SIGPIPE, SIG_IGN); - ret = pipe_command(&gpg, payload, payload_size, - gpg_status, 0, gpg_output, 0); + ret = pipe_command(&gpg, payload, payload_size, &gpg_stdout, 0, + &gpg_stderr, 0); sigchain_pop(SIGPIPE); delete_tempfile(&temp); - ret |= !strstr(gpg_status->buf, "\n[GNUPG:] GOODSIG "); - strbuf_release(&buf); /* no matter it was used or not */ + ret |= !strstr(gpg_stdout.buf, "\n[GNUPG:] GOODSIG "); + sigc->payload = xmemdupz(payload, payload_size); + sigc->output = strbuf_detach(&gpg_stderr, NULL); + sigc->gpg_status = strbuf_detach(&gpg_stdout, NULL); + + parse_gpg_output(sigc); + + strbuf_release(&gpg_stdout); + strbuf_release(&gpg_stderr); return ret; } @@ -309,35 +330,32 @@ static int verify_signed_buffer(const char *payload, size_t payload_size, int check_signature(const char *payload, size_t plen, const char *signature, size_t slen, struct signature_check *sigc) { - struct strbuf gpg_output = STRBUF_INIT; - struct strbuf gpg_status = STRBUF_INIT; + struct gpg_format *fmt; int status; sigc->result = 'N'; 
sigc->trust_level = -1; - status = verify_signed_buffer(payload, plen, signature, slen, - &gpg_output, &gpg_status); - if (status && !gpg_output.len) - goto out; - sigc->payload = xmemdupz(payload, plen); - sigc->gpg_output = strbuf_detach(&gpg_output, NULL); - sigc->gpg_status = strbuf_detach(&gpg_status, NULL); - parse_gpg_output(sigc); + fmt = get_format_by_sig(signature); + if (!fmt) + die(_("bad/incompatible signature '%s'"), signature); + + status = fmt->verify_signed_buffer(sigc, fmt, payload, plen, signature, + slen); + + if (status && !sigc->output) + return !!status; + status |= sigc->result != 'G'; status |= sigc->trust_level < configured_min_trust_level; - out: - strbuf_release(&gpg_status); - strbuf_release(&gpg_output); - return !!status; } void print_signature_buffer(const struct signature_check *sigc, unsigned flags) { - const char *output = flags & GPG_VERIFY_RAW ? - sigc->gpg_status : sigc->gpg_output; + const char *output = flags & GPG_VERIFY_RAW ? sigc->gpg_status : + sigc->output; if (flags & GPG_VERIFY_VERBOSE && sigc->payload) fputs(sigc->payload, stdout); @@ -441,6 +459,12 @@ const char *get_signing_key(void) } int sign_buffer(struct strbuf *buffer, struct strbuf *signature, const char *signing_key) +{ + return use_format->sign_buffer(buffer, signature, signing_key); +} + +static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, + const char *signing_key) { struct child_process gpg = CHILD_PROCESS_INIT; int ret; diff --git a/gpg-interface.h b/gpg-interface.h index 80567e4894868d..feac4decf8b79f 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -17,7 +17,7 @@ enum signature_trust_level { struct signature_check { char *payload; - char *gpg_output; + char *output; char *gpg_status; /* diff --git a/log-tree.c b/log-tree.c index 7b823786c2cba7..20af9bd1c82575 100644 --- a/log-tree.c +++ b/log-tree.c @@ -513,10 +513,10 @@ static void show_signature(struct rev_info *opt, struct commit *commit) status = 
check_signature(payload.buf, payload.len, signature.buf, signature.len, &sigc); - if (status && !sigc.gpg_output) + if (status && !sigc.output) show_sig_lines(opt, status, "No signature\n"); else - show_sig_lines(opt, status, sigc.gpg_output); + show_sig_lines(opt, status, sigc.output); signature_check_clear(&sigc); out: @@ -583,8 +583,8 @@ static int show_one_mergetag(struct commit *commit, /* could have a good signature */ status = check_signature(payload.buf, payload.len, signature.buf, signature.len, &sigc); - if (sigc.gpg_output) - strbuf_addstr(&verify_message, sigc.gpg_output); + if (sigc.output) + strbuf_addstr(&verify_message, sigc.output); else strbuf_addstr(&verify_message, "No signature\n"); signature_check_clear(&sigc); diff --git a/pretty.c b/pretty.c index b1ecd039cef29e..daa71394efd749 100644 --- a/pretty.c +++ b/pretty.c @@ -1432,8 +1432,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ check_commit_signature(c->commit, &(c->signature_check)); switch (placeholder[1]) { case 'G': - if (c->signature_check.gpg_output) - strbuf_addstr(sb, c->signature_check.gpg_output); + if (c->signature_check.output) + strbuf_addstr(sb, c->signature_check.output); break; case '?': switch (c->signature_check.result) { From 3a3fdc0b4ea60f884f3bdb18058111e47014c998 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:51 +0000 Subject: [PATCH 120/198] ssh signing: add test prereqs Generate some ssh keys and a allowedSignersFile for testing Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/lib-gpg.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/t/lib-gpg.sh b/t/lib-gpg.sh index 9fc5241228e800..f99ef3e859dd43 100644 --- a/t/lib-gpg.sh +++ b/t/lib-gpg.sh @@ -87,6 +87,34 @@ test_lazy_prereq RFC1991 ' echo | gpg --homedir "${GNUPGHOME}" -b --rfc1991 >/dev/null ' +GPGSSH_KEY_PRIMARY="${GNUPGHOME}/ed25519_ssh_signing_key" 
+GPGSSH_KEY_SECONDARY="${GNUPGHOME}/rsa_2048_ssh_signing_key" +GPGSSH_KEY_UNTRUSTED="${GNUPGHOME}/untrusted_ssh_signing_key" +GPGSSH_KEY_WITH_PASSPHRASE="${GNUPGHOME}/protected_ssh_signing_key" +GPGSSH_KEY_PASSPHRASE="super_secret" +GPGSSH_ALLOWED_SIGNERS="${GNUPGHOME}/ssh.all_valid.allowedSignersFile" + +GPGSSH_GOOD_SIGNATURE_TRUSTED='Good "git" signature for' +GPGSSH_GOOD_SIGNATURE_UNTRUSTED='Good "git" signature with' +GPGSSH_KEY_NOT_TRUSTED="No principal matched" +GPGSSH_BAD_SIGNATURE="Signature verification failed" + +test_lazy_prereq GPGSSH ' + ssh_version=$(ssh-keygen -Y find-principals -n "git" 2>&1) + test $? != 127 || exit 1 + echo $ssh_version | grep -q "find-principals:missing signature file" + test $? = 0 || exit 1; + mkdir -p "${GNUPGHOME}" && + chmod 0700 "${GNUPGHOME}" && + ssh-keygen -t ed25519 -N "" -C "git ed25519 key" -f "${GPGSSH_KEY_PRIMARY}" >/dev/null && + echo "\"principal with number 1\" $(cat "${GPGSSH_KEY_PRIMARY}.pub")" >> "${GPGSSH_ALLOWED_SIGNERS}" && + ssh-keygen -t rsa -b 2048 -N "" -C "git rsa2048 key" -f "${GPGSSH_KEY_SECONDARY}" >/dev/null && + echo "\"principal with number 2\" $(cat "${GPGSSH_KEY_SECONDARY}.pub")" >> "${GPGSSH_ALLOWED_SIGNERS}" && + ssh-keygen -t ed25519 -N "${GPGSSH_KEY_PASSPHRASE}" -C "git ed25519 encrypted key" -f "${GPGSSH_KEY_WITH_PASSPHRASE}" >/dev/null && + echo "\"principal with number 3\" $(cat "${GPGSSH_KEY_WITH_PASSPHRASE}.pub")" >> "${GPGSSH_ALLOWED_SIGNERS}" && + ssh-keygen -t ed25519 -N "" -f "${GPGSSH_KEY_UNTRUSTED}" >/dev/null +' + sanitize_pgp() { perl -ne ' /^-----END PGP/ and $in_pgp = 0; From c7e2d30efec488ff34afa5560c3f0fa364d04fbf Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:52 +0000 Subject: [PATCH 121/198] ssh signing: add ssh key format and signing code Implements the actual sign_buffer_ssh operation and move some shared cleanup code into a strbuf function Set gpg.format = ssh and user.signingkey to either a ssh public key string (like from 
an authorized_keys file), or a ssh key file. If the key file or the config value itself contains only a public key then the private key needs to be available via ssh-agent. gpg.ssh.program can be set to an alternative location of ssh-keygen. A somewhat recent openssh version (8.2p1+) of ssh-keygen is needed for this feature. Since only ssh-keygen is needed it can this way be installed separately without upgrading your system openssh packages. Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/gpg.txt | 4 +- Documentation/config/user.txt | 5 ++ gpg-interface.c | 138 ++++++++++++++++++++++++++++++++-- 3 files changed, 137 insertions(+), 10 deletions(-) diff --git a/Documentation/config/gpg.txt b/Documentation/config/gpg.txt index d94025cb3684d8..88531b15f0ff32 100644 --- a/Documentation/config/gpg.txt +++ b/Documentation/config/gpg.txt @@ -11,13 +11,13 @@ gpg.program:: gpg.format:: Specifies which key format to use when signing with `--gpg-sign`. - Default is "openpgp" and another possible value is "x509". + Default is "openpgp". Other possible values are "x509", "ssh". gpg.<format>.program:: Use this to customize the program used for the signing format you chose. (see `gpg.program` and `gpg.format`) `gpg.program` can still be used as a legacy synonym for `gpg.openpgp.program`. The default - value for `gpg.x509.program` is "gpgsm". + value for `gpg.x509.program` is "gpgsm" and `gpg.ssh.program` is "ssh-keygen". gpg.minTrustLevel:: Specifies a minimum trust level for signature verification. If diff --git a/Documentation/config/user.txt b/Documentation/config/user.txt index 59aec7c3aed32a..2155128957c93e 100644 --- a/Documentation/config/user.txt +++ b/Documentation/config/user.txt @@ -36,3 +36,8 @@ user.signingKey:: commit, you can override the default selection with this variable.
This option is passed unchanged to gpg's --local-user parameter, so you may specify a key using any method that gpg supports. + If gpg.format is set to "ssh" this can contain the literal ssh public + key (e.g.: "ssh-rsa XXXXXX identifier") or a file which contains it and + corresponds to the private key used for signing. The private key + needs to be available via ssh-agent. Alternatively it can be set to + a file containing a private key directly. diff --git a/gpg-interface.c b/gpg-interface.c index db54b05416257d..7ca682ac6d6849 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -41,12 +41,20 @@ static const char *x509_sigs[] = { NULL }; +static const char *ssh_verify_args[] = { NULL }; +static const char *ssh_sigs[] = { + "-----BEGIN SSH SIGNATURE-----", + NULL +}; + static int verify_gpg_signed_buffer(struct signature_check *sigc, struct gpg_format *fmt, const char *payload, size_t payload_size, const char *signature, size_t signature_size); static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, const char *signing_key); +static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, + const char *signing_key); static struct gpg_format gpg_format[] = { { @@ -65,6 +73,14 @@ static struct gpg_format gpg_format[] = { .verify_signed_buffer = verify_gpg_signed_buffer, .sign_buffer = sign_buffer_gpg, }, + { + .name = "ssh", + .program = "ssh-keygen", + .verify_args = ssh_verify_args, + .sigs = ssh_sigs, + .verify_signed_buffer = NULL, /* TODO */ + .sign_buffer = sign_buffer_ssh + }, }; static struct gpg_format *use_format = &gpg_format[0]; @@ -443,6 +459,9 @@ int git_gpg_config(const char *var, const char *value, void *cb) if (!strcmp(var, "gpg.x509.program")) fmtname = "x509"; + if (!strcmp(var, "gpg.ssh.program")) + fmtname = "ssh"; + if (fmtname) { fmt = get_format_by_name(fmtname); return git_config_string(&fmt->program, var, value); @@ -463,12 +482,30 @@ int sign_buffer(struct strbuf *buffer, struct strbuf *signature, const 
char *sig return use_format->sign_buffer(buffer, signature, signing_key); } +/* + * Strip CR from the line endings, in case we are on Windows. + * NEEDSWORK: make it trim only CRs before LFs and rename + */ +static void remove_cr_after(struct strbuf *buffer, size_t offset) +{ + size_t i, j; + + for (i = j = offset; i < buffer->len; i++) { + if (buffer->buf[i] != '\r') { + if (i != j) + buffer->buf[j] = buffer->buf[i]; + j++; + } + } + strbuf_setlen(buffer, j); +} + static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, const char *signing_key) { struct child_process gpg = CHILD_PROCESS_INIT; int ret; - size_t i, j, bottom; + size_t bottom; struct strbuf gpg_status = STRBUF_INIT; strvec_pushl(&gpg.args, @@ -494,13 +531,98 @@ static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, return error(_("gpg failed to sign the data")); /* Strip CR from the line endings, in case we are on Windows. */ - for (i = j = bottom; i < signature->len; i++) - if (signature->buf[i] != '\r') { - if (i != j) - signature->buf[j] = signature->buf[i]; - j++; - } - strbuf_setlen(signature, j); + remove_cr_after(signature, bottom); return 0; } + +static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, + const char *signing_key) +{ + struct child_process signer = CHILD_PROCESS_INIT; + int ret = -1; + size_t bottom, keylen; + struct strbuf signer_stderr = STRBUF_INIT; + struct tempfile *key_file = NULL, *buffer_file = NULL; + char *ssh_signing_key_file = NULL; + struct strbuf ssh_signature_filename = STRBUF_INIT; + + if (!signing_key || signing_key[0] == '\0') + return error( + _("user.signingkey needs to be set for ssh signing")); + + if (starts_with(signing_key, "ssh-")) { + /* A literal ssh key */ + key_file = mks_tempfile_t(".git_signing_key_tmpXXXXXX"); + if (!key_file) + return error_errno( + _("could not create temporary file")); + keylen = strlen(signing_key); + if (write_in_full(key_file->fd, signing_key, keylen) < 0 || + 
close_tempfile_gently(key_file) < 0) { + error_errno(_("failed writing ssh signing key to '%s'"), + key_file->filename.buf); + goto out; + } + ssh_signing_key_file = strbuf_detach(&key_file->filename, NULL); + } else { + /* We assume a file */ + ssh_signing_key_file = expand_user_path(signing_key, 1); + } + + buffer_file = mks_tempfile_t(".git_signing_buffer_tmpXXXXXX"); + if (!buffer_file) { + error_errno(_("could not create temporary file")); + goto out; + } + + if (write_in_full(buffer_file->fd, buffer->buf, buffer->len) < 0 || + close_tempfile_gently(buffer_file) < 0) { + error_errno(_("failed writing ssh signing key buffer to '%s'"), + buffer_file->filename.buf); + goto out; + } + + strvec_pushl(&signer.args, use_format->program, + "-Y", "sign", + "-n", "git", + "-f", ssh_signing_key_file, + buffer_file->filename.buf, + NULL); + + sigchain_push(SIGPIPE, SIG_IGN); + ret = pipe_command(&signer, NULL, 0, NULL, 0, &signer_stderr, 0); + sigchain_pop(SIGPIPE); + + if (ret) { + if (strstr(signer_stderr.buf, "usage:")) + error(_("ssh-keygen -Y sign is needed for ssh signing (available in openssh version 8.2p1+)")); + + error("%s", signer_stderr.buf); + goto out; + } + + bottom = signature->len; + + strbuf_addbuf(&ssh_signature_filename, &buffer_file->filename); + strbuf_addstr(&ssh_signature_filename, ".sig"); + if (strbuf_read_file(signature, ssh_signature_filename.buf, 0) < 0) { + error_errno( + _("failed reading ssh signing data buffer from '%s'"), + ssh_signature_filename.buf); + } + unlink_or_warn(ssh_signature_filename.buf); + + /* Strip CR from the line endings, in case we are on Windows. 
*/ + remove_cr_after(signature, bottom); + +out: + if (key_file) + delete_tempfile(&key_file); + if (buffer_file) + delete_tempfile(&buffer_file); + strbuf_release(&signer_stderr); + strbuf_release(&ssh_signature_filename); + FREE_AND_NULL(ssh_signing_key_file); + return ret; +} From 54937221225569332b92f6cd7876d4fa08dc3277 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:53 +0000 Subject: [PATCH 122/198] ssh signing: retrieve a default key from ssh-agent If user.signingkey is not set and a ssh signature is requested we call gpg.ssh.defaultKeyCommand (typically "ssh-add -L") and use the first key we get Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/gpg.txt | 6 +++ Documentation/config/user.txt | 4 +- gpg-interface.c | 70 ++++++++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/Documentation/config/gpg.txt b/Documentation/config/gpg.txt index 88531b15f0ff32..9b95dd280c3749 100644 --- a/Documentation/config/gpg.txt +++ b/Documentation/config/gpg.txt @@ -33,3 +33,9 @@ gpg.minTrustLevel:: * `marginal` * `fully` * `ultimate` + +gpg.ssh.defaultKeyCommand:: + This command will be run when user.signingkey is not set and a ssh + signature is requested. On successful exit a valid ssh public key is + expected in the first line of its output. To automatically use the first + available key from your ssh-agent set this to "ssh-add -L". diff --git a/Documentation/config/user.txt b/Documentation/config/user.txt index 2155128957c93e..ad78dce9ecbfc6 100644 --- a/Documentation/config/user.txt +++ b/Documentation/config/user.txt @@ -40,4 +40,6 @@ user.signingKey:: key (e.g.: "ssh-rsa XXXXXX identifier") or a file which contains it and corresponds to the private key used for signing. The private key needs to be available via ssh-agent. Alternatively it can be set to - a file containing a private key directly.
+ a file containing a private key directly. If not set git will call + gpg.ssh.defaultKeyCommand (e.g.: "ssh-add -L") and try to use the first + key available. diff --git a/gpg-interface.c b/gpg-interface.c index 7ca682ac6d6849..3a0cca1b1d2109 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -6,8 +6,10 @@ #include "gpg-interface.h" #include "sigchain.h" #include "tempfile.h" +#include "alias.h" static char *configured_signing_key; +static const char *ssh_default_key_command; static enum signature_trust_level configured_min_trust_level = TRUST_UNDEFINED; struct gpg_format { @@ -21,6 +23,7 @@ struct gpg_format { size_t signature_size); int (*sign_buffer)(struct strbuf *buffer, struct strbuf *signature, const char *signing_key); + const char *(*get_default_key)(void); }; static const char *openpgp_verify_args[] = { @@ -56,6 +59,8 @@ static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, const char *signing_key); +static const char *get_default_ssh_signing_key(void); + static struct gpg_format gpg_format[] = { { .name = "openpgp", @@ -64,6 +69,7 @@ static struct gpg_format gpg_format[] = { .sigs = openpgp_sigs, .verify_signed_buffer = verify_gpg_signed_buffer, .sign_buffer = sign_buffer_gpg, + .get_default_key = NULL, }, { .name = "x509", @@ -72,6 +78,7 @@ static struct gpg_format gpg_format[] = { .sigs = x509_sigs, .verify_signed_buffer = verify_gpg_signed_buffer, .sign_buffer = sign_buffer_gpg, + .get_default_key = NULL, }, { .name = "ssh", @@ -79,7 +86,8 @@ static struct gpg_format gpg_format[] = { .verify_args = ssh_verify_args, .sigs = ssh_sigs, .verify_signed_buffer = NULL, /* TODO */ - .sign_buffer = sign_buffer_ssh + .sign_buffer = sign_buffer_ssh, + .get_default_key = get_default_ssh_signing_key, }, }; @@ -453,6 +461,12 @@ int git_gpg_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "gpg.ssh.defaultkeycommand")) { + if 
(!value) + return config_error_nonbool(var); + return git_config_string(&ssh_default_key_command, var, value); + } + if (!strcmp(var, "gpg.program") || !strcmp(var, "gpg.openpgp.program")) fmtname = "openpgp"; @@ -470,11 +484,63 @@ int git_gpg_config(const char *var, const char *value, void *cb) return 0; } +/* Returns the first public key from an ssh-agent to use for signing */ +static const char *get_default_ssh_signing_key(void) +{ + struct child_process ssh_default_key = CHILD_PROCESS_INIT; + int ret = -1; + struct strbuf key_stdout = STRBUF_INIT, key_stderr = STRBUF_INIT; + struct strbuf **keys; + char *key_command = NULL; + const char **argv; + int n; + char *default_key = NULL; + + if (!ssh_default_key_command) + die(_("either user.signingkey or gpg.ssh.defaultKeyCommand needs to be configured")); + + key_command = xstrdup(ssh_default_key_command); + n = split_cmdline(key_command, &argv); + + if (n < 0) + die("malformed build-time gpg.ssh.defaultKeyCommand: %s", + split_cmdline_strerror(n)); + + strvec_pushv(&ssh_default_key.args, argv); + ret = pipe_command(&ssh_default_key, NULL, 0, &key_stdout, 0, + &key_stderr, 0); + + if (!ret) { + keys = strbuf_split_max(&key_stdout, '\n', 2); + if (keys[0] && starts_with(keys[0]->buf, "ssh-")) { + default_key = strbuf_detach(keys[0], NULL); + } else { + warning(_("gpg.ssh.defaultKeycommand succeeded but returned no keys: %s %s"), + key_stderr.buf, key_stdout.buf); + } + + strbuf_list_free(keys); + } else { + warning(_("gpg.ssh.defaultKeyCommand failed: %s %s"), + key_stderr.buf, key_stdout.buf); + } + + free(key_command); + free(argv); + strbuf_release(&key_stdout); + + return default_key; +} + const char *get_signing_key(void) { if (configured_signing_key) return configured_signing_key; - return git_committer_info(IDENT_STRICT|IDENT_NO_DATE); + if (use_format->get_default_key) { + return use_format->get_default_key(); + } + + return git_committer_info(IDENT_STRICT | IDENT_NO_DATE); } int sign_buffer(struct strbuf 
*buffer, struct strbuf *signature, const char *signing_key) From 6869f1f60c1e9c9fa226d8d67ce11df904aaa963 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:54 +0000 Subject: [PATCH 123/198] ssh signing: provide a textual signing_key_id For ssh the user.signingkey can be a filename/path or even a literal ssh pubkey. In push certs and textual output we prefer the ssh fingerprint instead. Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- gpg-interface.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ gpg-interface.h | 6 ++++++ send-pack.c | 8 +++---- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/gpg-interface.c b/gpg-interface.c index 3a0cca1b1d2109..0f1c6a02e53257 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -24,6 +24,7 @@ struct gpg_format { int (*sign_buffer)(struct strbuf *buffer, struct strbuf *signature, const char *signing_key); const char *(*get_default_key)(void); + const char *(*get_key_id)(void); }; static const char *openpgp_verify_args[] = { @@ -61,6 +62,8 @@ static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, static const char *get_default_ssh_signing_key(void); +static const char *get_ssh_key_id(void); + static struct gpg_format gpg_format[] = { { .name = "openpgp", @@ -70,6 +73,7 @@ static struct gpg_format gpg_format[] = { .verify_signed_buffer = verify_gpg_signed_buffer, .sign_buffer = sign_buffer_gpg, .get_default_key = NULL, + .get_key_id = NULL, }, { .name = "x509", @@ -79,6 +83,7 @@ static struct gpg_format gpg_format[] = { .verify_signed_buffer = verify_gpg_signed_buffer, .sign_buffer = sign_buffer_gpg, .get_default_key = NULL, + .get_key_id = NULL, }, { .name = "ssh", @@ -88,6 +93,7 @@ static struct gpg_format gpg_format[] = { .verify_signed_buffer = NULL, /* TODO */ .sign_buffer = sign_buffer_ssh, .get_default_key = get_default_ssh_signing_key, + .get_key_id = get_ssh_key_id, }, }; @@ -484,6 
+490,41 @@ int git_gpg_config(const char *var, const char *value, void *cb) return 0; } +static char *get_ssh_key_fingerprint(const char *signing_key) +{ + struct child_process ssh_keygen = CHILD_PROCESS_INIT; + int ret = -1; + struct strbuf fingerprint_stdout = STRBUF_INIT; + struct strbuf **fingerprint; + + /* + * With SSH Signing this can contain a filename or a public key + * For textual representation we usually want a fingerprint + */ + if (starts_with(signing_key, "ssh-")) { + strvec_pushl(&ssh_keygen.args, "ssh-keygen", "-lf", "-", NULL); + ret = pipe_command(&ssh_keygen, signing_key, + strlen(signing_key), &fingerprint_stdout, 0, + NULL, 0); + } else { + strvec_pushl(&ssh_keygen.args, "ssh-keygen", "-lf", + configured_signing_key, NULL); + ret = pipe_command(&ssh_keygen, NULL, 0, &fingerprint_stdout, 0, + NULL, 0); + } + + if (!!ret) + die_errno(_("failed to get the ssh fingerprint for key '%s'"), + signing_key); + + fingerprint = strbuf_split_max(&fingerprint_stdout, ' ', 3); + if (!fingerprint[1]) + die_errno(_("failed to get the ssh fingerprint for key '%s'"), + signing_key); + + return strbuf_detach(fingerprint[1], NULL); +} + /* Returns the first public key from an ssh-agent to use for signing */ static const char *get_default_ssh_signing_key(void) { @@ -532,6 +573,21 @@ static const char *get_default_ssh_signing_key(void) return default_key; } +static const char *get_ssh_key_id(void) { + return get_ssh_key_fingerprint(get_signing_key()); +} + +/* Returns a textual but unique representation of the signing key */ +const char *get_signing_key_id(void) +{ + if (use_format->get_key_id) { + return use_format->get_key_id(); + } + + /* GPG/GPGSM only store a key id on this variable */ + return get_signing_key(); +} + const char *get_signing_key(void) { if (configured_signing_key) diff --git a/gpg-interface.h b/gpg-interface.h index feac4decf8b79f..beefacbb1e9025 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -64,6 +64,12 @@ int sign_buffer(struct 
strbuf *buffer, struct strbuf *signature, int git_gpg_config(const char *, const char *, void *); void set_signing_key(const char *); const char *get_signing_key(void); + +/* + * Returns a textual unique representation of the signing key in use + * Either a GPG KeyID or a SSH Key Fingerprint + */ +const char *get_signing_key_id(void); int check_signature(const char *payload, size_t plen, const char *signature, size_t slen, struct signature_check *sigc); diff --git a/send-pack.c b/send-pack.c index 5a79e0e7110319..50cca7e439b34b 100644 --- a/send-pack.c +++ b/send-pack.c @@ -341,13 +341,13 @@ static int generate_push_cert(struct strbuf *req_buf, { const struct ref *ref; struct string_list_item *item; - char *signing_key = xstrdup(get_signing_key()); + char *signing_key_id = xstrdup(get_signing_key_id()); const char *cp, *np; struct strbuf cert = STRBUF_INIT; int update_seen = 0; strbuf_addstr(&cert, "certificate version 0.1\n"); - strbuf_addf(&cert, "pusher %s ", signing_key); + strbuf_addf(&cert, "pusher %s ", signing_key_id); datestamp(&cert); strbuf_addch(&cert, '\n'); if (args->url && *args->url) { @@ -374,7 +374,7 @@ static int generate_push_cert(struct strbuf *req_buf, if (!update_seen) goto free_return; - if (sign_buffer(&cert, &cert, signing_key)) + if (sign_buffer(&cert, &cert, get_signing_key())) die(_("failed to sign the push certificate")); packet_buf_write(req_buf, "push-cert%c%s", 0, cap_string); @@ -386,7 +386,7 @@ static int generate_push_cert(struct strbuf *req_buf, packet_buf_write(req_buf, "push-cert-end\n"); free_return: - free(signing_key); + free(signing_key_id); strbuf_release(&cert); return update_seen; } From 9048bb3c9b829cb19298532e960a1e735b2f1b18 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:55 +0000 Subject: [PATCH 124/198] ssh signing: verify signatures using ssh-keygen To verify a ssh signature we first call ssh-keygen -Y find-principals to look up the signing principal by their public key
from the allowedSignersFile. If the key is found then we do a verify. Otherwise we only validate the signature but can not verify the signer's identity. Verification uses the gpg.ssh.allowedSignersFile (see ssh-keygen(1) "ALLOWED SIGNERS") which contains valid public keys and a principal (usually user@domain). Depending on the environment this file can be managed by the individual developer or for example generated by the central repository server from known ssh keys with push access. This file is usually stored outside the repository, but if the repository only allows signed commits/pushes, the user might choose to store it in the repository. To revoke a key put the public key without the principal prefix into gpg.ssh.revocationFile or generate a KRL (see ssh-keygen(1) "KEY REVOCATION LISTS"). The same considerations about who to trust for verification as with the allowedSignersFile apply. Using SSH CA Keys with these files is also possible. Add "cert-authority" as key option between the principal and the key to mark it as a CA and all keys signed by it as valid for this CA. See "CERTIFICATES" in ssh-keygen(1). Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/gpg.txt | 35 ++++++ builtin/receive-pack.c | 4 + gpg-interface.c | 209 ++++++++++++++++++++++++++++++++++- 3 files changed, 246 insertions(+), 2 deletions(-) diff --git a/Documentation/config/gpg.txt b/Documentation/config/gpg.txt index 9b95dd280c3749..51a756b2f15f85 100644 --- a/Documentation/config/gpg.txt +++ b/Documentation/config/gpg.txt @@ -39,3 +39,38 @@ gpg.ssh.defaultKeyCommand: signature is requested. On successful exit a valid ssh public key is expected in the first line of its output. To automatically use the first available key from your ssh-agent set this to "ssh-add -L". + +gpg.ssh.allowedSignersFile:: + A file containing ssh public keys which you are willing to trust.
+ The file consists of one or more lines of principals followed by an ssh + public key. + e.g.: user1@example.com,user2@example.com ssh-rsa AAAAX1... + See ssh-keygen(1) "ALLOWED SIGNERS" for details. + The principal is only used to identify the key and is available when + verifying a signature. ++ +SSH has no concept of trust levels like gpg does. To be able to differentiate +between valid signatures and trusted signatures the trust level of a signature +verification is set to `fully` when the public key is present in the allowedSignersFile. +Therefore to only mark fully trusted keys as verified set gpg.minTrustLevel to `fully`. +Otherwise valid but untrusted signatures will still verify but show no principal +name of the signer. ++ +This file can be set to a location outside of the repository and every developer +maintains their own trust store. A central repository server could generate this +file automatically from ssh keys with push access to verify the code against. +In a corporate setting this file is probably generated at a global location +from automation that already handles developer ssh keys. ++ +A repository that only allows signed commits can store the file +in the repository itself using a path relative to the top-level of the working tree. +This way only committers with an already valid key can add or change keys in the keyring. ++ +Using a SSH CA key with the cert-authority option +(see ssh-keygen(1) "CERTIFICATES") is also valid. + +gpg.ssh.revocationFile:: + Either a SSH KRL or a list of revoked public keys (without the principal prefix). + See ssh-keygen(1) for details. + If a public key is found in this file then it will always be treated + as having trust level "never" and signatures will show as invalid. 
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a34742513aca7e..f17c7d2246b031 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -131,6 +131,10 @@ static int receive_pack_config(const char *var, const char *value, void *cb) { int status = parse_hide_refs_config(var, value, "receive"); + if (status) + return status; + + status = git_gpg_config(var, value, NULL); if (status) return status; diff --git a/gpg-interface.c b/gpg-interface.c index 0f1c6a02e53257..9c1ef11a563f3f 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -3,13 +3,14 @@ #include "config.h" #include "run-command.h" #include "strbuf.h" +#include "dir.h" #include "gpg-interface.h" #include "sigchain.h" #include "tempfile.h" #include "alias.h" static char *configured_signing_key; -static const char *ssh_default_key_command; +static const char *ssh_default_key_command, *ssh_allowed_signers, *ssh_revocation_file; static enum signature_trust_level configured_min_trust_level = TRUST_UNDEFINED; struct gpg_format { @@ -55,6 +56,10 @@ static int verify_gpg_signed_buffer(struct signature_check *sigc, struct gpg_format *fmt, const char *payload, size_t payload_size, const char *signature, size_t signature_size); +static int verify_ssh_signed_buffer(struct signature_check *sigc, + struct gpg_format *fmt, const char *payload, + size_t payload_size, const char *signature, + size_t signature_size); static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, const char *signing_key); static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, @@ -90,7 +95,7 @@ static struct gpg_format gpg_format[] = { .program = "ssh-keygen", .verify_args = ssh_verify_args, .sigs = ssh_sigs, - .verify_signed_buffer = NULL, /* TODO */ + .verify_signed_buffer = verify_ssh_signed_buffer, .sign_buffer = sign_buffer_ssh, .get_default_key = get_default_ssh_signing_key, .get_key_id = get_ssh_key_id, @@ -357,6 +362,194 @@ static int verify_gpg_signed_buffer(struct 
signature_check *sigc, return ret; } +static void parse_ssh_output(struct signature_check *sigc) +{ + const char *line, *principal, *search; + char *key = NULL; + + /* + * ssh-keygen output should be: + * Good "git" signature for PRINCIPAL with RSA key SHA256:FINGERPRINT + * + * or for valid but unknown keys: + * Good "git" signature with RSA key SHA256:FINGERPRINT + * + * Note that "PRINCIPAL" can contain whitespace, "RSA" and + * "SHA256" part could be a different token that names of + * the algorithms used, and "FINGERPRINT" is a hexadecimal + * string. By finding the last occurence of " with ", we can + * reliably parse out the PRINCIPAL. + */ + sigc->result = 'B'; + sigc->trust_level = TRUST_NEVER; + + line = xmemdupz(sigc->output, strcspn(sigc->output, "\n")); + + if (skip_prefix(line, "Good \"git\" signature for ", &line)) { + /* Valid signature and known principal */ + sigc->result = 'G'; + sigc->trust_level = TRUST_FULLY; + + /* Search for the last "with" to get the full principal */ + principal = line; + do { + search = strstr(line, " with "); + if (search) + line = search + 1; + } while (search != NULL); + sigc->signer = xmemdupz(principal, line - principal - 1); + } else if (skip_prefix(line, "Good \"git\" signature with ", &line)) { + /* Valid signature, but key unknown */ + sigc->result = 'G'; + sigc->trust_level = TRUST_UNDEFINED; + } else { + return; + } + + key = strstr(line, "key"); + if (key) { + sigc->fingerprint = xstrdup(strstr(line, "key") + 4); + sigc->key = xstrdup(sigc->fingerprint); + } else { + /* + * Output did not match what we expected + * Treat the signature as bad + */ + sigc->result = 'B'; + } +} + +static int verify_ssh_signed_buffer(struct signature_check *sigc, + struct gpg_format *fmt, const char *payload, + size_t payload_size, const char *signature, + size_t signature_size) +{ + struct child_process ssh_keygen = CHILD_PROCESS_INIT; + struct tempfile *buffer_file; + int ret = -1; + const char *line; + size_t trust_size; + char 
*principal; + struct strbuf ssh_keygen_out = STRBUF_INIT; + struct strbuf ssh_keygen_err = STRBUF_INIT; + + if (!ssh_allowed_signers) { + error(_("gpg.ssh.allowedSignersFile needs to be configured and exist for ssh signature verification")); + return -1; + } + + buffer_file = mks_tempfile_t(".git_vtag_tmpXXXXXX"); + if (!buffer_file) + return error_errno(_("could not create temporary file")); + if (write_in_full(buffer_file->fd, signature, signature_size) < 0 || + close_tempfile_gently(buffer_file) < 0) { + error_errno(_("failed writing detached signature to '%s'"), + buffer_file->filename.buf); + delete_tempfile(&buffer_file); + return -1; + } + + /* Find the principal from the signers */ + strvec_pushl(&ssh_keygen.args, fmt->program, + "-Y", "find-principals", + "-f", ssh_allowed_signers, + "-s", buffer_file->filename.buf, + NULL); + ret = pipe_command(&ssh_keygen, NULL, 0, &ssh_keygen_out, 0, + &ssh_keygen_err, 0); + if (ret && strstr(ssh_keygen_err.buf, "usage:")) { + error(_("ssh-keygen -Y find-principals/verify is needed for ssh signature verification (available in openssh version 8.2p1+)")); + goto out; + } + if (ret || !ssh_keygen_out.len) { + /* + * We did not find a matching principal in the allowedSigners + * Check without validation + */ + child_process_init(&ssh_keygen); + strvec_pushl(&ssh_keygen.args, fmt->program, + "-Y", "check-novalidate", + "-n", "git", + "-s", buffer_file->filename.buf, + NULL); + pipe_command(&ssh_keygen, payload, payload_size, + &ssh_keygen_out, 0, &ssh_keygen_err, 0); + + /* + * Fail on unknown keys + * we still call check-novalidate to display the signature info + */ + ret = -1; + } else { + /* Check every principal we found (one per line) */ + for (line = ssh_keygen_out.buf; *line; + line = strchrnul(line + 1, '\n')) { + while (*line == '\n') + line++; + if (!*line) + break; + + trust_size = strcspn(line, "\n"); + principal = xmemdupz(line, trust_size); + + child_process_init(&ssh_keygen); + strbuf_release(&ssh_keygen_out); 
+ strbuf_release(&ssh_keygen_err); + strvec_push(&ssh_keygen.args, fmt->program); + /* + * We found principals + * Try with each until we find a match + */ + strvec_pushl(&ssh_keygen.args, "-Y", "verify", + "-n", "git", + "-f", ssh_allowed_signers, + "-I", principal, + "-s", buffer_file->filename.buf, + NULL); + + if (ssh_revocation_file) { + if (file_exists(ssh_revocation_file)) { + strvec_pushl(&ssh_keygen.args, "-r", + ssh_revocation_file, NULL); + } else { + warning(_("ssh signing revocation file configured but not found: %s"), + ssh_revocation_file); + } + } + + sigchain_push(SIGPIPE, SIG_IGN); + ret = pipe_command(&ssh_keygen, payload, payload_size, + &ssh_keygen_out, 0, &ssh_keygen_err, 0); + sigchain_pop(SIGPIPE); + + FREE_AND_NULL(principal); + + if (!ret) + ret = !starts_with(ssh_keygen_out.buf, "Good"); + + if (!ret) + break; + } + } + + sigc->payload = xmemdupz(payload, payload_size); + strbuf_stripspace(&ssh_keygen_out, 0); + strbuf_stripspace(&ssh_keygen_err, 0); + strbuf_add(&ssh_keygen_out, ssh_keygen_err.buf, ssh_keygen_err.len); + sigc->output = strbuf_detach(&ssh_keygen_out, NULL); + sigc->gpg_status = xstrdup(sigc->output); + + parse_ssh_output(sigc); + +out: + if (buffer_file) + delete_tempfile(&buffer_file); + strbuf_release(&ssh_keygen_out); + strbuf_release(&ssh_keygen_err); + + return ret; +} + int check_signature(const char *payload, size_t plen, const char *signature, size_t slen, struct signature_check *sigc) { @@ -473,6 +666,18 @@ int git_gpg_config(const char *var, const char *value, void *cb) return git_config_string(&ssh_default_key_command, var, value); } + if (!strcmp(var, "gpg.ssh.allowedsignersfile")) { + if (!value) + return config_error_nonbool(var); + return git_config_string(&ssh_allowed_signers, var, value); + } + + if (!strcmp(var, "gpg.ssh.revocationfile")) { + if (!value) + return config_error_nonbool(var); + return git_config_string(&ssh_revocation_file, var, value); + } + if (!strcmp(var, "gpg.program") || !strcmp(var, 
"gpg.openpgp.program")) fmtname = "openpgp"; From 587967698ab2d679a53f79162a35fda6e2eaac08 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:56 +0000 Subject: [PATCH 125/198] ssh signing: duplicate t7510 tests for commits Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7528-signed-commit-ssh.sh | 398 +++++++++++++++++++++++++++++++++++ 1 file changed, 398 insertions(+) create mode 100755 t/t7528-signed-commit-ssh.sh diff --git a/t/t7528-signed-commit-ssh.sh b/t/t7528-signed-commit-ssh.sh new file mode 100755 index 00000000000000..3e093168eef571 --- /dev/null +++ b/t/t7528-signed-commit-ssh.sh @@ -0,0 +1,398 @@ +#!/bin/sh + +test_description='ssh signed commit tests' +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +GNUPGHOME_NOT_USED=$GNUPGHOME +. "$TEST_DIRECTORY/lib-gpg.sh" + +test_expect_success GPGSSH 'create signed commits' ' + test_oid_cache <<-\EOF && + header sha1:gpgsig + header sha256:gpgsig-sha256 + EOF + + test_when_finished "test_unconfig commit.gpgsign" && + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + + echo 1 >file && git add file && + test_tick && git commit -S -m initial && + git tag initial && + git branch side && + + echo 2 >file && test_tick && git commit -a -S -m second && + git tag second && + + git checkout side && + echo 3 >elif && git add elif && + test_tick && git commit -m "third on side" && + + git checkout main && + test_tick && git merge -S side && + git tag merge && + + echo 4 >file && test_tick && git commit -a -m "fourth unsigned" && + git tag fourth-unsigned && + + test_tick && git commit --amend -S -m "fourth signed" && + git tag fourth-signed && + + git config commit.gpgsign true && + echo 5 >file && test_tick && git commit -a -m "fifth signed" && + git tag fifth-signed && + + git config commit.gpgsign false && + echo 6 >file && 
test_tick && git commit -a -m "sixth" && + git tag sixth-unsigned && + + git config commit.gpgsign true && + echo 7 >file && test_tick && git commit -a -m "seventh" --no-gpg-sign && + git tag seventh-unsigned && + + test_tick && git rebase -f HEAD^^ && git tag sixth-signed HEAD^ && + git tag seventh-signed && + + echo 8 >file && test_tick && git commit -a -m eighth -S"${GPGSSH_KEY_UNTRUSTED}" && + git tag eighth-signed-alt && + + # commit.gpgsign is still on but this must not be signed + echo 9 | git commit-tree HEAD^{tree} >oid && + test_line_count = 1 oid && + git tag ninth-unsigned $(cat oid) && + # explicit -S of course must sign. + echo 10 | git commit-tree -S HEAD^{tree} >oid && + test_line_count = 1 oid && + git tag tenth-signed $(cat oid) && + + # --gpg-sign[=<key-id>] must sign. + echo 11 | git commit-tree --gpg-sign HEAD^{tree} >oid && + test_line_count = 1 oid && + git tag eleventh-signed $(cat oid) && + echo 12 | git commit-tree --gpg-sign="${GPGSSH_KEY_UNTRUSTED}" HEAD^{tree} >oid && + test_line_count = 1 oid && + git tag twelfth-signed-alt $(cat oid) +' + +test_expect_success GPGSSH 'verify and show signatures' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_config gpg.mintrustlevel UNDEFINED && + ( + for commit in initial second merge fourth-signed \ + fifth-signed sixth-signed seventh-signed tenth-signed \ + eleventh-signed + do + git verify-commit $commit && + git show --pretty=short --show-signature $commit >actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $commit OK || exit 1 + done + ) && + ( + for commit in merge^2 fourth-unsigned sixth-unsigned \ + seventh-unsigned ninth-unsigned + do + test_must_fail git verify-commit $commit && + git show --pretty=short --show-signature $commit >actual && + ! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! 
grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $commit OK || exit 1 + done + ) && + ( + for commit in eighth-signed-alt twelfth-signed-alt + do + git show --pretty=short --show-signature $commit >actual && + grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + grep "${GPGSSH_KEY_NOT_TRUSTED}" actual && + echo $commit OK || exit 1 + done + ) +' + +test_expect_success GPGSSH 'verify-commit exits failure on untrusted signature' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_must_fail git verify-commit eighth-signed-alt 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + grep "${GPGSSH_KEY_NOT_TRUSTED}" actual +' + +test_expect_success GPGSSH 'verify-commit exits success with matching minTrustLevel' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_config gpg.minTrustLevel fully && + git verify-commit sixth-signed +' + +test_expect_success GPGSSH 'verify-commit exits success with low minTrustLevel' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_config gpg.minTrustLevel marginal && + git verify-commit sixth-signed +' + +test_expect_success GPGSSH 'verify-commit exits failure with high minTrustLevel' ' + test_config gpg.minTrustLevel ultimate && + test_must_fail git verify-commit eighth-signed-alt +' + +test_expect_success GPGSSH 'verify signatures with --raw' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + ( + for commit in initial second merge fourth-signed fifth-signed sixth-signed seventh-signed + do + git verify-commit --raw $commit 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $commit OK || exit 1 + done + ) && + ( + for commit in merge^2 fourth-unsigned sixth-unsigned seventh-unsigned + do + test_must_fail git verify-commit --raw $commit 2>actual && + !
grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $commit OK || exit 1 + done + ) && + ( + for commit in eighth-signed-alt + do + test_must_fail git verify-commit --raw $commit 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $commit OK || exit 1 + done + ) +' + +test_expect_success GPGSSH 'proper header is used for hash algorithm' ' + git cat-file commit fourth-signed >output && + grep "^$(test_oid header) -----BEGIN SSH SIGNATURE-----" output +' + +test_expect_success GPGSSH 'show signed commit with signature' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git show -s initial >commit && + git show -s --show-signature initial >show && + git verify-commit -v initial >verify.1 2>verify.2 && + git cat-file commit initial >cat && + grep -v -e "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" -e "Warning: " show >show.commit && + grep -e "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" -e "Warning: " show >show.gpg && + grep -v "^ " cat | grep -v "^gpgsig.* " >cat.commit && + test_cmp show.commit commit && + test_cmp show.gpg verify.2 && + test_cmp cat.commit verify.1 +' + +test_expect_success GPGSSH 'detect fudged signature' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git cat-file commit seventh-signed >raw && + sed -e "s/^seventh/7th forged/" raw >forged1 && + git hash-object -w -t commit forged1 >forged1.commit && + test_must_fail git verify-commit $(cat forged1.commit) && + git show --pretty=short --show-signature $(cat forged1.commit) >actual1 && + grep "${GPGSSH_BAD_SIGNATURE}" actual1 && + ! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual1 && + ! 
grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual1 +' + +test_expect_success GPGSSH 'detect fudged signature with NUL' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git cat-file commit seventh-signed >raw && + cat raw >forged2 && + echo Qwik | tr "Q" "\000" >>forged2 && + git hash-object -w -t commit forged2 >forged2.commit && + test_must_fail git verify-commit $(cat forged2.commit) && + git show --pretty=short --show-signature $(cat forged2.commit) >actual2 && + grep "${GPGSSH_BAD_SIGNATURE}" actual2 && + ! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual2 +' + +test_expect_success GPGSSH 'amending already signed commit' ' + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git checkout fourth-signed^0 && + git commit --amend -S --no-edit && + git verify-commit HEAD && + git show -s --show-signature HEAD >actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! 
grep "${GPGSSH_BAD_SIGNATURE}" actual +' + +test_expect_success GPGSSH 'show good signature with custom format' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") && + cat >expect.tmpl <<-\EOF && + G + FINGERPRINT + principal with number 1 + FINGERPRINT + + EOF + sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect && + git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" sixth-signed >actual && + test_cmp expect actual +' + +test_expect_success GPGSSH 'show bad signature with custom format' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + cat >expect <<-\EOF && + B + + + + + EOF + git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" $(cat forged1.commit) >actual && + test_cmp expect actual +' + +test_expect_success GPGSSH 'show untrusted signature with custom format' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + cat >expect.tmpl <<-\EOF && + U + FINGERPRINT + + FINGERPRINT + + EOF + git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" eighth-signed-alt >actual && + FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_UNTRUSTED}" | awk "{print \$2;}") && + sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect && + test_cmp expect actual +' + +test_expect_success GPGSSH 'show untrusted signature with undefined trust level' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + cat >expect.tmpl <<-\EOF && + undefined + FINGERPRINT + + FINGERPRINT + + EOF + git log -1 --format="%GT%n%GK%n%GS%n%GF%n%GP" eighth-signed-alt >actual && + FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_UNTRUSTED}" | awk "{print \$2;}") && + sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect && + test_cmp expect actual +' + +test_expect_success GPGSSH 'show untrusted signature with ultimate trust level' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + cat >expect.tmpl <<-\EOF && + fully + FINGERPRINT + principal with 
number 1 + FINGERPRINT + + EOF + git log -1 --format="%GT%n%GK%n%GS%n%GF%n%GP" sixth-signed >actual && + FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") && + sed "s|FINGERPRINT|$FINGERPRINT|g" expect.tmpl >expect && + test_cmp expect actual +' + +test_expect_success GPGSSH 'show lack of signature with custom format' ' + cat >expect <<-\EOF && + N + + + + + EOF + git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" seventh-unsigned >actual && + test_cmp expect actual +' + +test_expect_success GPGSSH 'log.showsignature behaves like --show-signature' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_config log.showsignature true && + git show initial >actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual +' + +test_expect_success GPGSSH 'check config gpg.format values' ' + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + test_config gpg.format ssh && + git commit -S --amend -m "success" && + test_config gpg.format OpEnPgP && + test_must_fail git commit -S --amend -m "fail" +' + +test_expect_failure GPGSSH 'detect fudged commit with double signature (TODO)' ' + sed -e "/gpgsig/,/END PGP/d" forged1 >double-base && + sed -n -e "/gpgsig/,/END PGP/p" forged1 | \ + sed -e "s/^$(test_oid header)//;s/^ //" | gpg --dearmor >double-sig1.sig && + gpg -o double-sig2.sig -u 29472784 --detach-sign double-base && + cat double-sig1.sig double-sig2.sig | gpg --enarmor >double-combined.asc && + sed -e "s/^\(-.*\)ARMORED FILE/\1SIGNATURE/;1s/^/$(test_oid header) /;2,\$s/^/ /" \ + double-combined.asc > double-gpgsig && + sed -e "/committer/r double-gpgsig" double-base >double-commit && + git hash-object -w -t commit double-commit >double-commit.commit && + test_must_fail git verify-commit $(cat double-commit.commit) && + git show --pretty=short --show-signature $(cat double-commit.commit) >double-actual && + grep "BAD signature from" double-actual && + grep "Good signature from" double-actual 
+' + +test_expect_failure GPGSSH 'show double signature with custom format (TODO)' ' + cat >expect <<-\EOF && + E + + + + + EOF + git log -1 --format="%G?%n%GK%n%GS%n%GF%n%GP" $(cat double-commit.commit) >actual && + test_cmp expect actual +' + + +test_expect_failure GPGSSH 'verify-commit verifies multiply signed commits (TODO)' ' + git init multiply-signed && + cd multiply-signed && + test_commit first && + echo 1 >second && + git add second && + tree=$(git write-tree) && + parent=$(git rev-parse HEAD^{commit}) && + git commit --gpg-sign -m second && + git cat-file commit HEAD && + # Avoid trailing whitespace. + sed -e "s/^Q//" -e "s/^Z/ /" >commit <<-EOF && + Qtree $tree + Qparent $parent + Qauthor A U Thor <author@example.com> 1112912653 -0700 + Qcommitter C O Mitter <committer@example.com> 1112912653 -0700 + Qgpgsig -----BEGIN PGP SIGNATURE----- + QZ + Q iHQEABECADQWIQRz11h0S+chaY7FTocTtvUezd5DDQUCX/uBDRYcY29tbWl0dGVy + Q QGV4YW1wbGUuY29tAAoJEBO29R7N3kMNd+8AoK1I8mhLHviPH+q2I5fIVgPsEtYC + Q AKCTqBh+VabJceXcGIZuF0Ry+udbBQ== + Q =tQ0N + Q -----END PGP SIGNATURE----- + Qgpgsig-sha256 -----BEGIN PGP SIGNATURE----- + QZ + Q iHQEABECADQWIQRz11h0S+chaY7FTocTtvUezd5DDQUCX/uBIBYcY29tbWl0dGVy + Q QGV4YW1wbGUuY29tAAoJEBO29R7N3kMN/NEAn0XO9RYSBj2dFyozi0JKSbssYMtO + Q AJwKCQ1BQOtuwz//IjU8TiS+6S4iUw== + Q =pIwP + Q -----END PGP SIGNATURE----- + Q + Qsecond + EOF + head=$(git hash-object -t commit -w commit) && + git reset --hard $head && + git verify-commit $head 2>actual && + grep "Good signature from" actual && + ! 
grep "BAD signature from" actual +' + +test_done From 52ac6bd36f7435c9f2b68be6691eacfcf5a80538 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:57 +0000 Subject: [PATCH 126/198] ssh signing: tests for logs, tags & push certs Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t4202-log.sh | 23 +++++ t/t5534-push-signed.sh | 101 +++++++++++++++++++ t/t7031-verify-tag-signed-ssh.sh | 161 +++++++++++++++++++++++++++++++ 3 files changed, 285 insertions(+) create mode 100755 t/t7031-verify-tag-signed-ssh.sh diff --git a/t/t4202-log.sh b/t/t4202-log.sh index 39e746fbcbe228..79702716549a14 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -1616,6 +1616,16 @@ test_expect_success GPGSM 'setup signed branch x509' ' git commit -S -m signed_commit ' +test_expect_success GPGSSH 'setup sshkey signed branch' ' + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + test_when_finished "git reset --hard && git checkout main" && + git checkout -b signed-ssh main && + echo foo >foo && + git add foo && + git commit -S -m signed_commit +' + test_expect_success GPGSM 'log x509 fingerprint' ' echo "F8BF62E0693D0694816377099909C779FA23FD65 | " >expect && git log -n1 --format="%GF | %GP" signed-x509 >actual && @@ -1628,6 +1638,13 @@ test_expect_success GPGSM 'log OpenPGP fingerprint' ' test_cmp expect actual ' +test_expect_success GPGSSH 'log ssh key fingerprint' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2\" | \"}" >expect && + git log -n1 --format="%GF | %GP" signed-ssh >actual && + test_cmp expect actual +' + test_expect_success GPG 'log --graph --show-signature' ' git log --graph --show-signature -n1 signed >actual && grep "^| gpg: Signature made" actual && @@ -1640,6 +1657,12 @@ test_expect_success GPGSM 'log --graph --show-signature x509' ' grep "^| gpgsm: Good 
signature" actual ' +test_expect_success GPGSSH 'log --graph --show-signature ssh' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git log --graph --show-signature -n1 signed-ssh >actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual +' + test_expect_success GPG 'log --graph --show-signature for merged tag' ' test_when_finished "git reset --hard && git checkout main" && git checkout -b plain main && diff --git a/t/t5534-push-signed.sh b/t/t5534-push-signed.sh index bba768f5ded1fc..24d374adbae884 100755 --- a/t/t5534-push-signed.sh +++ b/t/t5534-push-signed.sh @@ -137,6 +137,53 @@ test_expect_success GPG 'signed push sends push certificate' ' test_cmp expect dst/push-cert-status ' +test_expect_success GPGSSH 'ssh signed push sends push certificate' ' + prepare_dst && + mkdir -p dst/.git/hooks && + git -C dst config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git -C dst config receive.certnonceseed sekrit && + write_script dst/.git/hooks/post-receive <<-\EOF && + # discard the update list + cat >/dev/null + # record the push certificate + if test -n "${GIT_PUSH_CERT-}" + then + git cat-file blob $GIT_PUSH_CERT >../push-cert + fi && + + cat >../push-cert-status <<E_O_F + SIGNER=${GIT_PUSH_CERT_SIGNER-nobody} + KEY=${GIT_PUSH_CERT_KEY-nokey} + STATUS=${GIT_PUSH_CERT_STATUS-nostatus} + NONCE_STATUS=${GIT_PUSH_CERT_NONCE_STATUS-nononcestatus} + NONCE=${GIT_PUSH_CERT_NONCE-nononce} + E_O_F + + EOF + + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") && + git push --signed dst noop ff +noff && + + ( + cat <<-\EOF && + SIGNER=principal with number 1 + KEY=FINGERPRINT + STATUS=G + NONCE_STATUS=OK + EOF + sed -n -e "s/^nonce /NONCE=/p" -e "/^$/q" dst/push-cert + ) | sed -e "s|FINGERPRINT|$FINGERPRINT|" >expect && + + noop=$(git rev-parse noop) && + ff=$(git rev-parse ff) && + noff=$(git rev-parse noff) && + grep "$noop
$ff refs/heads/ff" dst/push-cert && + grep "$noop $noff refs/heads/noff" dst/push-cert && + test_cmp expect dst/push-cert-status +' + test_expect_success GPG 'inconsistent push options in signed push not allowed' ' # First, invoke receive-pack with dummy input to obtain its preamble. prepare_dst && @@ -276,6 +323,60 @@ test_expect_success GPGSM 'fail without key and heed user.signingkey x509' ' test_cmp expect dst/push-cert-status ' +test_expect_success GPGSSH 'fail without key and heed user.signingkey ssh' ' + test_config gpg.format ssh && + prepare_dst && + mkdir -p dst/.git/hooks && + git -C dst config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git -C dst config receive.certnonceseed sekrit && + write_script dst/.git/hooks/post-receive <<-\EOF && + # discard the update list + cat >/dev/null + # record the push certificate + if test -n "${GIT_PUSH_CERT-}" + then + git cat-file blob $GIT_PUSH_CERT >../push-cert + fi && + + cat >../push-cert-status <<E_O_F + SIGNER=${GIT_PUSH_CERT_SIGNER-nobody} + KEY=${GIT_PUSH_CERT_KEY-nokey} + STATUS=${GIT_PUSH_CERT_STATUS-nostatus} + NONCE_STATUS=${GIT_PUSH_CERT_NONCE_STATUS-nononcestatus} + NONCE=${GIT_PUSH_CERT_NONCE-nononce} + E_O_F + + EOF + + test_config user.email hasnokey@nowhere.com && + test_config gpg.format ssh && + test_config user.signingkey "" && + ( + sane_unset GIT_COMMITTER_EMAIL && + test_must_fail git push --signed dst noop ff +noff + ) && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + FINGERPRINT=$(ssh-keygen -lf "${GPGSSH_KEY_PRIMARY}" | awk "{print \$2;}") && + git push --signed dst noop ff +noff && + + ( + cat <<-\EOF && + SIGNER=principal with number 1 + KEY=FINGERPRINT + STATUS=G + NONCE_STATUS=OK + EOF + sed -n -e "s/^nonce /NONCE=/p" -e "/^$/q" dst/push-cert + ) | sed -e "s|FINGERPRINT|$FINGERPRINT|" >expect && + + noop=$(git rev-parse noop) && + ff=$(git rev-parse ff) && + noff=$(git rev-parse noff) && + grep "$noop $ff refs/heads/ff" dst/push-cert && + grep "$noop $noff 
refs/heads/noff" dst/push-cert && + test_cmp expect dst/push-cert-status +' + test_expect_success GPG 'failed atomic push does not execute GPG' ' prepare_dst && git -C dst config receive.certnonceseed sekrit && diff --git a/t/t7031-verify-tag-signed-ssh.sh b/t/t7031-verify-tag-signed-ssh.sh new file mode 100755 index 00000000000000..06c9dd6c9339f2 --- /dev/null +++ b/t/t7031-verify-tag-signed-ssh.sh @@ -0,0 +1,161 @@ +#!/bin/sh + +test_description='signed tag tests' +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-gpg.sh" + +test_expect_success GPGSSH 'create signed tags ssh' ' + test_when_finished "test_unconfig commit.gpgsign" && + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + + echo 1 >file && git add file && + test_tick && git commit -m initial && + git tag -s -m initial initial && + git branch side && + + echo 2 >file && test_tick && git commit -a -m second && + git tag -s -m second second && + + git checkout side && + echo 3 >elif && git add elif && + test_tick && git commit -m "third on side" && + + git checkout main && + test_tick && git merge -S side && + git tag -s -m merge merge && + + echo 4 >file && test_tick && git commit -a -S -m "fourth unsigned" && + git tag -a -m fourth-unsigned fourth-unsigned && + + test_tick && git commit --amend -S -m "fourth signed" && + git tag -s -m fourth fourth-signed && + + echo 5 >file && test_tick && git commit -a -m "fifth" && + git tag fifth-unsigned && + + git config commit.gpgsign true && + echo 6 >file && test_tick && git commit -a -m "sixth" && + git tag -a -m sixth sixth-unsigned && + + test_tick && git rebase -f HEAD^^ && git tag -s -m 6th sixth-signed HEAD^ && + git tag -m seventh -s seventh-signed && + + echo 8 >file && test_tick && git commit -a -m eighth && + git tag -u"${GPGSSH_KEY_UNTRUSTED}" -m eighth eighth-signed-alt +' + +test_expect_success GPGSSH 'verify and show ssh 
signatures' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + ( + for tag in initial second merge fourth-signed sixth-signed seventh-signed + do + git verify-tag $tag 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $tag OK || exit 1 + done + ) && + ( + for tag in fourth-unsigned fifth-unsigned sixth-unsigned + do + test_must_fail git verify-tag $tag 2>actual && + ! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $tag OK || exit 1 + done + ) && + ( + for tag in eighth-signed-alt + do + test_must_fail git verify-tag $tag 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + grep "${GPGSSH_KEY_NOT_TRUSTED}" actual && + echo $tag OK || exit 1 + done + ) +' + +test_expect_success GPGSSH 'detect fudged ssh signature' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git cat-file tag seventh-signed >raw && + sed -e "/^tag / s/seventh/7th forged/" raw >forged1 && + git hash-object -w -t tag forged1 >forged1.tag && + test_must_fail git verify-tag $(cat forged1.tag) 2>actual1 && + grep "${GPGSSH_BAD_SIGNATURE}" actual1 && + ! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual1 && + ! grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual1 +' + +test_expect_success GPGSSH 'verify ssh signatures with --raw' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + ( + for tag in initial second merge fourth-signed sixth-signed seventh-signed + do + git verify-tag --raw $tag 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $tag OK || exit 1 + done + ) && + ( + for tag in fourth-unsigned fifth-unsigned sixth-unsigned + do + test_must_fail git verify-tag --raw $tag 2>actual && + ! grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! 
grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $tag OK || exit 1 + done + ) && + ( + for tag in eighth-signed-alt + do + test_must_fail git verify-tag --raw $tag 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_UNTRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo $tag OK || exit 1 + done + ) +' + +test_expect_success GPGSSH 'verify signatures with --raw ssh' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + git verify-tag --raw sixth-signed 2>actual && + grep "${GPGSSH_GOOD_SIGNATURE_TRUSTED}" actual && + ! grep "${GPGSSH_BAD_SIGNATURE}" actual && + echo sixth-signed OK +' + +test_expect_success GPGSSH 'verify multiple tags ssh' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + tags="seventh-signed sixth-signed" && + for i in $tags + do + git verify-tag -v --raw $i || return 1 + done >expect.stdout 2>expect.stderr.1 && + grep "^${GPGSSH_GOOD_SIGNATURE_TRUSTED}" <expect.stderr.1 >expect.stderr && + git verify-tag -v --raw $tags >actual.stdout 2>actual.stderr.1 && + grep "^${GPGSSH_GOOD_SIGNATURE_TRUSTED}" <actual.stderr.1 >actual.stderr && + test_cmp expect.stdout actual.stdout && + test_cmp expect.stderr actual.stderr +' + +test_expect_success GPGSSH 'verifying tag with --format - ssh' ' + test_config gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + cat >expect <<-\EOF && + tagname : fourth-signed + EOF + git verify-tag --format="tagname : %(tag)" "fourth-signed" >actual && + test_cmp expect actual +' + +test_expect_success GPGSSH 'verifying a forged tag with --format should fail silently - ssh' ' + test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged && + test_must_be_empty actual-forged +' + +test_done From 4ff5911494cb29f9ce073368544193f9283b2617 Mon Sep 17 00:00:00 2001 From: Fabian Stelzer <fs@gigacodes.de> Date: Tue, 3 Aug 2021 13:45:58 +0000 Subject: [PATCH 127/198] ssh signing: test that gpg fails for unknown keys Test that verify-commit/tag
will fail when a gpg key is completely unknown. To do this we have to generate a key, use it for a signature and delete it from our keyring afterwards completely. Signed-off-by: Fabian Stelzer <fs@gigacodes.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7510-signed-commit.sh | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/t/t7510-signed-commit.sh b/t/t7510-signed-commit.sh index 8df5a74f1db4ec..d65a0171f29c85 100755 --- a/t/t7510-signed-commit.sh +++ b/t/t7510-signed-commit.sh @@ -71,7 +71,25 @@ test_expect_success GPG 'create signed commits' ' git tag eleventh-signed $(cat oid) && echo 12 | git commit-tree --gpg-sign=B7227189 HEAD^{tree} >oid && test_line_count = 1 oid && - git tag twelfth-signed-alt $(cat oid) + git tag twelfth-signed-alt $(cat oid) && + + cat >keydetails <<-\EOF && + Key-Type: RSA + Key-Length: 2048 + Subkey-Type: RSA + Subkey-Length: 2048 + Name-Real: Unknown User + Name-Email: unknown@git.com + Expire-Date: 0 + %no-ask-passphrase + %no-protection + EOF + gpg --batch --gen-key keydetails && + echo 13 >file && git commit -a -S"unknown@git.com" -m thirteenth && + git tag thirteenth-signed && + DELETE_FINGERPRINT=$(gpg -K --with-colons --fingerprint --batch unknown@git.com | grep "^fpr" | head -n 1 | awk -F ":" "{print \$10;}") && + gpg --batch --yes --delete-secret-keys $DELETE_FINGERPRINT && + gpg --batch --yes --delete-keys unknown@git.com ' test_expect_success GPG 'verify and show signatures' ' @@ -110,6 +128,13 @@ test_expect_success GPG 'verify and show signatures' ' ) ' +test_expect_success GPG 'verify-commit exits failure on unknown signature' ' + test_must_fail git verify-commit thirteenth-signed 2>actual && + ! grep "Good signature from" actual && + !
grep "BAD signature from" actual && + grep -q -F -e "No public key" -e "public key not found" actual +' + test_expect_success GPG 'verify-commit exits success on untrusted signature' ' git verify-commit eighth-signed-alt 2>actual && grep "Good signature from" actual && @@ -338,6 +363,8 @@ test_expect_success GPG 'show double signature with custom format' ' ' +# NEEDSWORK: This test relies on the test_tick commit/author dates from the first +# 'create signed commits' test even though it creates its own test_expect_success GPG 'verify-commit verifies multiply signed commits' ' git init multiply-signed && cd multiply-signed && From 993b69e5baa4515f97c93e26956ff8618b0f0054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:27 +0200 Subject: [PATCH 128/198] Makefile: mark "check" target as .PHONY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug in 44c9e8594e (Fix up header file dependencies and add sparse checking rules, 2005-07-03), we never marked the phony "check" target as such. Perhaps we should just remove it, since as of a combination of 912f9980d2 (Makefile: help people who run 'make check' by mistake, 2008-11-11) 0bcd9ae85d (sparse: Fix errors due to missing target-specific variables, 2011-04-21) we've been suggesting the user run "make sparse" directly. But under that mode it still does something, as well as directing the user to run "make test" under non-sparse. So let's punt that and narrowly fix the PHONY bug. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index c6f6246bf63ef2..2ff038069e8a56 100644 --- a/Makefile +++ b/Makefile @@ -2931,6 +2931,7 @@ hdr-check: $(HCO) style: git clang-format --style file --diff --extensions c,h +.PHONY: check check: config-list.h command-list.h @if sparse; \ then \ From b0b9d4f3a2f68017c19b1b5261d9ec2926eae264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:28 +0200 Subject: [PATCH 129/198] Makefile: stop hardcoding {command,config}-list.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change various places that hardcode the names of these two files to refer to either $(GENERATED_H), or to a new generated-hdrs target. That target is consistent with the *-objs targets I recently added in 029bac01a8 (Makefile: add {program,xdiff,test,git,fuzz}-objs & objects targets, 2021-02-23). A subsequent commit will add a new generated hook-list.h. By doing this refactoring we'll only need to add the new file to the GENERATED_H variable, not EXCEPT_HDRS, the vcbuild/README etc. I have not tested the Windows-specific change in config.mak.uname being made here, but we use other variables from the Makefile in the same block, and the GENERATED_H is fully defined before we include config.mak.uname. Hardcoding command-list.h there seems to have been a case of copy/paste programming in 976aaedca0 (msvc: add a Makefile target to pre-generate the Visual Studio solution, 2019-07-29). The config-list.h was added later in 709df95b78 (help: move list_config_help to builtin/help, 2020-04-16). 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 6 ++++-- compat/vcbuild/README | 2 +- config.mak.uname | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 2ff038069e8a56..89bf0dd73325bd 100644 --- a/Makefile +++ b/Makefile @@ -823,6 +823,8 @@ XDIFF_LIB = xdiff/lib.a GENERATED_H += command-list.h GENERATED_H += config-list.h +.PHONY: generated-hdrs +generated-hdrs: $(GENERATED_H) LIB_H := $(sort $(patsubst ./%,%,$(shell git ls-files '*.h' ':!t/' ':!Documentation/' 2>/dev/null || \ $(FIND) . \ @@ -2909,7 +2911,7 @@ $(SP_OBJ): %.sp: %.c GIT-CFLAGS FORCE .PHONY: sparse $(SP_OBJ) sparse: $(SP_OBJ) -EXCEPT_HDRS := command-list.h config-list.h unicode-width.h compat/% xdiff/% +EXCEPT_HDRS := $(GENERATED_H) unicode-width.h compat/% xdiff/% ifndef GCRYPT_SHA256 EXCEPT_HDRS += sha256/gcrypt.h endif @@ -2932,7 +2934,7 @@ style: git clang-format --style file --diff --extensions c,h .PHONY: check -check: config-list.h command-list.h +check: $(GENERATED_H) @if sparse; \ then \ echo >&2 "Use 'make sparse' instead"; \ diff --git a/compat/vcbuild/README b/compat/vcbuild/README index 51fb083dbbe213..29ec1d0f104b80 100644 --- a/compat/vcbuild/README +++ b/compat/vcbuild/README @@ -92,7 +92,7 @@ The Steps of Build Git with VS2008 the git operations. 3. Inside Git's directory run the command: - make command-list.h config-list.h + make generated-hdrs to generate the header file needed to compile git. 4. 
Then either build Git with the GNU Make Makefile in the Git projects diff --git a/config.mak.uname b/config.mak.uname index 69413fb3dc0ad8..9988378160b87c 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -732,9 +732,9 @@ vcxproj: echo '</Project>') >git-remote-http/LinkOrCopyRemoteHttp.targets git add -f git/LinkOrCopyBuiltins.targets git-remote-http/LinkOrCopyRemoteHttp.targets - # Add command-list.h and config-list.h - $(MAKE) MSVC=1 SKIP_VCPKG=1 prefix=/mingw64 config-list.h command-list.h - git add -f config-list.h command-list.h + # Add generated headers + $(MAKE) MSVC=1 SKIP_VCPKG=1 prefix=/mingw64 $(GENERATED_H) + git add -f $(GENERATED_H) # Add scripts rm -f perl/perl.mak From cfd9bd26a1171153dccedda2f612cef5e80c2da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:29 +0200 Subject: [PATCH 130/198] Makefile: remove an out-of-date comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This comment added in dfea575017 (Makefile: lazily compute header dependencies, 2010-01-26) has been out of date since 92b88eba9f (Makefile: use `git ls-files` to list header files, if possible, 2019-03-04), when we did exactly what it tells us not to do and added $(GENERATED_H) to $(OBJECTS) dependencies. The rest of it was also somewhere between inaccurate and outdated, since as of b8ba629264 (Makefile: fold MISC_H into LIB_H, 2012-06-20) it's not followed by a list of header files, that got moved earlier in the file into LIB_H in 60d24dd255 (Makefile: fold XDIFF_H and VCSSVN_H into LIB_H, 2012-07-06). Let's just remove it entirely, to the extent that we have anything useful to say here the comment on the "USE_COMPUTED_HEADER_DEPENDENCIES" variable a few lines above this change does the job for us. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Makefile b/Makefile index 89bf0dd73325bd..0a540dcd34e6ac 100644 --- a/Makefile +++ b/Makefile @@ -2519,13 +2519,6 @@ ifneq ($(dep_files_present),) include $(dep_files_present) endif else -# Dependencies on header files, for platforms that do not support -# the gcc -MMD option. -# -# Dependencies on automatically generated headers such as command-list.h -# should _not_ be included here, since they are necessary even when -# building an object for the first time. - $(OBJECTS): $(LIB_H) $(GENERATED_H) endif From 4a832f0e4e6d619fef7e2933933350d8ab317788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:30 +0200 Subject: [PATCH 131/198] hook.[ch]: move find_hook() to this new library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the find_hook() function from run-command.c to a new hook.c library. This change establishes a stub library that's pretty pointless right now, but will see much wider use with Emily Shaffer's upcoming "configuration-based hooks" series. Eventually all the hook related code will live in hook.[ch]. Let's start that process by moving the simple find_hook() function over as-is. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 1 + builtin/am.c | 1 + builtin/bugreport.c | 2 +- builtin/commit.c | 1 + builtin/merge.c | 1 + builtin/receive-pack.c | 1 + builtin/worktree.c | 1 + hook.c | 37 +++++++++++++++++++++++++++++++++++++ hook.h | 11 +++++++++++ refs.c | 1 + run-command.c | 35 +---------------------------------- run-command.h | 7 ------- sequencer.c | 1 + transport.c | 1 + 14 files changed, 59 insertions(+), 42 deletions(-) create mode 100644 hook.c create mode 100644 hook.h diff --git a/Makefile b/Makefile index 0a540dcd34e6ac..bc258886904ba5 100644 --- a/Makefile +++ b/Makefile @@ -910,6 +910,7 @@ LIB_OBJS += hash-lookup.o LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o +LIB_OBJS += hook.o LIB_OBJS += ident.o LIB_OBJS += json-writer.o LIB_OBJS += kwset.o diff --git a/builtin/am.c b/builtin/am.c index 0c2ad96b70eef4..c603f3cebdf5a2 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -11,6 +11,7 @@ #include "parse-options.h" #include "dir.h" #include "run-command.h" +#include "hook.h" #include "quote.h" #include "tempfile.h" #include "lockfile.h" diff --git a/builtin/bugreport.c b/builtin/bugreport.c index 9915a5841def8c..596f079a7f98a0 100644 --- a/builtin/bugreport.c +++ b/builtin/bugreport.c @@ -3,7 +3,7 @@ #include "strbuf.h" #include "help.h" #include "compat/compiler.h" -#include "run-command.h" +#include "hook.h" static void get_system_info(struct strbuf *sys_info) diff --git a/builtin/commit.c b/builtin/commit.c index 7436262aae21b5..51b07ee02eacf6 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -19,6 +19,7 @@ #include "revision.h" #include "wt-status.h" #include "run-command.h" +#include "hook.h" #include "refs.h" #include "log-tree.h" #include "strbuf.h" diff --git a/builtin/merge.c b/builtin/merge.c index a8a843b1f54113..be98d66b0a8734 100644 --- a/builtin/merge.c +++ b/builtin/merge.c 
@@ -13,6 +13,7 @@ #include "builtin.h" #include "lockfile.h" #include "run-command.h" +#include "hook.h" #include "diff.h" #include "diff-merges.h" #include "refs.h" diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 2d1f97e1ca7b53..97aebdc15bd4e8 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -7,6 +7,7 @@ #include "pkt-line.h" #include "sideband.h" #include "run-command.h" +#include "hook.h" #include "exec-cmd.h" #include "commit.h" #include "object.h" diff --git a/builtin/worktree.c b/builtin/worktree.c index 0d0a80da61f1ee..d22ece93e1a805 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -8,6 +8,7 @@ #include "branch.h" #include "refs.h" #include "run-command.h" +#include "hook.h" #include "sigchain.h" #include "submodule.h" #include "utf8.h" diff --git a/hook.c b/hook.c new file mode 100644 index 00000000000000..c4dbef1d0ef839 --- /dev/null +++ b/hook.c @@ -0,0 +1,37 @@ +#include "cache.h" +#include "hook.h" +#include "run-command.h" + +const char *find_hook(const char *name) +{ + static struct strbuf path = STRBUF_INIT; + + strbuf_reset(&path); + strbuf_git_path(&path, "hooks/%s", name); + if (access(path.buf, X_OK) < 0) { + int err = errno; + +#ifdef STRIP_EXTENSION + strbuf_addstr(&path, STRIP_EXTENSION); + if (access(path.buf, X_OK) >= 0) + return path.buf; + if (errno == EACCES) + err = errno; +#endif + + if (err == EACCES && advice_ignored_hook) { + static struct string_list advise_given = STRING_LIST_INIT_DUP; + + if (!string_list_lookup(&advise_given, name)) { + string_list_insert(&advise_given, name); + advise(_("The '%s' hook was ignored because " + "it's not set as executable.\n" + "You can disable this warning with " + "`git config advice.ignoredHook false`."), + path.buf); + } + } + return NULL; + } + return path.buf; +} diff --git a/hook.h b/hook.h new file mode 100644 index 00000000000000..68624f16059dfb --- /dev/null +++ b/hook.h @@ -0,0 +1,11 @@ +#ifndef HOOK_H +#define HOOK_H + +/* + * Returns the 
path to the hook file, or NULL if the hook is missing + * or disabled. Note that this points to static storage that will be + * overwritten by further calls to find_hook and run_hook_*. + */ +const char *find_hook(const char *name); + +#endif diff --git a/refs.c b/refs.c index 8b9f7c3a80a0f6..6211692eaaea53 100644 --- a/refs.c +++ b/refs.c @@ -10,6 +10,7 @@ #include "refs.h" #include "refs/refs-internal.h" #include "run-command.h" +#include "hook.h" #include "object-store.h" #include "object.h" #include "tag.h" diff --git a/run-command.c b/run-command.c index f72e72cce73f1a..352f5be16465df 100644 --- a/run-command.c +++ b/run-command.c @@ -8,6 +8,7 @@ #include "string-list.h" #include "quote.h" #include "config.h" +#include "hook.h" void child_process_init(struct child_process *child) { @@ -1319,40 +1320,6 @@ int async_with_fork(void) #endif } -const char *find_hook(const char *name) -{ - static struct strbuf path = STRBUF_INIT; - - strbuf_reset(&path); - strbuf_git_path(&path, "hooks/%s", name); - if (access(path.buf, X_OK) < 0) { - int err = errno; - -#ifdef STRIP_EXTENSION - strbuf_addstr(&path, STRIP_EXTENSION); - if (access(path.buf, X_OK) >= 0) - return path.buf; - if (errno == EACCES) - err = errno; -#endif - - if (err == EACCES && advice_ignored_hook) { - static struct string_list advise_given = STRING_LIST_INIT_DUP; - - if (!string_list_lookup(&advise_given, name)) { - string_list_insert(&advise_given, name); - advise(_("The '%s' hook was ignored because " - "it's not set as executable.\n" - "You can disable this warning with " - "`git config advice.ignoredHook false`."), - path.buf); - } - } - return NULL; - } - return path.buf; -} - int run_hook_ve(const char *const *env, const char *name, va_list args) { struct child_process hook = CHILD_PROCESS_INIT; diff --git a/run-command.h b/run-command.h index af1296769f9862..f76b740f927b3b 100644 --- a/run-command.h +++ b/run-command.h @@ -204,13 +204,6 @@ int finish_command_in_signal(struct child_process *); */ 
int run_command(struct child_process *); -/* - * Returns the path to the hook file, or NULL if the hook is missing - * or disabled. Note that this points to static storage that will be - * overwritten by further calls to find_hook and run_hook_*. - */ -const char *find_hook(const char *name); - /** * Run a hook. * The first argument is a pathname to an index file, or NULL diff --git a/sequencer.c b/sequencer.c index 7f07cd00f3f20a..ea4199d65a4d7c 100644 --- a/sequencer.c +++ b/sequencer.c @@ -8,6 +8,7 @@ #include "sequencer.h" #include "tag.h" #include "run-command.h" +#include "hook.h" #include "exec-cmd.h" #include "utf8.h" #include "cache-tree.h" diff --git a/transport.c b/transport.c index 17e9629710a2f8..77e196f75f551d 100644 --- a/transport.c +++ b/transport.c @@ -2,6 +2,7 @@ #include "config.h" #include "transport.h" #include "run-command.h" +#include "hook.h" #include "pkt-line.h" #include "fetch-pack.h" #include "remote.h" From bb7449213c1301424ebdcf791bb2d90fe48d7de6 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:31 +0200 Subject: [PATCH 132/198] hook.c: add a hook_exists() wrapper and use it in bugreport.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a boolean version of the find_hook() function for those callers who are only interested in checking whether the hook exists, not what the path to it is. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/bugreport.c | 2 +- hook.c | 5 +++++ hook.h | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/builtin/bugreport.c b/builtin/bugreport.c index 596f079a7f98a0..941c8d5e27004c 100644 --- a/builtin/bugreport.c +++ b/builtin/bugreport.c @@ -82,7 +82,7 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit) } for (i = 0; i < ARRAY_SIZE(hook); i++) - if (find_hook(hook[i])) + if (hook_exists(hook[i])) strbuf_addf(hook_info, "%s\n", hook[i]); } diff --git a/hook.c b/hook.c index c4dbef1d0ef839..97cd799a320c67 100644 --- a/hook.c +++ b/hook.c @@ -35,3 +35,8 @@ const char *find_hook(const char *name) } return path.buf; } + +int hook_exists(const char *name) +{ + return !!find_hook(name); +} diff --git a/hook.h b/hook.h index 68624f16059dfb..4c547ac15e5ba8 100644 --- a/hook.h +++ b/hook.h @@ -8,4 +8,9 @@ */ const char *find_hook(const char *name); +/* + * A boolean version of find_hook() + */ +int hook_exists(const char *hookname); + #endif From f1f399d19632546ec71d50dc07ed558619efdf05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:32 +0200 Subject: [PATCH 133/198] hook.c users: use "hook_exists()" instead of "find_hook()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new hook_exists() function instead of find_hook() where the latter was called in boolean contexts. This makes subsequent changes in a series where we further refactor the hook API clearer, as we won't conflate wanting to get the path of the hook with checking for its existence. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/commit.c | 2 +- builtin/merge.c | 2 +- builtin/receive-pack.c | 2 +- sequencer.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 51b07ee02eacf6..aa3c741efa9d37 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1052,7 +1052,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix, return 0; } - if (!no_verify && find_hook("pre-commit")) { + if (!no_verify && hook_exists("pre-commit")) { /* * Re-read the index as pre-commit hook could have updated it, * and write it out as a tree. We must do this before we invoke diff --git a/builtin/merge.c b/builtin/merge.c index be98d66b0a8734..03f244dd5a09a1 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -849,7 +849,7 @@ static void prepare_to_commit(struct commit_list *remoteheads) * and write it out as a tree. We must do this before we invoke * the editor and after we invoke run_status above. 
*/ - if (find_hook("pre-merge-commit")) + if (hook_exists("pre-merge-commit")) discard_cache(); read_cache_from(index_file); strbuf_addbuf(&msg, &merge_msg); diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 97aebdc15bd4e8..91fa799b66e407 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1464,7 +1464,7 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w strvec_pushf(&env, "GIT_DIR=%s", absolute_path(git_dir)); - if (!find_hook(push_to_checkout_hook)) + if (!hook_exists(push_to_checkout_hook)) retval = push_to_deploy(sha1, &env, work_tree); else retval = push_to_checkout(sha1, &env, work_tree); diff --git a/sequencer.c b/sequencer.c index ea4199d65a4d7c..9aac08c15451a6 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1446,7 +1446,7 @@ static int try_to_commit(struct repository *r, } } - if (find_hook("prepare-commit-msg")) { + if (hook_exists("prepare-commit-msg")) { res = run_prepare_commit_msg_hook(r, msg, hook_commit); if (res) goto out; From 117a741a6ec6ffa613eb1713ce9be384b36da4da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:33 +0200 Subject: [PATCH 134/198] hook-list.h: add a generated list of hooks, like config-list.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make githooks(5) the source of truth for what hooks git supports, and die on hooks we don't know about in find_hook(). This ensures that the documentation and the C code's idea about existing hooks doesn't diverge. We still have Perl and Python code running its own hooks, but that'll be addressed by Emily Shaffer's upcoming "git hook run" command. This resolves a long-standing TODO item in bugreport.c of there being no centralized listing of hooks, and fixes a bug with the bugreport listing only knowing about 1/4 of the p4 hooks. It didn't know about the recent "reference-transaction" hook either. 
I have not been able to directly test the CMake change being made here. Since 4c2c38e800 (ci: modification of main.yml to use cmake for vs-build job, 2020-06-26) some of the Windows CI has a hard dependency on CMake, this change works there, and is to my eyes an obviously correct use of a pattern established in previous CMake changes, namely: - 061c2240b1 (Introduce CMake support for configuring Git, 2020-06-12) - 709df95b78 (help: move list_config_help to builtin/help, 2020-04-16) - 976aaedca0 (msvc: add a Makefile target to pre-generate the Visual Studio solution, 2019-07-29) The LC_ALL=C is needed because at least in my locale the dash ("-") is ignored for the purposes of sorting, which results in a different order. I'm not aware of anything in git that has a hard dependency on the order, but e.g. the bugreport output would end up using whatever locale was in effect when git was compiled. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: René Scharfe <l.s.r@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- .gitignore | 1 + Makefile | 10 ++++++- builtin/bugreport.c | 44 ++++++----------------------- contrib/buildsystems/CMakeLists.txt | 7 +++++ generate-hooklist.sh | 18 ++++++++++++ hook.c | 19 +++++++++++++ 6 files changed, 62 insertions(+), 37 deletions(-) create mode 100755 generate-hooklist.sh diff --git a/.gitignore b/.gitignore index 311841f9bed577..6be9de41ae83b3 100644 --- a/.gitignore +++ b/.gitignore @@ -190,6 +190,7 @@ /gitweb/static/gitweb.min.* /config-list.h /command-list.h +/hook-list.h *.tar.gz *.dsc *.deb diff --git a/Makefile b/Makefile index bc258886904ba5..1740b99db006b1 100644 --- a/Makefile +++ b/Makefile @@ -823,6 +823,8 @@ XDIFF_LIB = xdiff/lib.a GENERATED_H += command-list.h GENERATED_H += config-list.h +GENERATED_H += hook-list.h + .PHONY: generated-hdrs generated-hdrs: $(GENERATED_H) @@ -2226,7 +2228,9 @@ git$X: git.o GIT-LDFLAGS $(BUILTIN_OBJS) $(GITLIBS) help.sp help.s help.o: command-list.h 
-builtin/help.sp builtin/help.s builtin/help.o: config-list.h GIT-PREFIX +hook.sp hook.s hook.o: hook-list.h + +builtin/help.sp builtin/help.s builtin/help.o: config-list.h hook-list.h GIT-PREFIX builtin/help.sp builtin/help.s builtin/help.o: EXTRA_CPPFLAGS = \ '-DGIT_HTML_PATH="$(htmldir_relative_SQ)"' \ '-DGIT_MAN_PATH="$(mandir_relative_SQ)"' \ @@ -2259,6 +2263,10 @@ command-list.h: $(wildcard Documentation/git*.txt) $(patsubst %,--exclude-program %,$(EXCLUDED_PROGRAMS)) \ command-list.txt >$@+ && mv $@+ $@ +hook-list.h: generate-hooklist.sh Documentation/githooks.txt + $(QUIET_GEN)$(SHELL_PATH) ./generate-hooklist.sh \ + >$@+ && mv $@+ $@ + SCRIPT_DEFINES = $(SHELL_PATH_SQ):$(DIFF_SQ):$(GIT_VERSION):\ $(localedir_SQ):$(NO_CURL):$(USE_GETTEXT_SCHEME):$(SANE_TOOL_PATH_SQ):\ $(gitwebdir_SQ):$(PERL_PATH_SQ):$(SANE_TEXT_GREP):$(PAGER_ENV):\ diff --git a/builtin/bugreport.c b/builtin/bugreport.c index 941c8d5e27004c..a7a1fcb8a7ad02 100644 --- a/builtin/bugreport.c +++ b/builtin/bugreport.c @@ -4,6 +4,7 @@ #include "help.h" #include "compat/compiler.h" #include "hook.h" +#include "hook-list.h" static void get_system_info(struct strbuf *sys_info) @@ -41,39 +42,7 @@ static void get_system_info(struct strbuf *sys_info) static void get_populated_hooks(struct strbuf *hook_info, int nongit) { - /* - * NEEDSWORK: Doesn't look like there is a list of all possible hooks; - * so below is a transcription of `git help hooks`. 
Later, this should - * be replaced with some programmatically generated list (generated from - * doc or else taken from some library which tells us about all the - * hooks) - */ - static const char *hook[] = { - "applypatch-msg", - "pre-applypatch", - "post-applypatch", - "pre-commit", - "pre-merge-commit", - "prepare-commit-msg", - "commit-msg", - "post-commit", - "pre-rebase", - "post-checkout", - "post-merge", - "pre-push", - "pre-receive", - "update", - "post-receive", - "post-update", - "push-to-checkout", - "pre-auto-gc", - "post-rewrite", - "sendemail-validate", - "fsmonitor-watchman", - "p4-pre-submit", - "post-index-change", - }; - int i; + const char **p; if (nongit) { strbuf_addstr(hook_info, @@ -81,9 +50,12 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit) return; } - for (i = 0; i < ARRAY_SIZE(hook); i++) - if (hook_exists(hook[i])) - strbuf_addf(hook_info, "%s\n", hook[i]); + for (p = hook_name_list; *p; p++) { + const char *hook = *p; + + if (hook_exists(hook)) + strbuf_addf(hook_info, "%s\n", hook); + } } static const char * const bugreport_usage[] = { diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 171b4124afef58..fd1399c440f84a 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -624,6 +624,13 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/config-list.h) OUTPUT_FILE ${CMAKE_BINARY_DIR}/config-list.h) endif() +if(NOT EXISTS ${CMAKE_BINARY_DIR}/hook-list.h) + message("Generating hook-list.h") + execute_process(COMMAND ${SH_EXE} ${CMAKE_SOURCE_DIR}/generate-hooklist.sh + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_FILE ${CMAKE_BINARY_DIR}/hook-list.h) +endif() + include_directories(${CMAKE_BINARY_DIR}) #build diff --git a/generate-hooklist.sh b/generate-hooklist.sh new file mode 100755 index 00000000000000..6d4e56d1a31ea3 --- /dev/null +++ b/generate-hooklist.sh @@ -0,0 +1,18 @@ +#!/bin/sh +# +# Usage: ./generate-hooklist.sh >hook-list.h + +cat 
<<EOF +/* Automatically generated by generate-hooklist.sh */ + +static const char *hook_name_list[] = { +EOF + +sed -n -e '/^~~~~*$/ {x; s/^.*$/ "&",/; p;}; x' \ + <Documentation/githooks.txt | + LC_ALL=C sort + +cat <<EOF + NULL, +}; +EOF diff --git a/hook.c b/hook.c index 97cd799a320c67..1f1db1ec9bf4f8 100644 --- a/hook.c +++ b/hook.c @@ -1,11 +1,30 @@ #include "cache.h" #include "hook.h" #include "run-command.h" +#include "hook-list.h" + +static int known_hook(const char *name) +{ + const char **p; + size_t len = strlen(name); + for (p = hook_name_list; *p; p++) { + const char *hook = *p; + + if (!strncmp(name, hook, len) && hook[len] == '\0') + return 1; + } + + return 0; +} const char *find_hook(const char *name) { static struct strbuf path = STRBUF_INIT; + if (!known_hook(name)) + die(_("the hook '%s' is not known to git, should be in hook-list.h via githooks(5)"), + name); + strbuf_reset(&path); strbuf_git_path(&path, "hooks/%s", name); if (access(path.buf, X_OK) < 0) { From 91ebe8e76feeff99f8a3d0701bbd928a1a3f3f97 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:34 +0200 Subject: [PATCH 135/198] hook: add 'run' subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to enable hooks to be run as an external process, by a standalone Git command, or by tools which wrap Git, provide an external means to run all configured hook commands for a given hook event. Most of our hooks require more complex functionality than this, but let's start with the bare minimum required to support our simplest hooks. In terms of implementation the usage_with_options() and "goto usage" pattern here mirrors that of builtin/{commit-graph,multi-pack-index}.c. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- .gitignore | 1 + Documentation/git-hook.txt | 38 +++++++++++ Documentation/githooks.txt | 4 ++ Makefile | 1 + builtin.h | 1 + builtin/hook.c | 85 ++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + hook.c | 111 +++++++++++++++++++++++++++++++ hook.h | 41 ++++++++++++ t/t1800-hook.sh | 133 +++++++++++++++++++++++++++++++++++++ 11 files changed, 417 insertions(+) create mode 100644 Documentation/git-hook.txt create mode 100644 builtin/hook.c create mode 100755 t/t1800-hook.sh diff --git a/.gitignore b/.gitignore index 6be9de41ae83b3..66189ca3cdc9f7 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,7 @@ /git-grep /git-hash-object /git-help +/git-hook /git-http-backend /git-http-fetch /git-http-push diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt new file mode 100644 index 00000000000000..660d6a992a0f8d --- /dev/null +++ b/Documentation/git-hook.txt @@ -0,0 +1,38 @@ +git-hook(1) +=========== + +NAME +---- +git-hook - run git hooks + +SYNOPSIS +-------- +[verse] +'git hook' run <hook-name> [-- <hook-args>] + +DESCRIPTION +----------- + +This command is an interface to git hooks (see linkgit:githooks[5]). +Currently it only provides a convenience wrapper for running hooks for +use by git itself. In the future it might gain other functionality. + +SUBCOMMANDS +----------- + +run:: + Run the `<hook-name>` hook. See linkgit:githooks[5] for + the hook names we support. ++ +Any positional arguments to the hook should be passed after an +optional `--` (or `--end-of-options`, see linkgit:gitcli[7]). The +arguments (if any) differ by hook name, see linkgit:githooks[5] for +what those are. 
+ +SEE ALSO +-------- +linkgit:githooks[5] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index b51959ff9418fd..a16e62bc8c8ea7 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -698,6 +698,10 @@ and "0" meaning they were not. Only one parameter should be set to "1" when the hook runs. The hook running passing "1", "1" should not be possible. +SEE ALSO +-------- +linkgit:git-hook[1] + GIT --- Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 1740b99db006b1..e68ca41507f012 100644 --- a/Makefile +++ b/Makefile @@ -1114,6 +1114,7 @@ BUILTIN_OBJS += builtin/get-tar-commit-id.o BUILTIN_OBJS += builtin/grep.o BUILTIN_OBJS += builtin/hash-object.o BUILTIN_OBJS += builtin/help.o +BUILTIN_OBJS += builtin/hook.o BUILTIN_OBJS += builtin/index-pack.o BUILTIN_OBJS += builtin/init-db.o BUILTIN_OBJS += builtin/interpret-trailers.o diff --git a/builtin.h b/builtin.h index 16ecd5586f0bee..91740c151494d4 100644 --- a/builtin.h +++ b/builtin.h @@ -164,6 +164,7 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix); int cmd_grep(int argc, const char **argv, const char *prefix); int cmd_hash_object(int argc, const char **argv, const char *prefix); int cmd_help(int argc, const char **argv, const char *prefix); +int cmd_hook(int argc, const char **argv, const char *prefix); int cmd_index_pack(int argc, const char **argv, const char *prefix); int cmd_init_db(int argc, const char **argv, const char *prefix); int cmd_interpret_trailers(int argc, const char **argv, const char *prefix); diff --git a/builtin/hook.c b/builtin/hook.c new file mode 100644 index 00000000000000..41dd15550cf142 --- /dev/null +++ b/builtin/hook.c @@ -0,0 +1,85 @@ +#include "cache.h" +#include "builtin.h" +#include "config.h" +#include "hook.h" +#include "parse-options.h" +#include "strbuf.h" +#include "strvec.h" + +#define BUILTIN_HOOK_RUN_USAGE \ + N_("git hook run <hook-name> [-- 
<hook-args>]") + +static const char * const builtin_hook_usage[] = { + BUILTIN_HOOK_RUN_USAGE, + NULL +}; + +static const char * const builtin_hook_run_usage[] = { + BUILTIN_HOOK_RUN_USAGE, + NULL +}; + +static int run(int argc, const char **argv, const char *prefix) +{ + int i; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + const char *hook_name; + const char *hook_path; + struct option run_options[] = { + OPT_END(), + }; + int ret; + + argc = parse_options(argc, argv, prefix, run_options, + builtin_hook_run_usage, + PARSE_OPT_KEEP_DASHDASH); + + if (!argc) + goto usage; + + /* + * Having a -- for "run" when providing <hook-args> is + * mandatory. + */ + if (argc > 1 && strcmp(argv[1], "--") && + strcmp(argv[1], "--end-of-options")) + goto usage; + + /* Add our arguments, start after -- */ + for (i = 2 ; i < argc; i++) + strvec_push(&opt.args, argv[i]); + + /* Need to take into account core.hooksPath */ + git_config(git_default_config, NULL); + + hook_name = argv[0]; + hook_path = find_hook(hook_name); + if (!hook_path) { + error("cannot find a hook named %s", hook_name); + return 1; + } + + ret = run_hooks(hook_name, hook_path, &opt); + run_hooks_opt_clear(&opt); + return ret; +usage: + usage_with_options(builtin_hook_run_usage, run_options); +} + +int cmd_hook(int argc, const char **argv, const char *prefix) +{ + struct option builtin_hook_options[] = { + OPT_END(), + }; + + argc = parse_options(argc, argv, NULL, builtin_hook_options, + builtin_hook_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc) + goto usage; + + if (!strcmp(argv[0], "run")) + return run(argc, argv, prefix); + +usage: + usage_with_options(builtin_hook_usage, builtin_hook_options); +} diff --git a/command-list.txt b/command-list.txt index a289f09ed6fbf9..9ccd8e5aebeb81 100644 --- a/command-list.txt +++ b/command-list.txt @@ -103,6 +103,7 @@ git-grep mainporcelain info git-gui mainporcelain git-hash-object plumbingmanipulators git-help ancillaryinterrogators complete +git-hook 
mainporcelain git-http-backend synchingrepositories git-http-fetch synchelpers git-http-push synchelpers diff --git a/git.c b/git.c index 18bed9a99647aa..540909c391ff89 100644 --- a/git.c +++ b/git.c @@ -538,6 +538,7 @@ static struct cmd_struct commands[] = { { "grep", cmd_grep, RUN_SETUP_GENTLY }, { "hash-object", cmd_hash_object }, { "help", cmd_help }, + { "hook", cmd_hook, RUN_SETUP }, { "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "init", cmd_init_db }, { "init-db", cmd_init_db }, diff --git a/hook.c b/hook.c index 1f1db1ec9bf4f8..c8ff39750430c7 100644 --- a/hook.c +++ b/hook.c @@ -2,11 +2,14 @@ #include "hook.h" #include "run-command.h" #include "hook-list.h" +#include "config.h" static int known_hook(const char *name) { const char **p; size_t len = strlen(name); + static int test_hooks_ok = -1; + for (p = hook_name_list; *p; p++) { const char *hook = *p; @@ -14,6 +17,14 @@ static int known_hook(const char *name) return 1; } + if (test_hooks_ok == -1) + test_hooks_ok = git_env_bool("GIT_TEST_FAKE_HOOKS", 0); + + if (test_hooks_ok && + (!strcmp(name, "test-hook") || + !strcmp(name, "does-not-exist"))) + return 1; + return 0; } @@ -59,3 +70,103 @@ int hook_exists(const char *name) { return !!find_hook(name); } + +void run_hooks_opt_clear(struct run_hooks_opt *o) +{ + strvec_clear(&o->env); + strvec_clear(&o->args); +} + +static int pick_next_hook(struct child_process *cp, + struct strbuf *out, + void *pp_cb, + void **pp_task_cb) +{ + struct hook_cb_data *hook_cb = pp_cb; + struct hook *run_me = hook_cb->run_me; + + if (!run_me) + return 0; + + cp->no_stdin = 1; + cp->env = hook_cb->options->env.v; + cp->stdout_to_stderr = 1; + cp->trace2_hook_name = hook_cb->hook_name; + + /* add command */ + strvec_push(&cp->args, run_me->hook_path); + + /* + * add passed-in argv, without expanding - let the user get back + * exactly what they put in + */ + strvec_pushv(&cp->args, hook_cb->options->args.v); + + /* Provide context for errors if necessary */ 
+ *pp_task_cb = run_me; + + /* + * This pick_next_hook() will be called again, we're only + * running one hook, so indicate that no more work will be + * done. + */ + hook_cb->run_me = NULL; + + return 1; +} + +static int notify_start_failure(struct strbuf *out, + void *pp_cb, + void *pp_task_cp) +{ + struct hook_cb_data *hook_cb = pp_cb; + struct hook *attempted = pp_task_cp; + + hook_cb->rc |= 1; + + strbuf_addf(out, _("Couldn't start hook '%s'\n"), + attempted->hook_path); + + return 1; +} + +static int notify_hook_finished(int result, + struct strbuf *out, + void *pp_cb, + void *pp_task_cb) +{ + struct hook_cb_data *hook_cb = pp_cb; + + hook_cb->rc |= result; + + return 0; +} + +int run_hooks(const char *hook_name, const char *hook_path, + struct run_hooks_opt *options) +{ + struct hook my_hook = { + .hook_path = hook_path, + }; + struct hook_cb_data cb_data = { + .rc = 0, + .hook_name = hook_name, + .options = options, + }; + int jobs = 1; + + if (!options) + BUG("a struct run_hooks_opt must be provided to run_hooks"); + + cb_data.run_me = &my_hook; + + run_processes_parallel_tr2(jobs, + pick_next_hook, + notify_start_failure, + notify_hook_finished, + &cb_data, + "hook", + hook_name); + + return cb_data.rc; +} diff --git a/hook.h b/hook.h index 4c547ac15e5ba8..361984c69d49ae 100644 --- a/hook.h +++ b/hook.h @@ -1,5 +1,8 @@ #ifndef HOOK_H #define HOOK_H +#include "strbuf.h" +#include "strvec.h" +#include "run-command.h" /* * Returns the path to the hook file, or NULL if the hook is missing @@ -13,4 +16,42 @@ const char *find_hook(const char *name); */ int hook_exists(const char *hookname); +struct hook { + /* The path to the hook */ + const char *hook_path; +}; + +struct run_hooks_opt +{ + /* Environment vars to be set for each hook */ + struct strvec env; + + /* Args to be passed to each hook */ + struct strvec args; +}; + +#define RUN_HOOKS_OPT_INIT { \ + .env = STRVEC_INIT, \ + .args = STRVEC_INIT, \ +} + +/* + * Callback provided to feed_pipe_fn and 
consume_sideband_fn. + */ +struct hook_cb_data { + /* rc reflects the cumulative failure state */ + int rc; + const char *hook_name; + struct hook *run_me; + struct run_hooks_opt *options; +}; + +void run_hooks_opt_clear(struct run_hooks_opt *o); + +/** + * Takes an already resolved hook found via find_hook() and runs + * it. Does not call run_hooks_opt_clear() for you. + */ +int run_hooks(const char *hookname, const char *hook_path, + struct run_hooks_opt *options); #endif diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh new file mode 100755 index 00000000000000..644df0a583c836 --- /dev/null +++ b/t/t1800-hook.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +test_description='git-hook command' + +. ./test-lib.sh + +test_expect_success 'git hook usage' ' + test_expect_code 129 git hook && + test_expect_code 129 git hook run && + test_expect_code 129 git hook run -h && + test_expect_code 129 git hook run --unknown 2>err && + grep "unknown option" err +' + +test_expect_success 'setup GIT_TEST_FAKE_HOOKS=true to permit "test-hook" and "does-not-exist" names"' ' + GIT_TEST_FAKE_HOOKS=true && + export GIT_TEST_FAKE_HOOKS +' + +test_expect_success 'git hook run: nonexistent hook' ' + cat >stderr.expect <<-\EOF && + error: cannot find a hook named test-hook + EOF + test_expect_code 1 git hook run test-hook 2>stderr.actual && + test_cmp stderr.expect stderr.actual +' + +test_expect_success 'git hook run: basic' ' + write_script .git/hooks/test-hook <<-EOF && + echo Test hook + EOF + + cat >expect <<-\EOF && + Test hook + EOF + git hook run test-hook 2>actual && + test_cmp expect actual +' + +test_expect_success 'git hook run: stdout and stderr both write to our stderr' ' + write_script .git/hooks/test-hook <<-EOF && + echo >&1 Will end up on stderr + echo >&2 Will end up on stderr + EOF + + cat >stderr.expect <<-\EOF && + Will end up on stderr + Will end up on stderr + EOF + git hook run test-hook >stdout.actual 2>stderr.actual && + test_cmp stderr.expect stderr.actual && + 
test_must_be_empty stdout.actual +' + +test_expect_success 'git hook run: exit codes are passed along' ' + write_script .git/hooks/test-hook <<-EOF && + exit 1 + EOF + + test_expect_code 1 git hook run test-hook && + + write_script .git/hooks/test-hook <<-EOF && + exit 2 + EOF + + test_expect_code 2 git hook run test-hook && + + write_script .git/hooks/test-hook <<-EOF && + exit 128 + EOF + + test_expect_code 128 git hook run test-hook && + + write_script .git/hooks/test-hook <<-EOF && + exit 129 + EOF + + test_expect_code 129 git hook run test-hook +' + +test_expect_success 'git hook run arg u ments without -- is not allowed' ' + test_expect_code 129 git hook run test-hook arg u ments +' + +test_expect_success 'git hook run -- pass arguments' ' + write_script .git/hooks/test-hook <<-\EOF && + echo $1 + echo $2 + EOF + + cat >expect <<-EOF && + arg + u ments + EOF + + git hook run test-hook -- arg "u ments" 2>actual && + test_cmp expect actual +' + +test_expect_success 'git hook run -- out-of-repo runs excluded' ' + write_script .git/hooks/test-hook <<-EOF && + echo Test hook + EOF + + nongit test_must_fail git hook run test-hook +' + +test_expect_success 'git -c core.hooksPath=<PATH> hook run' ' + mkdir my-hooks && + write_script my-hooks/test-hook <<-\EOF && + echo Hook ran $1 >>actual + EOF + + cat >expect <<-\EOF && + Test hook + Hook ran one + Hook ran two + Hook ran three + Hook ran four + EOF + + # Test various ways of specifying the path. 
See also + # t1350-config-hooks-path.sh + >actual && + git hook run test-hook -- ignored 2>>actual && + git -c core.hooksPath=my-hooks hook run test-hook -- one 2>>actual && + git -c core.hooksPath=my-hooks/ hook run test-hook -- two 2>>actual && + git -c core.hooksPath="$PWD/my-hooks" hook run test-hook -- three 2>>actual && + git -c core.hooksPath="$PWD/my-hooks/" hook run test-hook -- four 2>>actual && + test_cmp expect actual +' + +test_done From 22f54200dc0918c97856e2f31c1139dd73e33029 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:35 +0200 Subject: [PATCH 136/198] gc: use hook library for pre-auto-gc hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the pre-auto-gc hook away from run-command.h and over to the new hook.h library. To do this, introduce a simple run_hooks_oneshot() wrapper; we'll be using it extensively for these simple cases of wanting to run a single hook under a given name, and having it free the memory we allocate for us. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/gc.c | 3 ++- hook.c | 21 +++++++++++++++++++++ hook.h | 13 +++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index f05d2f0a1ac9cd..2f74cf394d3ccb 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -32,6 +32,7 @@ #include "remote.h" #include "object-store.h" #include "exec-cmd.h" +#include "hook.h" #define FAILED_RUN "failed to run %s" @@ -394,7 +395,7 @@ static int need_to_gc(void) else return 0; - if (run_hook_le(NULL, "pre-auto-gc", NULL)) + if (run_hooks_oneshot("pre-auto-gc", NULL)) return 0; return 1; } diff --git a/hook.c b/hook.c index c8ff39750430c7..981a9bf46e6893 100644 --- a/hook.c +++ b/hook.c @@ -170,3 +170,24 @@ int run_hooks(const char *hook_name, const char *hook_path, return cb_data.rc; } + +int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options) +{ + const char *hook_path; + int ret; + struct run_hooks_opt hook_opt_scratch = RUN_HOOKS_OPT_INIT; + + if (!options) + options = &hook_opt_scratch; + + hook_path = find_hook(hook_name); + if (!hook_path) { + ret = 0; + goto cleanup; + } + + ret = run_hooks(hook_name, hook_path, options); +cleanup: + run_hooks_opt_clear(options); + return ret; +} diff --git a/hook.h b/hook.h index 361984c69d49ae..2201ab9d5681a1 100644 --- a/hook.h +++ b/hook.h @@ -51,7 +51,20 @@ void run_hooks_opt_clear(struct run_hooks_opt *o); /** * Takes an already resolved hook found via find_hook() and runs * it. Does not call run_hooks_opt_clear() for you. + * + * See run_hooks_oneshot() for the simpler one-shot API. */ int run_hooks(const char *hookname, const char *hook_path, struct run_hooks_opt *options); + +/** + * Calls find_hook() on your "hook_name" and runs the hooks (if any) + * with run_hooks(). + * + * If "options" is provided calls run_hooks_opt_clear() on it for + * you. 
If "options" is NULL a scratch one will be provided for you + * before calling run_hooks(). + */ +int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options); + #endif From aec5e6f509e7977213970ea59244a15b1b923e10 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:36 +0200 Subject: [PATCH 137/198] rebase: convert pre-rebase to use hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the pre-rebase hook away from run-command.h and over to the new hook.h library. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/rebase.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 12f093121d9ed3..e7c668c99b1caf 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -28,6 +28,7 @@ #include "sequencer.h" #include "rebase-interactive.h" #include "reset.h" +#include "hook.h" #define DEFAULT_REFLOG_ACTION "rebase" @@ -1313,6 +1314,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) char *squash_onto_name = NULL; int reschedule_failed_exec = -1; int allow_preemptive_ff = 1; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; struct option builtin_rebase_options[] = { OPT_STRING(0, "onto", &options.onto_name, N_("revision"), @@ -2022,9 +2024,9 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) } /* If a hook exists, give it a chance to interrupt*/ + strvec_pushl(&hook_opt.args, options.upstream_arg, argc ? argv[0] : NULL, NULL); if (!ok_to_skip_pre_rebase && - run_hook_le(NULL, "pre-rebase", options.upstream_arg, - argc ? 
argv[0] : NULL, NULL)) + run_hooks_oneshot("pre-rebase", &hook_opt)) die(_("The pre-rebase hook refused to rebase.")); if (options.flags & REBASE_DIFFSTAT) { From 406b8fdfc86a4cc7de3ae86105f47f350e48548e Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:37 +0200 Subject: [PATCH 138/198] am: convert applypatch to use hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Teach pre-applypatch, post-applypatch, and applypatch-msg to use the hook.h library instead of the run-command.h library. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/am.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index c603f3cebdf5a2..e444b18b64a34f 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -446,9 +446,11 @@ static void am_destroy(const struct am_state *state) static int run_applypatch_msg_hook(struct am_state *state) { int ret; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; assert(state->msg); - ret = run_hook_le(NULL, "applypatch-msg", am_path(state, "final-commit"), NULL); + strvec_push(&opt.args, am_path(state, "final-commit")); + ret = run_hooks_oneshot("applypatch-msg", &opt); if (!ret) { FREE_AND_NULL(state->msg); @@ -1609,7 +1611,7 @@ static void do_commit(const struct am_state *state) const char *reflog_msg, *author, *committer = NULL; struct strbuf sb = STRBUF_INIT; - if (run_hook_le(NULL, "pre-applypatch", NULL)) + if (run_hooks_oneshot("pre-applypatch", NULL)) exit(1); if (write_cache_as_tree(&tree, 0, NULL)) @@ -1661,7 +1663,7 @@ static void do_commit(const struct am_state *state) fclose(fp); } - run_hook_le(NULL, "post-applypatch", NULL); + run_hooks_oneshot("post-applypatch", NULL); strbuf_release(&sb); } From 47ed741eafcfff13acf0835d7fdac011b173ef53 Mon Sep 17 00:00:00 2001 From: Emily Shaffer 
<emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:38 +0200 Subject: [PATCH 139/198] hooks: convert 'post-checkout' hook to hook library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the running of the 'post-checkout' hook away from run-command.h to the new hook.h library. For "worktree" this requires a change to it to run the hooks from a given directory. We could strictly speaking skip the "absolute_path" flag and just check if "dir" is specified, but let's split them up for clarity, as well as for any future user who'd like to set "dir" but not implicitly change the argument to an absolute path. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/checkout.c | 14 +++++++++----- builtin/clone.c | 6 ++++-- builtin/worktree.c | 28 ++++++++++++---------------- hook.c | 9 +++++++++ hook.h | 8 ++++++++ read-cache.c | 1 + reset.c | 14 ++++++++++---- 7 files changed, 53 insertions(+), 27 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index f4cd7747d35dd1..6d69b4c0113fed 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -9,6 +9,7 @@ #include "config.h" #include "diff.h" #include "dir.h" +#include "hook.h" #include "ll-merge.h" #include "lockfile.h" #include "merge-recursive.h" @@ -106,13 +107,16 @@ struct branch_info { static int post_checkout_hook(struct commit *old_commit, struct commit *new_commit, int changed) { - return run_hook_le(NULL, "post-checkout", - oid_to_hex(old_commit ? &old_commit->object.oid : null_oid()), - oid_to_hex(new_commit ? &new_commit->object.oid : null_oid()), - changed ? "1" : "0", NULL); + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + /* "new_commit" can be NULL when checking out from the index before a commit exists. */ - + strvec_pushl(&opt.args, + oid_to_hex(old_commit ? &old_commit->object.oid : null_oid()), + oid_to_hex(new_commit ? 
&new_commit->object.oid : null_oid()), + changed ? "1" : "0", + NULL); + return run_hooks_oneshot("post-checkout", &opt); } static int update_some(const struct object_id *oid, struct strbuf *base, diff --git a/builtin/clone.c b/builtin/clone.c index 66fe66679c8498..27fc05ee511ef5 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -32,6 +32,7 @@ #include "connected.h" #include "packfile.h" #include "list-objects-filter-options.h" +#include "hook.h" /* * Overall FIXMEs: @@ -775,6 +776,7 @@ static int checkout(int submodule_progress) struct tree *tree; struct tree_desc t; int err = 0; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; if (option_no_checkout) return 0; @@ -820,8 +822,8 @@ static int checkout(int submodule_progress) if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK)) die(_("unable to write new index file")); - err |= run_hook_le(NULL, "post-checkout", oid_to_hex(null_oid()), - oid_to_hex(&oid), "1", NULL); + strvec_pushl(&hook_opt.args, oid_to_hex(null_oid()), oid_to_hex(&oid), "1", NULL); + err |= run_hooks_oneshot("post-checkout", &hook_opt); if (!err && (option_recurse_submodules.nr > 0)) { struct strvec args = STRVEC_INIT; diff --git a/builtin/worktree.c b/builtin/worktree.c index d22ece93e1a805..330867c19bf329 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -382,22 +382,18 @@ static int add_worktree(const char *path, const char *refname, * is_junk is cleared, but do return appropriate code when hook fails. 
*/ if (!ret && opts->checkout) { - const char *hook = find_hook("post-checkout"); - if (hook) { - const char *env[] = { "GIT_DIR", "GIT_WORK_TREE", NULL }; - cp.git_cmd = 0; - cp.no_stdin = 1; - cp.stdout_to_stderr = 1; - cp.dir = path; - cp.env = env; - cp.argv = NULL; - cp.trace2_hook_name = "post-checkout"; - strvec_pushl(&cp.args, absolute_path(hook), - oid_to_hex(null_oid()), - oid_to_hex(&commit->object.oid), - "1", NULL); - ret = run_command(&cp); - } + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + + strvec_pushl(&opt.env, "GIT_DIR", "GIT_WORK_TREE", NULL); + strvec_pushl(&opt.args, + oid_to_hex(null_oid()), + oid_to_hex(&commit->object.oid), + "1", + NULL); + opt.dir = path; + opt.absolute_path = 1; + + ret = run_hooks_oneshot("post-checkout", &opt); } strvec_clear(&child_env); diff --git a/hook.c b/hook.c index 981a9bf46e6893..5ecf79c83977a6 100644 --- a/hook.c +++ b/hook.c @@ -92,6 +92,7 @@ static int pick_next_hook(struct child_process *cp, cp->env = hook_cb->options->env.v; cp->stdout_to_stderr = 1; cp->trace2_hook_name = hook_cb->hook_name; + cp->dir = hook_cb->options->dir; /* add command */ strvec_push(&cp->args, run_me->hook_path); @@ -145,6 +146,7 @@ static int notify_hook_finished(int result, int run_hooks(const char *hook_name, const char *hook_path, struct run_hooks_opt *options) { + struct strbuf abs_path = STRBUF_INIT; struct hook my_hook = { .hook_path = hook_path, }; @@ -158,6 +160,10 @@ int run_hooks(const char *hook_name, const char *hook_path, if (!options) BUG("a struct run_hooks_opt must be provided to run_hooks"); + if (options->absolute_path) { + strbuf_add_absolute_path(&abs_path, hook_path); + my_hook.hook_path = abs_path.buf; + } cb_data.run_me = &my_hook; run_processes_parallel_tr2(jobs, @@ -168,6 +174,9 @@ int run_hooks(const char *hook_name, const char *hook_path, "hook", hook_name); + if (options->absolute_path) + strbuf_release(&abs_path); + return cb_data.rc; } diff --git a/hook.h b/hook.h index 
2201ab9d5681a1..53ea3a9649efa2 100644 --- a/hook.h +++ b/hook.h @@ -28,6 +28,14 @@ struct run_hooks_opt /* Args to be passed to each hook */ struct strvec args; + + /* Resolve and run the "absolute_path(hook)" instead of + * "hook". Used for "git worktree" hooks + */ + int absolute_path; + + /* Path to initial working directory for subprocess */ + const char *dir; }; #define RUN_HOOKS_OPT_INIT { \ diff --git a/read-cache.c b/read-cache.c index 99a174b91e65a3..c9e2b013972e17 100644 --- a/read-cache.c +++ b/read-cache.c @@ -28,6 +28,7 @@ #include "sparse-index.h" #include "csum-file.h" #include "promisor-remote.h" +#include "hook.h" /* Mask for the name length in ce_flags in the on-disk index */ diff --git a/reset.c b/reset.c index 4bea758053bbea..6499bc5127d022 100644 --- a/reset.c +++ b/reset.c @@ -7,6 +7,7 @@ #include "tree-walk.h" #include "tree.h" #include "unpack-trees.h" +#include "hook.h" int reset_head(struct repository *r, struct object_id *oid, const char *action, const char *switch_to_branch, unsigned flags, @@ -126,10 +127,15 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, ret = create_symref("HEAD", switch_to_branch, reflog_head); } - if (run_hook) - run_hook_le(NULL, "post-checkout", - oid_to_hex(orig ? orig : null_oid()), - oid_to_hex(oid), "1", NULL); + if (run_hook) { + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + strvec_pushl(&opt.args, + oid_to_hex(orig ? orig : null_oid()), + oid_to_hex(oid), + "1", + NULL); + run_hooks_oneshot("post-checkout", &opt); + } leave_reset_head: strbuf_release(&msg); From f91d94abc2b0dbd5018d1aee09c9e1c5a14ca9b9 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:39 +0200 Subject: [PATCH 140/198] merge: convert post-merge to use hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Teach post-merge to use the hook.h library instead of the run-command.h library to run hooks. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/merge.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/merge.c b/builtin/merge.c index 03f244dd5a09a1..4965df2ac29f8d 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -448,6 +448,7 @@ static void finish(struct commit *head_commit, const struct object_id *new_head, const char *msg) { struct strbuf reflog_message = STRBUF_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; const struct object_id *head = &head_commit->object.oid; if (!msg) @@ -489,7 +490,8 @@ static void finish(struct commit *head_commit, } /* Run a post-merge hook */ - run_hook_le(NULL, "post-merge", squash ? "1" : "0", NULL); + strvec_push(&opt.args, squash ? "1" : "0"); + run_hooks_oneshot("post-merge", &opt); apply_autostash(git_path_merge_autostash(the_repository)); strbuf_release(&reflog_message); From 2a83a2fa5b556d859956da0f4e542b717200c0b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:40 +0200 Subject: [PATCH 141/198] git hook run: add an --ignore-missing flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For certain one-shot hooks we'd like to optimistically run them, and not complain if they don't exist. This will be used by send-email in a subsequent commit. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/git-hook.txt | 10 +++++++++- builtin/hook.c | 7 ++++++- t/t1800-hook.sh | 5 +++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt index 660d6a992a0f8d..097fb9de63bdf7 100644 --- a/Documentation/git-hook.txt +++ b/Documentation/git-hook.txt @@ -8,7 +8,7 @@ git-hook - run git hooks SYNOPSIS -------- [verse] -'git hook' run <hook-name> [-- <hook-args>] +'git hook' run [--ignore-missing] <hook-name> [-- <hook-args>] DESCRIPTION ----------- @@ -29,6 +29,14 @@ optional `--` (or `--end-of-options`, see linkgit:gitcli[7]). The arguments (if any) differ by hook name, see linkgit:githooks[5] for what those are. +OPTIONS +------- + +--ignore-missing:: + Ignore any missing hook by quietly returning zero. Used for + tools that want to do a blind one-shot run of a hook that may + or may not be present. + SEE ALSO -------- linkgit:githooks[5] diff --git a/builtin/hook.c b/builtin/hook.c index 41dd15550cf142..f33db9953c72bd 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -7,7 +7,7 @@ #include "strvec.h" #define BUILTIN_HOOK_RUN_USAGE \ - N_("git hook run <hook-name> [-- <hook-args>]") + N_("git hook run [--ignore-missing] <hook-name> [-- <hook-args>]") static const char * const builtin_hook_usage[] = { BUILTIN_HOOK_RUN_USAGE, @@ -23,9 +23,12 @@ static int run(int argc, const char **argv, const char *prefix) { int i; struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + int ignore_missing = 0; const char *hook_name; const char *hook_path; struct option run_options[] = { + OPT_BOOL(0, "ignore-missing", &ignore_missing, + N_("exit quietly with a zero exit code if the requested hook cannot be found")), OPT_END(), }; int ret; @@ -53,6 +56,8 @@ static int run(int argc, const char **argv, const char *prefix) git_config(git_default_config, NULL); hook_name = argv[0]; + if (ignore_missing) + 
return run_hooks_oneshot(hook_name, &opt); hook_path = find_hook(hook_name); if (!hook_path) { error("cannot find a hook named %s", hook_name); diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 644df0a583c836..49df5a2cdfb278 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -25,6 +25,11 @@ test_expect_success 'git hook run: nonexistent hook' ' test_cmp stderr.expect stderr.actual ' +test_expect_success 'git hook run: nonexistent hook with --ignore-missing' ' + git hook run --ignore-missing does-not-exist 2>stderr.actual && + test_must_be_empty stderr.actual +' + test_expect_success 'git hook run: basic' ' write_script .git/hooks/test-hook <<-EOF && echo Test hook From 7a0bcb5fb74c191fd36af11b61011570392bfe94 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:41 +0200 Subject: [PATCH 142/198] send-email: use 'git hook run' for 'sendemail-validate' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the "sendemail-validate" hook to be run via the "git hook run" wrapper instead of via a direct invocation. This is the smallest possible change to get "send-email" using "git hook run". We still check the hook itself with "-x", and set a "GIT_DIR" variable, both of which are asserted by our tests. We'll need to get rid of this special behavior if we start running N hooks, but for now let's be as close to bug-for-bug compatible as possible. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- git-send-email.perl | 20 ++++++++++++-------- t/t9001-send-email.sh | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index e65d969d0bb257..126850d974bbd4 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -195,13 +195,13 @@ sub format_2822_time { my $editor; sub system_or_msg { - my ($args, $msg) = @_; + my ($args, $msg, $cmd_name) = @_; system(@$args); my $signalled = $? & 127; my $exit_code = $? >> 8; return unless $signalled or $exit_code; - my @sprintf_args = ($args->[0], $exit_code); + my @sprintf_args = ($cmd_name ? $cmd_name : $args->[0], $exit_code); if (defined $msg) { # Quiet the 'redundant' warning category, except we # need to support down to Perl 5.8, so we can't do a @@ -2031,10 +2031,10 @@ sub validate_patch { my ($fn, $xfer_encoding) = @_; if ($repo) { + my $hook_name = 'sendemail-validate'; my $hooks_path = $repo->command_oneline('rev-parse', '--git-path', 'hooks'); require File::Spec; - my $validate_hook = File::Spec->catfile($hooks_path, - 'sendemail-validate'); + my $validate_hook = File::Spec->catfile($hooks_path, $hook_name); my $hook_error; if (-x $validate_hook) { require Cwd; @@ -2044,13 +2044,17 @@ sub validate_patch { chdir($repo->wc_path() or $repo->repo_path()) or die("chdir: $!"); local $ENV{"GIT_DIR"} = $repo->repo_path(); - $hook_error = system_or_msg([$validate_hook, $target]); + my @validate_hook = ("git", "hook", "run", "--ignore-missing", $hook_name, "--", $target); + $hook_error = system_or_msg(\@validate_hook, undef, + "git hook run $hook_name -- <patch>"); chdir($cwd_save) or die("chdir: $!"); } if ($hook_error) { - die sprintf(__("fatal: %s: rejected by sendemail-validate hook\n" . - "%s\n" . 
- "warning: no patches were sent\n"), $fn, $hook_error); + $hook_error = sprintf(__("fatal: %s: rejected by %s hook\n" . + $hook_error . "\n" . + "warning: no patches were sent\n"), + $fn, $hook_name); + die $hook_error; } } diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 57fc10e7f82186..9ec7d75f0ff67c 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -539,7 +539,7 @@ test_expect_success $PREREQ "--validate respects relative core.hooksPath path" ' test_path_is_file my-hooks.ran && cat >expect <<-EOF && fatal: longline.patch: rejected by sendemail-validate hook - fatal: command '"'"'my-hooks/sendemail-validate'"'"' died with exit code 1 + fatal: command '"'"'git hook run sendemail-validate -- <patch>'"'"' died with exit code 1 warning: no patches were sent EOF test_cmp expect actual @@ -558,7 +558,7 @@ test_expect_success $PREREQ "--validate respects absolute core.hooksPath path" ' test_path_is_file my-hooks.ran && cat >expect <<-EOF && fatal: longline.patch: rejected by sendemail-validate hook - fatal: command '"'"'$hooks_path/sendemail-validate'"'"' died with exit code 1 + fatal: command '"'"'git hook run sendemail-validate -- <patch>'"'"' died with exit code 1 warning: no patches were sent EOF test_cmp expect actual From c4642909e217e0423949a6b0bd84c0acd1a9ea8b Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:42 +0200 Subject: [PATCH 143/198] git-p4: use 'git hook' to run hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of duplicating the behavior of run-command.h:run_hook_le() in Python, we can directly call 'git hook run'. We emulate the existence check with the --ignore-missing flag. 
As this is the last hook execution in git.git to not go through "git hook run" or the hook.[ch] library, we can now be absolutely sure that our assertion in hook.c, that only hooks known by the generated (from githooks(5)) hook-list.h are permitted, holds. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- git-p4.py | 72 ++++++------------------------------------------------- 1 file changed, 7 insertions(+), 65 deletions(-) diff --git a/git-p4.py b/git-p4.py index 2b4500226aa7a4..1f24cbf0bca213 100755 --- a/git-p4.py +++ b/git-p4.py @@ -207,71 +207,13 @@ def decode_path(path): return path def run_git_hook(cmd, param=[]): - """Execute a hook if the hook exists.""" - if verbose: - sys.stderr.write("Looking for hook: %s\n" % cmd) - sys.stderr.flush() - - hooks_path = gitConfig("core.hooksPath") - if len(hooks_path) <= 0: - hooks_path = os.path.join(os.environ["GIT_DIR"], "hooks") - - if not isinstance(param, list): - param=[param] - - # resolve hook file name, OS depdenent - hook_file = os.path.join(hooks_path, cmd) - if platform.system() == 'Windows': - if not os.path.isfile(hook_file): - # look for the file with an extension - files = glob.glob(hook_file + ".*") - if not files: - return True - files.sort() - hook_file = files.pop() - while hook_file.upper().endswith(".SAMPLE"): - # The file is a sample hook. We don't want it - if len(files) > 0: - hook_file = files.pop() - else: - return True - - if not os.path.isfile(hook_file) or not os.access(hook_file, os.X_OK): - return True - - return run_hook_command(hook_file, param) == 0 - -def run_hook_command(cmd, param): - """Executes a git hook command - cmd = the command line file to be executed. This can be - a file that is run by OS association. - - param = a list of parameters to pass to the cmd command - - On windows, the extension is checked to see if it should - be run with the Git for Windows Bash shell. 
If there - is no file extension, the file is deemed a bash shell - and will be handed off to sh.exe. Otherwise, Windows - will be called with the shell to handle the file assocation. - - For non Windows operating systems, the file is called - as an executable. - """ - cli = [cmd] + param - use_shell = False - if platform.system() == 'Windows': - (root,ext) = os.path.splitext(cmd) - if ext == "": - exe_path = os.environ.get("EXEPATH") - if exe_path is None: - exe_path = "" - else: - exe_path = os.path.join(exe_path, "bin") - cli = [os.path.join(exe_path, "SH.EXE")] + cli - else: - use_shell = True - return subprocess.call(cli, shell=use_shell) - + """args are specified with -a <arg> -a <arg> -a <arg>""" + args = ['git', 'hook', 'run', '--ignore-missing', cmd] + if param: + args.append("--") + for p in param: + args.append(p) + return subprocess.call(args) == 0 def write_pipe(c, stdin): if verbose: From 66d2308d1f287362b00171c293a3e8b47671a1ec Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:43 +0200 Subject: [PATCH 144/198] commit: convert {pre-commit,prepare-commit-msg} hook to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move these hooks away from run-command.h and over to the new hook.h library. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- commit.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/commit.c b/commit.c index 143f472c0f24bf..63d7943a86dd51 100644 --- a/commit.c +++ b/commit.c @@ -21,6 +21,7 @@ #include "commit-reach.h" #include "run-command.h" #include "shallow.h" +#include "hook.h" static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); @@ -1698,22 +1699,22 @@ size_t ignore_non_trailer(const char *buf, size_t len) int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...) { - struct strvec hook_env = STRVEC_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; va_list args; - int ret; + const char *arg; - strvec_pushf(&hook_env, "GIT_INDEX_FILE=%s", index_file); + strvec_pushf(&opt.env, "GIT_INDEX_FILE=%s", index_file); /* * Let the hook know that no editor will be launched. */ if (!editor_is_used) - strvec_push(&hook_env, "GIT_EDITOR=:"); + strvec_push(&opt.env, "GIT_EDITOR=:"); va_start(args, name); - ret = run_hook_ve(hook_env.v, name, args); + while ((arg = va_arg(args, const char *))) + strvec_push(&opt.args, arg); va_end(args); - strvec_clear(&hook_env); - return ret; + return run_hooks_oneshot(name, &opt); } From b33b42e68951be3dd8056c61d7016edd32b1904a Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:44 +0200 Subject: [PATCH 145/198] read-cache: convert post-index-change to use hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the post-index-change hook away from run-command.h and over to the new hook.h library. This removes the last direct user of run_hook_ve(), so we can make the function static now. It'll be removed entirely soon. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- read-cache.c | 10 +++++++--- run-command.c | 2 +- run-command.h | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/read-cache.c b/read-cache.c index c9e2b013972e17..90099ca14dfacd 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3068,6 +3068,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l { int ret; int was_full = !istate->sparse_index; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; ret = convert_to_sparse(istate); @@ -3096,9 +3097,12 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l else ret = close_lock_file_gently(lock); - run_hook_le(NULL, "post-index-change", - istate->updated_workdir ? "1" : "0", - istate->updated_skipworktree ? "1" : "0", NULL); + strvec_pushl(&hook_opt.args, + istate->updated_workdir ? "1" : "0", + istate->updated_skipworktree ? 
"1" : "0", + NULL); + run_hooks_oneshot("post-index-change", &hook_opt); + istate->updated_workdir = 0; istate->updated_skipworktree = 0; diff --git a/run-command.c b/run-command.c index 352f5be16465df..b4341ba1c7bcdb 100644 --- a/run-command.c +++ b/run-command.c @@ -1320,7 +1320,7 @@ int async_with_fork(void) #endif } -int run_hook_ve(const char *const *env, const char *name, va_list args) +static int run_hook_ve(const char *const *env, const char *name, va_list args) { struct child_process hook = CHILD_PROCESS_INIT; const char *p; diff --git a/run-command.h b/run-command.h index f76b740f927b3b..7a867d41217fea 100644 --- a/run-command.h +++ b/run-command.h @@ -219,7 +219,6 @@ int run_command(struct child_process *); */ LAST_ARG_MUST_BE_NULL int run_hook_le(const char *const *env, const char *name, ...); -int run_hook_ve(const char *const *env, const char *name, va_list args); /* * Trigger an auto-gc From 6e9209ee93dbefe856e9181b6e8ecac45531eb61 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:45 +0200 Subject: [PATCH 146/198] receive-pack: convert push-to-checkout hook to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the push-to-checkout hook away from run-command.h and over to the new hook.h library. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/receive-pack.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 91fa799b66e407..a7d03bbc7d386d 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1435,9 +1435,12 @@ static const char *push_to_checkout(unsigned char *hash, struct strvec *env, const char *work_tree) { + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree)); - if (run_hook_le(env->v, push_to_checkout_hook, - hash_to_hex(hash), NULL)) + strvec_pushv(&opt.env, env->v); + strvec_push(&opt.args, hash_to_hex(hash)); + if (run_hooks_oneshot(push_to_checkout_hook, &opt)) return "push-to-checkout hook declined"; else return NULL; From 3b819b8bd5f51494ea9782c5f28fa633bee5127e Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:46 +0200 Subject: [PATCH 147/198] run-command: remove old run_hook_{le,ve}() hook API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new hook.h library has replaced all run-command.h hook-related functionality. So let's delete this dead code. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- run-command.c | 32 -------------------------------- run-command.h | 16 ---------------- 2 files changed, 48 deletions(-) diff --git a/run-command.c b/run-command.c index b4341ba1c7bcdb..1399243de8a3b3 100644 --- a/run-command.c +++ b/run-command.c @@ -1320,38 +1320,6 @@ int async_with_fork(void) #endif } -static int run_hook_ve(const char *const *env, const char *name, va_list args) -{ - struct child_process hook = CHILD_PROCESS_INIT; - const char *p; - - p = find_hook(name); - if (!p) - return 0; - - strvec_push(&hook.args, p); - while ((p = va_arg(args, const char *))) - strvec_push(&hook.args, p); - hook.env = env; - hook.no_stdin = 1; - hook.stdout_to_stderr = 1; - hook.trace2_hook_name = name; - - return run_command(&hook); -} - -int run_hook_le(const char *const *env, const char *name, ...) -{ - va_list args; - int ret; - - va_start(args, name); - ret = run_hook_ve(env, name, args); - va_end(args); - - return ret; -} - struct io_pump { /* initialized by caller */ int fd; diff --git a/run-command.h b/run-command.h index 7a867d41217fea..cfb6887e4ae3b1 100644 --- a/run-command.h +++ b/run-command.h @@ -204,22 +204,6 @@ int finish_command_in_signal(struct child_process *); */ int run_command(struct child_process *); -/** - * Run a hook. - * The first argument is a pathname to an index file, or NULL - * if the hook uses the default index file or no index is needed. - * The second argument is the name of the hook. - * The further arguments correspond to the hook arguments. - * The last argument has to be NULL to terminate the arguments list. - * If the hook does not exist or is not executable, the return - * value will be zero. - * If it is executable, the hook will be executed and the exit - * status of the hook is returned. - * On execution, .stdout_to_stderr and .no_stdin will be set. 
- */ -LAST_ARG_MUST_BE_NULL -int run_hook_le(const char *const *env, const char *name, ...); - /* * Trigger an auto-gc */ From fe4ce60e0dcf47a52d4cc5d593e0f5ee65b51b22 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:47 +0200 Subject: [PATCH 148/198] run-command: allow stdin for run_processes_parallel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it makes sense not to inherit stdin from the parent process to avoid deadlocking, it's not necessary to completely ban stdin to children. An informed user should be able to configure stdin safely. By setting `some_child.process.no_stdin=1` before calling `get_next_task()` we provide a reasonable default behavior but enable users to set up stdin streaming for themselves during the callback. `some_child.process.stdout_to_stderr`, however, remains unmodifiable by `get_next_task()` - the rest of the run_processes_parallel() API depends on child output in stderr. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- run-command.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/run-command.c b/run-command.c index 1399243de8a3b3..482ee2d76c69c4 100644 --- a/run-command.c +++ b/run-command.c @@ -1627,6 +1627,14 @@ static int pp_start_one(struct parallel_processes *pp) if (i == pp->max_processes) BUG("bookkeeping is hard"); + /* + * By default, do not inherit stdin from the parent process - otherwise, + * all children would share stdin! Users may overwrite this to provide + * something to the child's stdin by having their 'get_next_task' + * callback assign 0 to .no_stdin and an appropriate integer to .in. 
+ */ + pp->children[i].process.no_stdin = 1; + code = pp->get_next_task(&pp->children[i].process, &pp->children[i].err, pp->data, @@ -1638,7 +1646,6 @@ static int pp_start_one(struct parallel_processes *pp) } pp->children[i].process.err = -1; pp->children[i].process.stdout_to_stderr = 1; - pp->children[i].process.no_stdin = 1; if (start_command(&pp->children[i].process)) { code = pp->start_failure(&pp->children[i].err, From 4787177bdd8e4a6e7d59bc124917448539cf47bb Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:48 +0200 Subject: [PATCH 149/198] hook: support passing stdin to hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some hooks (such as post-rewrite) need to take input via stdin. Previously, callers provided stdin to hooks by setting run-command.h:child_process.in, which takes a FD. Callers would open the file in question themselves before calling run-command(). However, since we will now need to seek to the front of the file and read it again for every hook which runs, hook.h:run_command() takes a path and handles FD management itself. Since this file is opened for read only, it should not prevent later parallel execution support. On the frontend, this is supported by asking for a file path, rather than by reading stdin. Reading directly from stdin would involve caching the entire stdin (to memory or to disk) and reading it back from the beginning to each hook. We'd want to support cases like insufficient memory or storage for the file. While this may prove useful later, for now the path of least resistance is to just ask the user to make this interim file themselves. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/git-hook.txt | 7 ++++++- builtin/hook.c | 4 +++- hook.c | 8 +++++++- hook.h | 3 +++ t/t1800-hook.sh | 18 ++++++++++++++++++ 5 files changed, 37 insertions(+), 3 deletions(-) diff --git a/Documentation/git-hook.txt b/Documentation/git-hook.txt index 097fb9de63bdf7..fa68c1f3912389 100644 --- a/Documentation/git-hook.txt +++ b/Documentation/git-hook.txt @@ -8,7 +8,7 @@ git-hook - run git hooks SYNOPSIS -------- [verse] -'git hook' run [--ignore-missing] <hook-name> [-- <hook-args>] +'git hook' run [--to-stdin=<path>] [--ignore-missing] <hook-name> [-- <hook-args>] DESCRIPTION ----------- @@ -32,6 +32,11 @@ what those are. OPTIONS ------- +--to-stdin:: + For "run"; Specify a file which will be streamed into the + hook's stdin. The hook will receive the entire file from + beginning to EOF. + --ignore-missing:: Ignore any missing hook by quietly returning zero. 
Used for tools that want to do a blind one-shot run of a hook that may diff --git a/builtin/hook.c b/builtin/hook.c index f33db9953c72bd..27dce6a2f0ec3c 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -7,7 +7,7 @@ #include "strvec.h" #define BUILTIN_HOOK_RUN_USAGE \ - N_("git hook run [--ignore-missing] <hook-name> [-- <hook-args>]") + N_("git hook run [--ignore-missing] [--to-stdin=<path>] <hook-name> [-- <hook-args>]") static const char * const builtin_hook_usage[] = { BUILTIN_HOOK_RUN_USAGE, @@ -29,6 +29,8 @@ static int run(int argc, const char **argv, const char *prefix) struct option run_options[] = { OPT_BOOL(0, "ignore-missing", &ignore_missing, N_("exit quietly with a zero exit code if the requested hook cannot be found")), + OPT_STRING(0, "to-stdin", &opt.path_to_stdin, N_("path"), + N_("file to read into hooks' stdin")), OPT_END(), }; int ret; diff --git a/hook.c b/hook.c index 5ecf79c83977a6..63c9a60921f856 100644 --- a/hook.c +++ b/hook.c @@ -88,7 +88,13 @@ static int pick_next_hook(struct child_process *cp, if (!run_me) return 0; - cp->no_stdin = 1; + /* reopen the file for stdin; run_command closes it. 
*/ + if (hook_cb->options->path_to_stdin) { + cp->no_stdin = 0; + cp->in = xopen(hook_cb->options->path_to_stdin, O_RDONLY); + } else { + cp->no_stdin = 1; + } cp->env = hook_cb->options->env.v; cp->stdout_to_stderr = 1; cp->trace2_hook_name = hook_cb->hook_name; diff --git a/hook.h b/hook.h index 53ea3a9649efa2..cd6a68a3b505bb 100644 --- a/hook.h +++ b/hook.h @@ -36,6 +36,9 @@ struct run_hooks_opt /* Path to initial working directory for subprocess */ const char *dir; + + /* Path to file which should be piped to stdin for each hook */ + const char *path_to_stdin; }; #define RUN_HOOKS_OPT_INIT { \ diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 49df5a2cdfb278..217db848b3f22c 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -135,4 +135,22 @@ test_expect_success 'git -c core.hooksPath=<PATH> hook run' ' test_cmp expect actual ' +test_expect_success 'stdin to hooks' ' + write_script .git/hooks/test-hook <<-\EOF && + echo BEGIN stdin + cat + echo END stdin + EOF + + cat >expect <<-EOF && + BEGIN stdin + hello + END stdin + EOF + + echo hello >input && + git hook run --to-stdin=input test-hook 2>actual && + test_cmp expect actual +' + test_done From 580bc69cb6581a7f343e0f8264a9a113f6c1901a Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:49 +0200 Subject: [PATCH 150/198] am: convert 'post-rewrite' hook to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/am.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index e444b18b64a34f..9e3d4d9ab446a7 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -467,24 +467,12 @@ static int run_applypatch_msg_hook(struct am_state *state) */ static int run_post_rewrite_hook(const struct am_state 
*state) { - struct child_process cp = CHILD_PROCESS_INIT; - const char *hook = find_hook("post-rewrite"); - int ret; - - if (!hook) - return 0; - - strvec_push(&cp.args, hook); - strvec_push(&cp.args, "rebase"); - - cp.in = xopen(am_path(state, "rewritten"), O_RDONLY); - cp.stdout_to_stderr = 1; - cp.trace2_hook_name = "post-rewrite"; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; - ret = run_command(&cp); + strvec_push(&opt.args, "rebase"); + opt.path_to_stdin = am_path(state, "rewritten"); - close(cp.in); - return ret; + return run_hooks_oneshot("post-rewrite", &opt); } /** From c623a9a2e6f6ab30c42bd0df35eca562e091883e Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:50 +0200 Subject: [PATCH 151/198] run-command: add stdin callback for parallelization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a user of the run_processes_parallel() API wants to pipe a large amount of information to stdin of each parallel command, that information could exceed the buffer of the pipe allocated for that process's stdin. Generally this is solved by repeatedly writing to child_process.in between calls to start_command() and finish_command(); run_processes_parallel() did not provide users an opportunity to access child_process at that time. Because the data might be extremely large (for example, a list of all refs received during a push from a client) simply taking a string_list or strbuf is not as scalable as using a callback; the rest of the run_processes_parallel() API also uses callbacks, so making this feature match the rest of the API reduces mental load on the user. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fetch.c | 1 + builtin/submodule--helper.c | 2 +- hook.c | 1 + run-command.c | 54 +++++++++++++++++++++++++++++++++++-- run-command.h | 17 +++++++++++- submodule.c | 1 + t/helper/test-run-command.c | 31 ++++++++++++++++++--- t/t0061-run-command.sh | 30 +++++++++++++++++++++ 8 files changed, 129 insertions(+), 8 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 25740c13df1bf8..fef6e85d003d48 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1817,6 +1817,7 @@ static int fetch_multiple(struct string_list *list, int max_children) result = run_processes_parallel_tr2(max_children, &fetch_next_remote, &fetch_failed_to_start, + NULL, &fetch_finished, &state, "fetch", "parallel/fetch"); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index f73963ad67da33..f42ded548bffb6 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -2294,7 +2294,7 @@ static int update_submodules(struct submodule_update_clone *suc) int i; run_processes_parallel_tr2(suc->max_jobs, update_clone_get_next_task, - update_clone_start_failure, + update_clone_start_failure, NULL, update_clone_task_finished, suc, "submodule", "parallel/update"); diff --git a/hook.c b/hook.c index 63c9a60921f856..a7462639d97b49 100644 --- a/hook.c +++ b/hook.c @@ -175,6 +175,7 @@ int run_hooks(const char *hook_name, const char *hook_path, run_processes_parallel_tr2(jobs, pick_next_hook, notify_start_failure, + NULL, notify_hook_finished, &cb_data, "hook", diff --git a/run-command.c b/run-command.c index 482ee2d76c69c4..f1616858d18eaa 100644 --- a/run-command.c +++ b/run-command.c @@ -1492,6 +1492,7 @@ struct parallel_processes { get_next_task_fn get_next_task; start_failure_fn start_failure; + feed_pipe_fn feed_pipe; task_finished_fn task_finished; struct { @@ -1519,6 +1520,13 @@ static 
int default_start_failure(struct strbuf *out, return 0; } +static int default_feed_pipe(struct strbuf *pipe, + void *pp_cb, + void *pp_task_cb) +{ + return 1; +} + static int default_task_finished(int result, struct strbuf *out, void *pp_cb, @@ -1549,6 +1557,7 @@ static void pp_init(struct parallel_processes *pp, int n, get_next_task_fn get_next_task, start_failure_fn start_failure, + feed_pipe_fn feed_pipe, task_finished_fn task_finished, void *data) { @@ -1567,6 +1576,7 @@ static void pp_init(struct parallel_processes *pp, pp->get_next_task = get_next_task; pp->start_failure = start_failure ? start_failure : default_start_failure; + pp->feed_pipe = feed_pipe ? feed_pipe : default_feed_pipe; pp->task_finished = task_finished ? task_finished : default_task_finished; pp->nr_processes = 0; @@ -1664,6 +1674,37 @@ static int pp_start_one(struct parallel_processes *pp) return 0; } +static void pp_buffer_stdin(struct parallel_processes *pp) +{ + int i; + struct strbuf sb = STRBUF_INIT; + + /* Buffer stdin for each pipe. 
*/ + for (i = 0; i < pp->max_processes; i++) { + if (pp->children[i].state == GIT_CP_WORKING && + pp->children[i].process.in > 0) { + int done; + strbuf_reset(&sb); + done = pp->feed_pipe(&sb, pp->data, + pp->children[i].data); + if (sb.len) { + if (write_in_full(pp->children[i].process.in, + sb.buf, sb.len) < 0) { + if (errno != EPIPE) + die_errno("write"); + done = 1; + } + } + if (done) { + close(pp->children[i].process.in); + pp->children[i].process.in = 0; + } + } + } + + strbuf_release(&sb); +} + static void pp_buffer_stderr(struct parallel_processes *pp, int output_timeout) { int i; @@ -1728,6 +1769,7 @@ static int pp_collect_finished(struct parallel_processes *pp) pp->nr_processes--; pp->children[i].state = GIT_CP_FREE; pp->pfd[i].fd = -1; + pp->children[i].process.in = 0; child_process_init(&pp->children[i].process); if (i != pp->output_owner) { @@ -1761,6 +1803,7 @@ static int pp_collect_finished(struct parallel_processes *pp) int run_processes_parallel(int n, get_next_task_fn get_next_task, start_failure_fn start_failure, + feed_pipe_fn feed_pipe, task_finished_fn task_finished, void *pp_cb) { @@ -1769,7 +1812,9 @@ int run_processes_parallel(int n, int spawn_cap = 4; struct parallel_processes pp; - pp_init(&pp, n, get_next_task, start_failure, task_finished, pp_cb); + sigchain_push(SIGPIPE, SIG_IGN); + + pp_init(&pp, n, get_next_task, start_failure, feed_pipe, task_finished, pp_cb); while (1) { for (i = 0; i < spawn_cap && !pp.shutdown && @@ -1786,6 +1831,7 @@ int run_processes_parallel(int n, } if (!pp.nr_processes) break; + pp_buffer_stdin(&pp); pp_buffer_stderr(&pp, output_timeout); pp_output(&pp); code = pp_collect_finished(&pp); @@ -1797,11 +1843,15 @@ int run_processes_parallel(int n, } pp_cleanup(&pp); + + sigchain_pop(SIGPIPE); + return 0; } int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task, start_failure_fn start_failure, + feed_pipe_fn feed_pipe, task_finished_fn task_finished, void *pp_cb, const char *tr2_category, const 
char *tr2_label) { @@ -1811,7 +1861,7 @@ int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task, ((n < 1) ? online_cpus() : n)); result = run_processes_parallel(n, get_next_task, start_failure, - task_finished, pp_cb); + feed_pipe, task_finished, pp_cb); trace2_region_leave(tr2_category, tr2_label, NULL); diff --git a/run-command.h b/run-command.h index cfb6887e4ae3b1..80d394664aec2a 100644 --- a/run-command.h +++ b/run-command.h @@ -422,6 +422,20 @@ typedef int (*start_failure_fn)(struct strbuf *out, void *pp_cb, void *pp_task_cb); +/** + * This callback is called repeatedly on every child process who requests + * start_command() to create a pipe by setting child_process.in < 0. + * + * pp_cb is the callback cookie as passed into run_processes_parallel, and + * pp_task_cb is the callback cookie as passed into get_next_task_fn. + * The contents of 'pipe' will be written to the child's stdin pipe. + * + * Return nonzero to close the pipe. + */ +typedef int (*feed_pipe_fn)(struct strbuf *pipe, + void *pp_cb, + void *pp_task_cb); + /** * This callback is called on every child process that finished processing. 
* @@ -456,10 +470,11 @@ typedef int (*task_finished_fn)(int result, int run_processes_parallel(int n, get_next_task_fn, start_failure_fn, + feed_pipe_fn, task_finished_fn, void *pp_cb); int run_processes_parallel_tr2(int n, get_next_task_fn, start_failure_fn, - task_finished_fn, void *pp_cb, + feed_pipe_fn, task_finished_fn, void *pp_cb, const char *tr2_category, const char *tr2_label); /** diff --git a/submodule.c b/submodule.c index 8e611fe1dbf1f7..db1700a502dcd2 100644 --- a/submodule.c +++ b/submodule.c @@ -1632,6 +1632,7 @@ int fetch_populated_submodules(struct repository *r, run_processes_parallel_tr2(max_parallel_jobs, get_next_submodule, fetch_start_failure, + NULL, fetch_finish, &spf, "submodule", "parallel/fetch"); diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index 7ae03dc7123468..9348184d303eab 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -32,8 +32,13 @@ static int parallel_next(struct child_process *cp, return 0; strvec_pushv(&cp->args, d->argv); + cp->in = d->in; + cp->no_stdin = d->no_stdin; strbuf_addstr(err, "preloaded output of a child\n"); number_callbacks++; + + *task_cb = xmalloc(sizeof(int)); + *(int*)(*task_cb) = 2; return 1; } @@ -55,6 +60,17 @@ static int task_finished(int result, return 1; } +static int test_stdin(struct strbuf *pipe, void *cb, void *task_cb) +{ + int *lines_remaining = task_cb; + + if (*lines_remaining) + strbuf_addf(pipe, "sample stdin %d\n", --(*lines_remaining)); + + return !(*lines_remaining); +} + + struct testsuite { struct string_list tests, failed; int next; @@ -185,7 +201,7 @@ static int testsuite(int argc, const char **argv) suite.tests.nr, max_jobs); ret = run_processes_parallel(max_jobs, next_test, test_failed, - test_finished, &suite); + test_stdin, test_finished, &suite); if (suite.failed.nr > 0) { ret = 1; @@ -413,15 +429,22 @@ int cmd__run_command(int argc, const char **argv) if (!strcmp(argv[1], "run-command-parallel")) 
exit(run_processes_parallel(jobs, parallel_next, - NULL, NULL, &proc)); + NULL, NULL, NULL, &proc)); if (!strcmp(argv[1], "run-command-abort")) exit(run_processes_parallel(jobs, parallel_next, - NULL, task_finished, &proc)); + NULL, NULL, task_finished, &proc)); if (!strcmp(argv[1], "run-command-no-jobs")) exit(run_processes_parallel(jobs, no_job, - NULL, task_finished, &proc)); + NULL, NULL, task_finished, &proc)); + + if (!strcmp(argv[1], "run-command-stdin")) { + proc.in = -1; + proc.no_stdin = 0; + exit (run_processes_parallel(jobs, parallel_next, NULL, + test_stdin, NULL, &proc)); + } fprintf(stderr, "check usage\n"); return 1; diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh index 7d599675e35a75..87759482ad111e 100755 --- a/t/t0061-run-command.sh +++ b/t/t0061-run-command.sh @@ -143,6 +143,36 @@ test_expect_success 'run_command runs in parallel with more tasks than jobs avai test_cmp expect actual ' +cat >expect <<-EOF +preloaded output of a child +listening for stdin: +sample stdin 1 +sample stdin 0 +preloaded output of a child +listening for stdin: +sample stdin 1 +sample stdin 0 +preloaded output of a child +listening for stdin: +sample stdin 1 +sample stdin 0 +preloaded output of a child +listening for stdin: +sample stdin 1 +sample stdin 0 +EOF + +test_expect_success 'run_command listens to stdin' ' + write_script stdin-script <<-\EOF && + echo "listening for stdin:" + while read line; do + echo "$line" + done + EOF + test-tool run-command run-command-stdin 2 ./stdin-script 2>actual && + test_cmp expect actual +' + cat >expect <<-EOF preloaded output of a child asking for a quick stop From c848767a58e71c35135f6683d87f083af49408d1 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:51 +0200 Subject: [PATCH 152/198] hook: provide stdin by string_list or callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where a hook requires only a small 
amount of information via stdin, it should be simple for users to provide a string_list alone. But in more complicated cases where the stdin is too large to hold in memory, let's instead provide a callback the users can populate line after line. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- hook.c | 33 ++++++++++++++++++++++++++++++++- hook.h | 27 +++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/hook.c b/hook.c index a7462639d97b49..44e73827800813 100644 --- a/hook.c +++ b/hook.c @@ -77,6 +77,29 @@ void run_hooks_opt_clear(struct run_hooks_opt *o) strvec_clear(&o->args); } +int pipe_from_string_list(struct strbuf *pipe, void *pp_cb, void *pp_task_cb) +{ + int *item_idx; + struct hook *ctx = pp_task_cb; + struct hook_cb_data *hook_cb = pp_cb; + struct string_list *to_pipe = hook_cb->options->feed_pipe_ctx; + + /* Bootstrap the state manager if necessary. 
*/ + if (!ctx->feed_pipe_cb_data) { + ctx->feed_pipe_cb_data = xmalloc(sizeof(unsigned int)); + *(int*)ctx->feed_pipe_cb_data = 0; + } + + item_idx = ctx->feed_pipe_cb_data; + + if (*item_idx < to_pipe->nr) { + strbuf_addf(pipe, "%s\n", to_pipe->items[*item_idx].string); + (*item_idx)++; + return 0; + } + return 1; +} + static int pick_next_hook(struct child_process *cp, struct strbuf *out, void *pp_cb, @@ -92,6 +115,10 @@ static int pick_next_hook(struct child_process *cp, if (hook_cb->options->path_to_stdin) { cp->no_stdin = 0; cp->in = xopen(hook_cb->options->path_to_stdin, O_RDONLY); + } else if (hook_cb->options->feed_pipe) { + /* ask for start_command() to make a pipe for us */ + cp->in = -1; + cp->no_stdin = 0; } else { cp->no_stdin = 1; } @@ -175,7 +202,7 @@ int run_hooks(const char *hook_name, const char *hook_path, run_processes_parallel_tr2(jobs, pick_next_hook, notify_start_failure, - NULL, + options->feed_pipe, notify_hook_finished, &cb_data, "hook", @@ -183,6 +210,7 @@ int run_hooks(const char *hook_name, const char *hook_path, if (options->absolute_path) strbuf_release(&abs_path); + free(my_hook.feed_pipe_cb_data); return cb_data.rc; } @@ -196,6 +224,9 @@ int run_hooks_oneshot(const char *hook_name, struct run_hooks_opt *options) if (!options) options = &hook_opt_scratch; + if (options->path_to_stdin && options->feed_pipe) + BUG("choose only one method to populate stdin"); + hook_path = find_hook(hook_name); if (!hook_path) { ret = 0; diff --git a/hook.h b/hook.h index cd6a68a3b505bb..b55f283f90bce9 100644 --- a/hook.h +++ b/hook.h @@ -19,6 +19,12 @@ int hook_exists(const char *hookname); struct hook { /* The path to the hook */ const char *hook_path; + + /* + * Use this to keep state for your feed_pipe_fn if you are using + * run_hooks_opt.feed_pipe. Otherwise, do not touch it. 
+ */ + void *feed_pipe_cb_data; }; struct run_hooks_opt @@ -39,6 +45,19 @@ struct run_hooks_opt /* Path to file which should be piped to stdin for each hook */ const char *path_to_stdin; + + /* + * Callback and state pointer to ask for more content to pipe to stdin. + * Will be called repeatedly, for each hook. See + * hook.c:pipe_from_string_list() for an example. Keep per-hook state in + * hook.feed_pipe_cb_data (per process). Keep initialization context in + * feed_pipe_ctx (shared by all processes). + * + * See 'pipe_from_string_list()' for info about how to specify a + * string_list as the stdin input instead of writing your own handler. + */ + feed_pipe_fn feed_pipe; + void *feed_pipe_ctx; }; #define RUN_HOOKS_OPT_INIT { \ @@ -46,6 +65,14 @@ struct run_hooks_opt .args = STRVEC_INIT, \ } +/* + * To specify a 'struct string_list', set 'run_hooks_opt.feed_pipe_ctx' to the + * string_list and set 'run_hooks_opt.feed_pipe' to 'pipe_from_string_list()'. + * This will pipe each string in the list to stdin, separated by newlines. (Do + * not inject your own newlines.) + */ +int pipe_from_string_list(struct strbuf *pipe, void *pp_cb, void *pp_task_cb); + /* * Callback provided to feed_pipe_fn and consume_sideband_fn. */ From 73edbd92757b086e5e184481618afe5e0969311e Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:52 +0200 Subject: [PATCH 153/198] hook: convert 'post-rewrite' hook in sequencer.c to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using 'hook.h' for 'post-rewrite', we simplify hook invocations by not needing to put together our own 'struct child_process'. The signal handling that's being removed by this commit now takes place in run-command.h:run_processes_parallel(), so it is OK to remove it here. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- sequencer.c | 79 ++++++++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/sequencer.c b/sequencer.c index 9aac08c15451a6..77f809c00e46e6 100644 --- a/sequencer.c +++ b/sequencer.c @@ -35,6 +35,7 @@ #include "commit-reach.h" #include "rebase-interactive.h" #include "reset.h" +#include "string-list.h" #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION" @@ -1147,33 +1148,27 @@ int update_head_with_reflog(const struct commit *old_head, static int run_rewrite_hook(const struct object_id *oldoid, const struct object_id *newoid) { - struct child_process proc = CHILD_PROCESS_INIT; - const char *argv[3]; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct strbuf tmp = STRBUF_INIT; + struct string_list to_stdin = STRING_LIST_INIT_DUP; int code; - struct strbuf sb = STRBUF_INIT; - argv[0] = find_hook("post-rewrite"); - if (!argv[0]) - return 0; + strvec_push(&opt.args, "amend"); - argv[1] = "amend"; - argv[2] = NULL; - - proc.argv = argv; - proc.in = -1; - proc.stdout_to_stderr = 1; - proc.trace2_hook_name = "post-rewrite"; - - code = start_command(&proc); - if (code) - return code; - strbuf_addf(&sb, "%s %s\n", oid_to_hex(oldoid), oid_to_hex(newoid)); - sigchain_push(SIGPIPE, SIG_IGN); - write_in_full(proc.in, sb.buf, sb.len); - close(proc.in); - strbuf_release(&sb); - sigchain_pop(SIGPIPE); - return finish_command(&proc); + strbuf_addf(&tmp, + "%s %s", + oid_to_hex(oldoid), + oid_to_hex(newoid)); + string_list_append(&to_stdin, tmp.buf); + + opt.feed_pipe = pipe_from_string_list; + opt.feed_pipe_ctx = &to_stdin; + + code = run_hooks_oneshot("post-rewrite", &opt); + + strbuf_release(&tmp); + string_list_clear(&to_stdin, 0); + return code; } void commit_post_rewrite(struct repository *r, @@ -4526,30 +4521,20 @@ static int pick_commits(struct repository 
*r, flush_rewritten_pending(); if (!stat(rebase_path_rewritten_list(), &st) && st.st_size > 0) { - struct child_process child = CHILD_PROCESS_INIT; - const char *post_rewrite_hook = - find_hook("post-rewrite"); - - child.in = open(rebase_path_rewritten_list(), O_RDONLY); - child.git_cmd = 1; - strvec_push(&child.args, "notes"); - strvec_push(&child.args, "copy"); - strvec_push(&child.args, "--for-rewrite=rebase"); + struct child_process notes_cp = CHILD_PROCESS_INIT; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT; + + notes_cp.in = open(rebase_path_rewritten_list(), O_RDONLY); + notes_cp.git_cmd = 1; + strvec_push(&notes_cp.args, "notes"); + strvec_push(&notes_cp.args, "copy"); + strvec_push(&notes_cp.args, "--for-rewrite=rebase"); /* we don't care if this copying failed */ - run_command(&child); - - if (post_rewrite_hook) { - struct child_process hook = CHILD_PROCESS_INIT; - - hook.in = open(rebase_path_rewritten_list(), - O_RDONLY); - hook.stdout_to_stderr = 1; - hook.trace2_hook_name = "post-rewrite"; - strvec_push(&hook.args, post_rewrite_hook); - strvec_push(&hook.args, "rebase"); - /* we don't care if this hook failed */ - run_command(&hook); - } + run_command(&notes_cp); + + hook_opt.path_to_stdin = rebase_path_rewritten_list(); + strvec_push(&hook_opt.args, "rebase"); + run_hooks_oneshot("post-rewrite", &hook_opt); } apply_autostash(rebase_path_autostash()); From c5f26cb3d5e4fce09d740c4d746164e2a1953b47 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:53 +0200 Subject: [PATCH 154/198] transport: convert pre-push hook to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the pre-push hook away from run-command.h and over to the new hook.h library. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- transport.c | 56 ++++++++++++++--------------------------------------- 1 file changed, 14 insertions(+), 42 deletions(-) diff --git a/transport.c b/transport.c index 77e196f75f551d..4ca8fc0391d274 100644 --- a/transport.c +++ b/transport.c @@ -1203,63 +1203,35 @@ static void die_with_unpushed_submodules(struct string_list *needs_pushing) static int run_pre_push_hook(struct transport *transport, struct ref *remote_refs) { - int ret = 0, x; + int ret = 0; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; struct ref *r; - struct child_process proc = CHILD_PROCESS_INIT; - struct strbuf buf; - const char *argv[4]; - - if (!(argv[0] = find_hook("pre-push"))) - return 0; - - argv[1] = transport->remote->name; - argv[2] = transport->url; - argv[3] = NULL; - - proc.argv = argv; - proc.in = -1; - proc.trace2_hook_name = "pre-push"; - - if (start_command(&proc)) { - finish_command(&proc); - return -1; - } + struct string_list to_stdin = STRING_LIST_INIT_NODUP; - sigchain_push(SIGPIPE, SIG_IGN); - - strbuf_init(&buf, 256); + strvec_push(&opt.args, transport->remote->name); + strvec_push(&opt.args, transport->url); for (r = remote_refs; r; r = r->next) { + struct strbuf buf = STRBUF_INIT; + if (!r->peer_ref) continue; if (r->status == REF_STATUS_REJECT_NONFASTFORWARD) continue; if (r->status == REF_STATUS_REJECT_STALE) continue; if (r->status == REF_STATUS_REJECT_REMOTE_UPDATED) continue; if (r->status == REF_STATUS_UPTODATE) continue; - strbuf_reset(&buf); - strbuf_addf( &buf, "%s %s %s %s\n", + strbuf_addf(&buf, "%s %s %s %s", r->peer_ref->name, oid_to_hex(&r->new_oid), r->name, oid_to_hex(&r->old_oid)); - - if (write_in_full(proc.in, buf.buf, buf.len) < 0) { - /* We do not mind if a hook does not read all refs. 
*/ - if (errno != EPIPE) - ret = -1; - break; - } + string_list_append(&to_stdin, strbuf_detach(&buf, NULL)); } - strbuf_release(&buf); - - x = close(proc.in); - if (!ret) - ret = x; - - sigchain_pop(SIGPIPE); + opt.feed_pipe = pipe_from_string_list; + opt.feed_pipe_ctx = &to_stdin; - x = finish_command(&proc); - if (!ret) - ret = x; + ret = run_hooks_oneshot("pre-push", &opt); + to_stdin.strdup_strings = 1; + string_list_clear(&to_stdin, 0); return ret; } From 09dfbb9151f3c2462fff6ac9b0166901cf3cbfbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:54 +0200 Subject: [PATCH 155/198] hook tests: test for exact "pre-push" hook input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the tests added in ec55559f937 (push: Add support for pre-push hooks, 2013-01-13) to exhaustively test for the exact input we're expecting. This helps a parallel series that's refactoring how the hook is called, to e.g. make sure that we don't miss a trailing newline. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5571-pre-push-hook.sh | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/t/t5571-pre-push-hook.sh b/t/t5571-pre-push-hook.sh index ad8d5804f7b7df..d2857a6fbc07df 100755 --- a/t/t5571-pre-push-hook.sh +++ b/t/t5571-pre-push-hook.sh @@ -11,7 +11,7 @@ HOOKDIR="$(git rev-parse --git-dir)/hooks" HOOK="$HOOKDIR/pre-push" mkdir -p "$HOOKDIR" write_script "$HOOK" <<EOF -cat >/dev/null +cat >actual exit 0 EOF @@ -20,10 +20,16 @@ test_expect_success 'setup' ' git init --bare repo1 && git remote add parent1 repo1 && test_commit one && - git push parent1 HEAD:foreign + cat >expect <<-EOF && + HEAD $(git rev-parse HEAD) refs/heads/foreign $(test_oid zero) + EOF + + test_when_finished "rm actual" && + git push parent1 HEAD:foreign && + test_cmp expect actual ' write_script "$HOOK" <<EOF -cat >/dev/null +cat >actual exit 1 EOF @@ -32,11 +38,18 @@ export COMMIT1 test_expect_success 'push with failing hook' ' test_commit two && - test_must_fail git push parent1 HEAD + cat >expect <<-EOF && + HEAD $(git rev-parse HEAD) refs/heads/main $(test_oid zero) + EOF + + test_when_finished "rm actual" && + test_must_fail git push parent1 HEAD && + test_cmp expect actual ' test_expect_success '--no-verify bypasses hook' ' - git push --no-verify parent1 HEAD + git push --no-verify parent1 HEAD && + test_path_is_missing actual ' COMMIT2="$(git rev-parse HEAD)" From 33f69adb1b8b7c8d4aa4ba2c67b6d99be68581d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:38:55 +0200 Subject: [PATCH 156/198] hook tests: use a modern style for "pre-push" tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Indent the here-docs and use "test_cmp" instead of "diff" in tests added in ec55559f937 (push: Add support for pre-push hooks, 
2013-01-13). Let's also use the more typical "expect" instead of "expected" to be consistent with the rest of the test file. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5571-pre-push-hook.sh | 71 ++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/t/t5571-pre-push-hook.sh b/t/t5571-pre-push-hook.sh index d2857a6fbc07df..6d0d5b854ea3bb 100755 --- a/t/t5571-pre-push-hook.sh +++ b/t/t5571-pre-push-hook.sh @@ -61,15 +61,15 @@ echo "$2" >>actual cat >>actual EOF -cat >expected <<EOF -parent1 -repo1 -refs/heads/main $COMMIT2 refs/heads/foreign $COMMIT1 -EOF - test_expect_success 'push with hook' ' + cat >expected <<-EOF && + parent1 + repo1 + refs/heads/main $COMMIT2 refs/heads/foreign $COMMIT1 + EOF + git push parent1 main:foreign && - diff expected actual + test_cmp expected actual ' test_expect_success 'add a branch' ' @@ -80,49 +80,48 @@ test_expect_success 'add a branch' ' COMMIT3="$(git rev-parse HEAD)" export COMMIT3 -cat >expected <<EOF -parent1 -repo1 -refs/heads/other $COMMIT3 refs/heads/foreign $COMMIT2 -EOF - test_expect_success 'push to default' ' + cat >expect <<-EOF && + parent1 + repo1 + refs/heads/other $COMMIT3 refs/heads/foreign $COMMIT2 + EOF git push && - diff expected actual + test_cmp expect actual ' -cat >expected <<EOF -parent1 -repo1 -refs/tags/one $COMMIT1 refs/tags/tag1 $ZERO_OID -HEAD~ $COMMIT2 refs/heads/prev $ZERO_OID -EOF - test_expect_success 'push non-branches' ' + cat >expect <<-EOF && + parent1 + repo1 + refs/tags/one $COMMIT1 refs/tags/tag1 $ZERO_OID + HEAD~ $COMMIT2 refs/heads/prev $ZERO_OID + EOF + git push parent1 one:tag1 HEAD~:refs/heads/prev && - diff expected actual + test_cmp expect actual ' -cat >expected <<EOF -parent1 -repo1 -(delete) $ZERO_OID refs/heads/prev $COMMIT2 -EOF - test_expect_success 'push delete' ' + cat >expect <<-EOF && + parent1 + repo1 + (delete) $ZERO_OID refs/heads/prev $COMMIT2 + EOF 
+ git push parent1 :prev && - diff expected actual + test_cmp expect actual ' -cat >expected <<EOF -repo1 -repo1 -HEAD $COMMIT3 refs/heads/other $ZERO_OID -EOF - test_expect_success 'push to URL' ' + cat >expect <<-EOF && + repo1 + repo1 + HEAD $COMMIT3 refs/heads/other $ZERO_OID + EOF + git push repo1 HEAD && - diff expected actual + test_cmp expect actual ' test_expect_success 'set up many-ref tests' ' From 2832b22fbecc29c6eb1fa27acca0c3474382e8c0 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:56 +0200 Subject: [PATCH 157/198] reference-transaction: use hook.h to run hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/refs.c b/refs.c index 6211692eaaea53..73d4a9392673fa 100644 --- a/refs.c +++ b/refs.c @@ -2062,47 +2062,33 @@ int ref_update_reject_duplicates(struct string_list *refnames, static int run_transaction_hook(struct ref_transaction *transaction, const char *state) { - struct child_process proc = CHILD_PROCESS_INIT; - struct strbuf buf = STRBUF_INIT; - const char *hook; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct string_list to_stdin = STRING_LIST_INIT_NODUP; int ret = 0, i; - hook = find_hook("reference-transaction"); - if (!hook) - return ret; - - strvec_pushl(&proc.args, hook, state, NULL); - proc.in = -1; - proc.stdout_to_stderr = 1; - proc.trace2_hook_name = "reference-transaction"; - - ret = start_command(&proc); - if (ret) + if (!hook_exists("reference-transaction")) return ret; - sigchain_push(SIGPIPE, SIG_IGN); + strvec_push(&opt.args, state); for (i = 0; i < transaction->nr; i++) { struct ref_update *update = transaction->updates[i]; + struct strbuf buf = 
STRBUF_INIT; - strbuf_reset(&buf); - strbuf_addf(&buf, "%s %s %s\n", + strbuf_addf(&buf, "%s %s %s", oid_to_hex(&update->old_oid), oid_to_hex(&update->new_oid), update->refname); - - if (write_in_full(proc.in, buf.buf, buf.len) < 0) { - if (errno != EPIPE) - ret = -1; - break; - } + string_list_append(&to_stdin, strbuf_detach(&buf, NULL)); } - close(proc.in); - sigchain_pop(SIGPIPE); - strbuf_release(&buf); + opt.feed_pipe = pipe_from_string_list; + opt.feed_pipe_ctx = &to_stdin; + + ret = run_hooks_oneshot("reference-transaction", &opt); + to_stdin.strdup_strings = 1; + string_list_clear(&to_stdin, 0); - ret |= finish_command(&proc); return ret; } From 2a7f82cc5caf324a0a501ba3d1bfb25256204988 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:57 +0200 Subject: [PATCH 158/198] run-command: allow capturing of collated output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some callers, for example server-side hooks which wish to relay hook output to clients across a transport, want to capture what would normally print to stderr and do something else with it. Allow that via a callback. By calling the callback regardless of whether there's output available, we allow clients to send e.g. a keepalive if necessary. Because we expose a strbuf, not a fd or FILE*, there's no need to create a temporary pipe or similar - we can just skip the print to stderr and instead hand it to the caller. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fetch.c | 2 +- builtin/submodule--helper.c | 2 +- hook.c | 1 + run-command.c | 33 +++++++++++++++++++++++++-------- run-command.h | 18 +++++++++++++++++- submodule.c | 2 +- t/helper/test-run-command.c | 25 ++++++++++++++++++++----- t/t0061-run-command.sh | 7 +++++++ 8 files changed, 73 insertions(+), 17 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index fef6e85d003d48..de14df5085c0cb 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1817,7 +1817,7 @@ static int fetch_multiple(struct string_list *list, int max_children) result = run_processes_parallel_tr2(max_children, &fetch_next_remote, &fetch_failed_to_start, - NULL, + NULL, NULL, &fetch_finished, &state, "fetch", "parallel/fetch"); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index f42ded548bffb6..e5e5a8bcfb58e3 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -2294,7 +2294,7 @@ static int update_submodules(struct submodule_update_clone *suc) int i; run_processes_parallel_tr2(suc->max_jobs, update_clone_get_next_task, - update_clone_start_failure, NULL, + update_clone_start_failure, NULL, NULL, update_clone_task_finished, suc, "submodule", "parallel/update"); diff --git a/hook.c b/hook.c index 44e73827800813..9ae3007cdd336e 100644 --- a/hook.c +++ b/hook.c @@ -203,6 +203,7 @@ int run_hooks(const char *hook_name, const char *hook_path, pick_next_hook, notify_start_failure, options->feed_pipe, + NULL, notify_hook_finished, &cb_data, "hook", diff --git a/run-command.c b/run-command.c index f1616858d18eaa..aacc336f9519cd 100644 --- a/run-command.c +++ b/run-command.c @@ -1493,6 +1493,7 @@ struct parallel_processes { get_next_task_fn get_next_task; start_failure_fn start_failure; feed_pipe_fn feed_pipe; + consume_sideband_fn consume_sideband; task_finished_fn 
task_finished; struct { @@ -1558,6 +1559,7 @@ static void pp_init(struct parallel_processes *pp, get_next_task_fn get_next_task, start_failure_fn start_failure, feed_pipe_fn feed_pipe, + consume_sideband_fn consume_sideband, task_finished_fn task_finished, void *data) { @@ -1578,6 +1580,7 @@ static void pp_init(struct parallel_processes *pp, pp->start_failure = start_failure ? start_failure : default_start_failure; pp->feed_pipe = feed_pipe ? feed_pipe : default_feed_pipe; pp->task_finished = task_finished ? task_finished : default_task_finished; + pp->consume_sideband = consume_sideband; pp->nr_processes = 0; pp->output_owner = 0; @@ -1614,7 +1617,10 @@ static void pp_cleanup(struct parallel_processes *pp) * When get_next_task added messages to the buffer in its last * iteration, the buffered output is non empty. */ - strbuf_write(&pp->buffered_output, stderr); + if (pp->consume_sideband) + pp->consume_sideband(&pp->buffered_output, pp->data); + else + strbuf_write(&pp->buffered_output, stderr); strbuf_release(&pp->buffered_output); sigchain_pop_common(); @@ -1735,9 +1741,13 @@ static void pp_buffer_stderr(struct parallel_processes *pp, int output_timeout) static void pp_output(struct parallel_processes *pp) { int i = pp->output_owner; + if (pp->children[i].state == GIT_CP_WORKING && pp->children[i].err.len) { - strbuf_write(&pp->children[i].err, stderr); + if (pp->consume_sideband) + pp->consume_sideband(&pp->children[i].err, pp->data); + else + strbuf_write(&pp->children[i].err, stderr); strbuf_reset(&pp->children[i].err); } } @@ -1776,11 +1786,15 @@ static int pp_collect_finished(struct parallel_processes *pp) strbuf_addbuf(&pp->buffered_output, &pp->children[i].err); strbuf_reset(&pp->children[i].err); } else { - strbuf_write(&pp->children[i].err, stderr); + /* Output errors, then all other finished child processes */ + if (pp->consume_sideband) { + pp->consume_sideband(&pp->children[i].err, pp->data); + pp->consume_sideband(&pp->buffered_output, pp->data); + 
} else { + strbuf_write(&pp->children[i].err, stderr); + strbuf_write(&pp->buffered_output, stderr); + } strbuf_reset(&pp->children[i].err); - - /* Output all other finished child processes */ - strbuf_write(&pp->buffered_output, stderr); strbuf_reset(&pp->buffered_output); /* @@ -1804,6 +1818,7 @@ int run_processes_parallel(int n, get_next_task_fn get_next_task, start_failure_fn start_failure, feed_pipe_fn feed_pipe, + consume_sideband_fn consume_sideband, task_finished_fn task_finished, void *pp_cb) { @@ -1814,7 +1829,7 @@ int run_processes_parallel(int n, sigchain_push(SIGPIPE, SIG_IGN); - pp_init(&pp, n, get_next_task, start_failure, feed_pipe, task_finished, pp_cb); + pp_init(&pp, n, get_next_task, start_failure, feed_pipe, consume_sideband, task_finished, pp_cb); while (1) { for (i = 0; i < spawn_cap && !pp.shutdown && @@ -1852,6 +1867,7 @@ int run_processes_parallel(int n, int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task, start_failure_fn start_failure, feed_pipe_fn feed_pipe, + consume_sideband_fn consume_sideband, task_finished_fn task_finished, void *pp_cb, const char *tr2_category, const char *tr2_label) { @@ -1861,7 +1877,8 @@ int run_processes_parallel_tr2(int n, get_next_task_fn get_next_task, ((n < 1) ? online_cpus() : n)); result = run_processes_parallel(n, get_next_task, start_failure, - feed_pipe, task_finished, pp_cb); + feed_pipe, consume_sideband, + task_finished, pp_cb); trace2_region_leave(tr2_category, tr2_label, NULL); diff --git a/run-command.h b/run-command.h index 80d394664aec2a..e321d23bbd2d8b 100644 --- a/run-command.h +++ b/run-command.h @@ -436,6 +436,20 @@ typedef int (*feed_pipe_fn)(struct strbuf *pipe, void *pp_cb, void *pp_task_cb); +/** + * If this callback is provided, instead of collating process output to stderr, + * they will be collated into a new pipe. consume_sideband_fn will be called + * repeatedly. When output is available on that pipe, it will be contained in + * 'output'. 
But it will be called with an empty 'output' too, to allow for + * keepalives or similar operations if necessary. + * + * pp_cb is the callback cookie as passed into run_processes_parallel. + * + * Since this callback is provided with the collated output, no task cookie is + * provided. + */ +typedef void (*consume_sideband_fn)(struct strbuf *output, void *pp_cb); + /** * This callback is called on every child process that finished processing. * @@ -471,10 +485,12 @@ int run_processes_parallel(int n, get_next_task_fn, start_failure_fn, feed_pipe_fn, + consume_sideband_fn, task_finished_fn, void *pp_cb); int run_processes_parallel_tr2(int n, get_next_task_fn, start_failure_fn, - feed_pipe_fn, task_finished_fn, void *pp_cb, + feed_pipe_fn, consume_sideband_fn, + task_finished_fn, void *pp_cb, const char *tr2_category, const char *tr2_label); /** diff --git a/submodule.c b/submodule.c index db1700a502dcd2..32364d8bd56312 100644 --- a/submodule.c +++ b/submodule.c @@ -1632,7 +1632,7 @@ int fetch_populated_submodules(struct repository *r, run_processes_parallel_tr2(max_parallel_jobs, get_next_submodule, fetch_start_failure, - NULL, + NULL, NULL, fetch_finish, &spf, "submodule", "parallel/fetch"); diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index 9348184d303eab..d53db6d11c4dad 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -51,6 +51,16 @@ static int no_job(struct child_process *cp, return 0; } +static void test_consume_sideband(struct strbuf *output, void *cb) +{ + FILE *sideband; + + sideband = fopen("./sideband", "a"); + + strbuf_write(output, sideband); + fclose(sideband); +} + static int task_finished(int result, struct strbuf *err, void *pp_cb, @@ -201,7 +211,7 @@ static int testsuite(int argc, const char **argv) suite.tests.nr, max_jobs); ret = run_processes_parallel(max_jobs, next_test, test_failed, - test_stdin, test_finished, &suite); + test_stdin, NULL, test_finished, &suite); if (suite.failed.nr > 0) { 
ret = 1; @@ -429,23 +439,28 @@ int cmd__run_command(int argc, const char **argv) if (!strcmp(argv[1], "run-command-parallel")) exit(run_processes_parallel(jobs, parallel_next, - NULL, NULL, NULL, &proc)); + NULL, NULL, NULL, NULL, &proc)); if (!strcmp(argv[1], "run-command-abort")) exit(run_processes_parallel(jobs, parallel_next, - NULL, NULL, task_finished, &proc)); + NULL, NULL, NULL, task_finished, &proc)); if (!strcmp(argv[1], "run-command-no-jobs")) exit(run_processes_parallel(jobs, no_job, - NULL, NULL, task_finished, &proc)); + NULL, NULL, NULL, task_finished, &proc)); if (!strcmp(argv[1], "run-command-stdin")) { proc.in = -1; proc.no_stdin = 0; exit (run_processes_parallel(jobs, parallel_next, NULL, - test_stdin, NULL, &proc)); + test_stdin, NULL, NULL, &proc)); } + if (!strcmp(argv[1], "run-command-sideband")) + exit(run_processes_parallel(jobs, parallel_next, NULL, NULL, + test_consume_sideband, NULL, + &proc)); + fprintf(stderr, "check usage\n"); return 1; } diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh index 87759482ad111e..e99f6c7f445805 100755 --- a/t/t0061-run-command.sh +++ b/t/t0061-run-command.sh @@ -143,6 +143,13 @@ test_expect_success 'run_command runs in parallel with more tasks than jobs avai test_cmp expect actual ' +test_expect_success 'run_command can divert output' ' + test_when_finished rm sideband && + test-tool run-command run-command-sideband 3 sh -c "printf \"%s\n%s\n\" Hello World" 2>actual && + test_must_be_empty actual && + test_cmp expect sideband +' + cat >expect <<-EOF preloaded output of a child listening for stdin: From 9ca517b509914c504fb04741edb103b4b9186f1d Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:58 +0200 Subject: [PATCH 159/198] hooks: allow callers to capture output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some server-side hooks will require capturing output to send over sideband instead of printing 
directly to stderr. Expose that capability. Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- hook.c | 3 ++- hook.h | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hook.c b/hook.c index 9ae3007cdd336e..efed5f73b4d1fb 100644 --- a/hook.c +++ b/hook.c @@ -203,12 +203,13 @@ int run_hooks(const char *hook_name, const char *hook_path, pick_next_hook, notify_start_failure, options->feed_pipe, - NULL, + options->consume_sideband, notify_hook_finished, &cb_data, "hook", hook_name); + if (options->absolute_path) strbuf_release(&abs_path); free(my_hook.feed_pipe_cb_data); diff --git a/hook.h b/hook.h index b55f283f90bce9..37a9690c2ca368 100644 --- a/hook.h +++ b/hook.h @@ -58,6 +58,14 @@ struct run_hooks_opt */ feed_pipe_fn feed_pipe; void *feed_pipe_ctx; + + /* + * Populate this to capture output and prevent it from being printed to + * stderr. This will be passed directly through to + * run_command:run_parallel_processes(). See t/helper/test-run-command.c + * for an example. + */ + consume_sideband_fn consume_sideband; }; #define RUN_HOOKS_OPT_INIT { \ From 6ca3e24b8e923b4484586051088350b9f3b67c2d Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:38:59 +0200 Subject: [PATCH 160/198] receive-pack: convert 'update' hook to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes use of the new sideband API in hook.h added in the preceding commit. 
Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/receive-pack.c | 62 ++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a7d03bbc7d386d..31ce4ece4e76ba 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -937,33 +937,53 @@ static int run_receive_hook(struct command *commands, return status; } -static int run_update_hook(struct command *cmd) +static void hook_output_to_sideband(struct strbuf *output, void *cb_data) { - const char *argv[5]; - struct child_process proc = CHILD_PROCESS_INIT; - int code; + int keepalive_active = 0; - argv[0] = find_hook("update"); - if (!argv[0]) - return 0; + if (keepalive_in_sec <= 0) + use_keepalive = KEEPALIVE_NEVER; + if (use_keepalive == KEEPALIVE_ALWAYS) + keepalive_active = 1; - argv[1] = cmd->ref_name; - argv[2] = oid_to_hex(&cmd->old_oid); - argv[3] = oid_to_hex(&cmd->new_oid); - argv[4] = NULL; + /* send a keepalive if there is no data to write */ + if (keepalive_active && !output->len) { + static const char buf[] = "0005\1"; + write_or_die(1, buf, sizeof(buf) - 1); + return; + } - proc.no_stdin = 1; - proc.stdout_to_stderr = 1; - proc.err = use_sideband ? -1 : 0; - proc.argv = argv; - proc.trace2_hook_name = "update"; + if (use_keepalive == KEEPALIVE_AFTER_NUL && !keepalive_active) { + const char *first_null = memchr(output->buf, '\0', output->len); + if (first_null) { + /* The null bit is excluded. 
*/ + size_t before_null = first_null - output->buf; + size_t after_null = output->len - (before_null + 1); + keepalive_active = 1; + send_sideband(1, 2, output->buf, before_null, use_sideband); + send_sideband(1, 2, first_null + 1, after_null, use_sideband); + + return; + } + } + + send_sideband(1, 2, output->buf, output->len, use_sideband); +} + +static int run_update_hook(struct command *cmd) +{ + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + + strvec_pushl(&opt.args, + cmd->ref_name, + oid_to_hex(&cmd->old_oid), + oid_to_hex(&cmd->new_oid), + NULL); - code = start_command(&proc); - if (code) - return code; if (use_sideband) - copy_to_sideband(proc.err, -1, NULL); - return finish_command(&proc); + opt.consume_sideband = hook_output_to_sideband; + + return run_hooks_oneshot("update", &opt); } static struct command *find_command_by_refname(struct command *list, From 908ed9a5db4da3ea01bae95e74fd0450915321e9 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:39:00 +0200 Subject: [PATCH 161/198] post-update: use hook.h library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/receive-pack.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 31ce4ece4e76ba..26e302aab85e37 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1650,33 +1650,20 @@ static const char *update(struct command *cmd, struct shallow_info *si) static void run_update_post_hook(struct command *commands) { struct command *cmd; - struct child_process proc = CHILD_PROCESS_INIT; - const char *hook; - - hook = find_hook("post-update"); - if (!hook) - return; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; for (cmd = commands; cmd; cmd = 
cmd->next) { if (cmd->error_string || cmd->did_not_exist) continue; - if (!proc.args.nr) - strvec_push(&proc.args, hook); - strvec_push(&proc.args, cmd->ref_name); + strvec_push(&opt.args, cmd->ref_name); } - if (!proc.args.nr) + if (!opt.args.nr) return; - proc.no_stdin = 1; - proc.stdout_to_stderr = 1; - proc.err = use_sideband ? -1 : 0; - proc.trace2_hook_name = "post-update"; + if (use_sideband) + opt.consume_sideband = hook_output_to_sideband; - if (!start_command(&proc)) { - if (use_sideband) - copy_to_sideband(proc.err, -1, NULL); - finish_command(&proc); - } + run_hooks_oneshot("post-update", &opt); } static void check_aliased_update_internal(struct command *cmd, From 4e5f31ba423c11ef17d8c51e3421684bd88666b8 Mon Sep 17 00:00:00 2001 From: Emily Shaffer <emilyshaffer@google.com> Date: Tue, 3 Aug 2021 21:39:01 +0200 Subject: [PATCH 162/198] receive-pack: convert receive hooks to hook.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Emily Shaffer <emilyshaffer@google.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/receive-pack.c | 190 ++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 107 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 26e302aab85e37..c3984680d7f2bd 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -747,7 +747,7 @@ static int check_cert_push_options(const struct string_list *push_options) return retval; } -static void prepare_push_cert_sha1(struct child_process *proc) +static void prepare_push_cert_sha1(struct run_hooks_opt *opt) { static int already_done; @@ -771,110 +771,42 @@ static void prepare_push_cert_sha1(struct child_process *proc) nonce_status = check_nonce(push_cert.buf, bogs); } if (!is_null_oid(&push_cert_oid)) { - strvec_pushf(&proc->env_array, "GIT_PUSH_CERT=%s", + strvec_pushf(&opt->env, "GIT_PUSH_CERT=%s", 
oid_to_hex(&push_cert_oid)); - strvec_pushf(&proc->env_array, "GIT_PUSH_CERT_SIGNER=%s", + strvec_pushf(&opt->env, "GIT_PUSH_CERT_SIGNER=%s", sigcheck.signer ? sigcheck.signer : ""); - strvec_pushf(&proc->env_array, "GIT_PUSH_CERT_KEY=%s", + strvec_pushf(&opt->env, "GIT_PUSH_CERT_KEY=%s", sigcheck.key ? sigcheck.key : ""); - strvec_pushf(&proc->env_array, "GIT_PUSH_CERT_STATUS=%c", + strvec_pushf(&opt->env, "GIT_PUSH_CERT_STATUS=%c", sigcheck.result); if (push_cert_nonce) { - strvec_pushf(&proc->env_array, + strvec_pushf(&opt->env, "GIT_PUSH_CERT_NONCE=%s", push_cert_nonce); - strvec_pushf(&proc->env_array, + strvec_pushf(&opt->env, "GIT_PUSH_CERT_NONCE_STATUS=%s", nonce_status); if (nonce_status == NONCE_SLOP) - strvec_pushf(&proc->env_array, + strvec_pushf(&opt->env, "GIT_PUSH_CERT_NONCE_SLOP=%ld", nonce_stamp_slop); } } } +struct receive_hook_feed_context { + struct command *cmd; + int skip_broken; +}; + struct receive_hook_feed_state { struct command *cmd; struct ref_push_report *report; int skip_broken; struct strbuf buf; - const struct string_list *push_options; }; -typedef int (*feed_fn)(void *, const char **, size_t *); -static int run_and_feed_hook(const char *hook_name, feed_fn feed, - struct receive_hook_feed_state *feed_state) -{ - struct child_process proc = CHILD_PROCESS_INIT; - struct async muxer; - const char *argv[2]; - int code; - - argv[0] = find_hook(hook_name); - if (!argv[0]) - return 0; - - argv[1] = NULL; - - proc.argv = argv; - proc.in = -1; - proc.stdout_to_stderr = 1; - proc.trace2_hook_name = hook_name; - - if (feed_state->push_options) { - int i; - for (i = 0; i < feed_state->push_options->nr; i++) - strvec_pushf(&proc.env_array, - "GIT_PUSH_OPTION_%d=%s", i, - feed_state->push_options->items[i].string); - strvec_pushf(&proc.env_array, "GIT_PUSH_OPTION_COUNT=%d", - feed_state->push_options->nr); - } else - strvec_pushf(&proc.env_array, "GIT_PUSH_OPTION_COUNT"); - - if (tmp_objdir) - strvec_pushv(&proc.env_array, 
tmp_objdir_env(tmp_objdir)); - - if (use_sideband) { - memset(&muxer, 0, sizeof(muxer)); - muxer.proc = copy_to_sideband; - muxer.in = -1; - code = start_async(&muxer); - if (code) - return code; - proc.err = muxer.in; - } - - prepare_push_cert_sha1(&proc); - - code = start_command(&proc); - if (code) { - if (use_sideband) - finish_async(&muxer); - return code; - } - - sigchain_push(SIGPIPE, SIG_IGN); - - while (1) { - const char *buf; - size_t n; - if (feed(feed_state, &buf, &n)) - break; - if (write_in_full(proc.in, buf, n) < 0) - break; - } - close(proc.in); - if (use_sideband) - finish_async(&muxer); - - sigchain_pop(SIGPIPE); - - return finish_command(&proc); -} - -static int feed_receive_hook(void *state_, const char **bufp, size_t *sizep) +static int feed_receive_hook(void *state_) { struct receive_hook_feed_state *state = state_; struct command *cmd = state->cmd; @@ -883,9 +815,7 @@ static int feed_receive_hook(void *state_, const char **bufp, size_t *sizep) state->skip_broken && (cmd->error_string || cmd->did_not_exist)) cmd = cmd->next; if (!cmd) - return -1; /* EOF */ - if (!bufp) - return 0; /* OK, can feed something. 
*/ + return 1; /* EOF - close the pipe*/ strbuf_reset(&state->buf); if (!state->report) state->report = cmd->report; @@ -909,32 +839,36 @@ static int feed_receive_hook(void *state_, const char **bufp, size_t *sizep) cmd->ref_name); state->cmd = cmd->next; } - if (bufp) { - *bufp = state->buf.buf; - *sizep = state->buf.len; - } return 0; } -static int run_receive_hook(struct command *commands, - const char *hook_name, - int skip_broken, - const struct string_list *push_options) +static int feed_receive_hook_cb(struct strbuf *pipe, void *pp_cb, void *pp_task_cb) { - struct receive_hook_feed_state state; - int status; - - strbuf_init(&state.buf, 0); - state.cmd = commands; - state.skip_broken = skip_broken; - state.report = NULL; - if (feed_receive_hook(&state, NULL, NULL)) - return 0; - state.cmd = commands; - state.push_options = push_options; - status = run_and_feed_hook(hook_name, feed_receive_hook, &state); - strbuf_release(&state.buf); - return status; + struct hook *hook = pp_task_cb; + struct receive_hook_feed_state *feed_state = hook->feed_pipe_cb_data; + int rc; + + /* first-time setup */ + if (!feed_state) { + struct hook_cb_data *hook_cb = pp_cb; + struct run_hooks_opt *opt = hook_cb->options; + struct receive_hook_feed_context *ctx = opt->feed_pipe_ctx; + if (!ctx) + BUG("run_hooks_opt.feed_pipe_ctx required for receive hook"); + + feed_state = xmalloc(sizeof(struct receive_hook_feed_state)); + strbuf_init(&feed_state->buf, 0); + feed_state->cmd = ctx->cmd; + feed_state->skip_broken = ctx->skip_broken; + feed_state->report = NULL; + + hook->feed_pipe_cb_data = feed_state; + } + + rc = feed_receive_hook(feed_state); + if (!rc) + strbuf_addbuf(pipe, &feed_state->buf); + return rc; } static void hook_output_to_sideband(struct strbuf *output, void *cb_data) @@ -970,6 +904,48 @@ static void hook_output_to_sideband(struct strbuf *output, void *cb_data) send_sideband(1, 2, output->buf, output->len, use_sideband); } +static int run_receive_hook(struct command 
*commands, + const char *hook_name, + int skip_broken, + const struct string_list *push_options) +{ + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct receive_hook_feed_context ctx; + struct command *iter = commands; + + /* if there are no valid commands, don't invoke the hook at all. */ + while (iter && skip_broken && (iter->error_string || iter->did_not_exist)) + iter = iter->next; + if (!iter) + return 0; + + if (push_options) { + int i; + for (i = 0; i < push_options->nr; i++) + strvec_pushf(&opt.env, "GIT_PUSH_OPTION_%d=%s", i, + push_options->items[i].string); + strvec_pushf(&opt.env, "GIT_PUSH_OPTION_COUNT=%d", push_options->nr); + } else + strvec_push(&opt.env, "GIT_PUSH_OPTION_COUNT"); + + if (tmp_objdir) + strvec_pushv(&opt.env, tmp_objdir_env(tmp_objdir)); + + prepare_push_cert_sha1(&opt); + + /* set up sideband printer */ + if (use_sideband) + opt.consume_sideband = hook_output_to_sideband; + + /* set up stdin callback */ + ctx.cmd = commands; + ctx.skip_broken = skip_broken; + opt.feed_pipe = feed_receive_hook_cb; + opt.feed_pipe_ctx = &ctx; + + return run_hooks_oneshot(hook_name, &opt); +} + static int run_update_hook(struct command *cmd) { struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; From 76f440d4f743433796e06a921ee68273edf17c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Tue, 3 Aug 2021 21:39:02 +0200 Subject: [PATCH 163/198] hooks: fix a TOCTOU in "did we run a hook?" heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a Time-of-check to time-of-use (TOCTOU) race in code added in 680ee550d72 (commit: skip discarding the index if there is no pre-commit hook, 2017-08-14). We can fix the race passing around information about whether or not we ran the hook in question, instead of running hook_exists() after the fact to check if the hook in question exists. 
This problem has been noted on-list when 680ee550d72 was discussed[1], but had not been fixed. In addition to fixing this for the pre-commit hook as suggested there I'm also fixing this for the pre-merge-commit hook. See 6098817fd7f (git-merge: honor pre-merge-commit hook, 2019-08-07) for the introduction of its previous behavior. Let's also change this for the push-to-checkout hook. Now instead of checking if the hook exists and either doing a push to checkout or a push to deploy we'll always attempt a push to checkout. If the hook doesn't exist we'll fall back on push to deploy. The same behavior as before, without the TOCTOU race. See 0855331941b (receive-pack: support push-to-checkout hook, 2014-12-01) for the introduction of the previous behavior. This leaves uses of hook_exists() in two places that matter. The "reference-transaction" check in refs.c, see 67541597670 (refs: implement reference transaction hook, 2020-06-19), and the prepare-commit-msg hook, see 66618a50f9c (sequencer: run 'prepare-commit-msg' hook, 2018-01-24). In both of those cases we're saving ourselves CPU time by not preparing data for the hook that we'll then do nothing with if we don't have the hook. So using this "invoked_hook" pattern doesn't make sense in those cases. More importantly, in those cases the worst we'll do is miss that we "should" run the hook because a new hook appeared, whereas in the pre-commit and pre-merge-commit cases we'll skip an important discard_cache() on the basis of our faulty guess. I do think none of these races really matter in practice. It would be some one-off issue as a hook was added or removed. I did think it was stupid that we didn't pass a "did this run?" flag instead of doing this guessing at a distance though, so now we're not guessing anymore. 1.
https://lore.kernel.org/git/20170810191613.kpmhzg4seyxy3cpq@sigill.intra.peff.net/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/commit.c | 18 +++++++++++------- builtin/merge.c | 16 ++++++++++------ builtin/receive-pack.c | 8 +++++--- commit.c | 1 + commit.h | 3 ++- hook.c | 4 ++++ hook.h | 10 ++++++++++ sequencer.c | 4 ++-- 8 files changed, 45 insertions(+), 19 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index aa3c741efa9d37..0b7642e8324c98 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -725,11 +725,13 @@ static int prepare_to_commit(const char *index_file, const char *prefix, int clean_message_contents = (cleanup_mode != COMMIT_MSG_CLEANUP_NONE); int old_display_comment_prefix; int merge_contains_scissors = 0; + int invoked_hook = 0; /* This checks and barfs if author is badly specified */ determine_author_info(author_ident); - if (!no_verify && run_commit_hook(use_editor, index_file, "pre-commit", NULL)) + if (!no_verify && run_commit_hook(use_editor, index_file, &invoked_hook, + "pre-commit", NULL)) return 0; if (squash_message) { @@ -1052,10 +1054,10 @@ static int prepare_to_commit(const char *index_file, const char *prefix, return 0; } - if (!no_verify && hook_exists("pre-commit")) { + if (!no_verify && invoked_hook) { /* - * Re-read the index as pre-commit hook could have updated it, - * and write it out as a tree. We must do this before we invoke + * Re-read the index as the pre-commit-commit hook was invoked + * and could have updated it. We must do this before we invoke * the editor and after we invoke run_status above. 
*/ discard_cache(); @@ -1067,7 +1069,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix, return 0; } - if (run_commit_hook(use_editor, index_file, "prepare-commit-msg", + if (run_commit_hook(use_editor, index_file, NULL, "prepare-commit-msg", git_path_commit_editmsg(), hook_arg1, hook_arg2, NULL)) return 0; @@ -1084,7 +1086,8 @@ static int prepare_to_commit(const char *index_file, const char *prefix, } if (!no_verify && - run_commit_hook(use_editor, index_file, "commit-msg", git_path_commit_editmsg(), NULL)) { + run_commit_hook(use_editor, index_file, NULL, "commit-msg", + git_path_commit_editmsg(), NULL)) { return 0; } @@ -1840,7 +1843,8 @@ int cmd_commit(int argc, const char **argv, const char *prefix) repo_rerere(the_repository, 0); run_auto_maintenance(quiet); - run_commit_hook(use_editor, get_index_file(), "post-commit", NULL); + run_commit_hook(use_editor, get_index_file(), NULL, "post-commit", + NULL); if (amend && !no_post_rewrite) { commit_post_rewrite(the_repository, current_head, &oid); } diff --git a/builtin/merge.c b/builtin/merge.c index 4965df2ac29f8d..9bd4a2532c315a 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -843,15 +843,18 @@ static void prepare_to_commit(struct commit_list *remoteheads) { struct strbuf msg = STRBUF_INIT; const char *index_file = get_index_file(); + int invoked_hook = 0; - if (!no_verify && run_commit_hook(0 < option_edit, index_file, "pre-merge-commit", NULL)) + if (!no_verify && run_commit_hook(0 < option_edit, index_file, + &invoked_hook, "pre-merge-commit", + NULL)) abort_commit(remoteheads, NULL); /* - * Re-read the index as pre-merge-commit hook could have updated it, - * and write it out as a tree. We must do this before we invoke + * Re-read the index as the pre-merge-commit hook was invoked + * and could have updated it. We must do this before we invoke * the editor and after we invoke run_status above. 
*/ - if (hook_exists("pre-merge-commit")) + if (invoked_hook) discard_cache(); read_cache_from(index_file); strbuf_addbuf(&msg, &merge_msg); @@ -872,7 +875,8 @@ static void prepare_to_commit(struct commit_list *remoteheads) append_signoff(&msg, ignore_non_trailer(msg.buf, msg.len), 0); write_merge_heads(remoteheads); write_file_buf(git_path_merge_msg(the_repository), msg.buf, msg.len); - if (run_commit_hook(0 < option_edit, get_index_file(), "prepare-commit-msg", + if (run_commit_hook(0 < option_edit, get_index_file(), NULL, + "prepare-commit-msg", git_path_merge_msg(the_repository), "merge", NULL)) abort_commit(remoteheads, NULL); if (0 < option_edit) { @@ -881,7 +885,7 @@ static void prepare_to_commit(struct commit_list *remoteheads) } if (!no_verify && run_commit_hook(0 < option_edit, get_index_file(), - "commit-msg", + NULL, "commit-msg", git_path_merge_msg(the_repository), NULL)) abort_commit(remoteheads, NULL); diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c3984680d7f2bd..ebec6f3bb10c05 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1428,10 +1428,12 @@ static const char *push_to_deploy(unsigned char *sha1, static const char *push_to_checkout_hook = "push-to-checkout"; static const char *push_to_checkout(unsigned char *hash, + int *invoked_hook, struct strvec *env, const char *work_tree) { struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + opt.invoked_hook = invoked_hook; strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree)); strvec_pushv(&opt.env, env->v); @@ -1446,6 +1448,7 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w { const char *retval, *work_tree, *git_dir = NULL; struct strvec env = STRVEC_INIT; + int invoked_hook = 0; if (worktree && worktree->path) work_tree = worktree->path; @@ -1463,10 +1466,9 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w strvec_pushf(&env, "GIT_DIR=%s", absolute_path(git_dir)); - if 
(!hook_exists(push_to_checkout_hook)) + retval = push_to_checkout(sha1, &invoked_hook, &env, work_tree); + if (!invoked_hook) retval = push_to_deploy(sha1, &env, work_tree); - else - retval = push_to_checkout(sha1, &env, work_tree); strvec_clear(&env); return retval; diff --git a/commit.c b/commit.c index 63d7943a86dd51..842e47beae214e 100644 --- a/commit.c +++ b/commit.c @@ -1697,6 +1697,7 @@ size_t ignore_non_trailer(const char *buf, size_t len) } int run_commit_hook(int editor_is_used, const char *index_file, + int *invoked_hook, const char *name, ...) { struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; diff --git a/commit.h b/commit.h index df42eb434f314b..b5a542993c63af 100644 --- a/commit.h +++ b/commit.h @@ -363,7 +363,8 @@ int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused); LAST_ARG_MUST_BE_NULL -int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...); +int run_commit_hook(int editor_is_used, const char *index_file, + int *invoked_hook, const char *name, ...); /* Sign a commit or tag buffer, storing the result in a header. */ int sign_with_header(struct strbuf *buf, const char *keyid); diff --git a/hook.c b/hook.c index efed5f73b4d1fb..ee20b2e3658753 100644 --- a/hook.c +++ b/hook.c @@ -173,6 +173,9 @@ static int notify_hook_finished(int result, hook_cb->rc |= result; + if (hook_cb->invoked_hook) + *hook_cb->invoked_hook = 1; + return 0; } @@ -187,6 +190,7 @@ int run_hooks(const char *hook_name, const char *hook_path, .rc = 0, .hook_name = hook_name, .options = options, + .invoked_hook = options->invoked_hook, }; int jobs = 1; diff --git a/hook.h b/hook.h index 37a9690c2ca368..58dfbf474c9c1a 100644 --- a/hook.h +++ b/hook.h @@ -66,6 +66,15 @@ struct run_hooks_opt * for an example. 
*/ consume_sideband_fn consume_sideband; + + /* + * A pointer which if provided will be set to 1 or 0 depending + * on if a hook was invoked (i.e. existed), regardless of + * whether or not that was successful. Used for avoiding + * TOCTOU races in code that would otherwise call hook_exist() + * after a "maybe hook run" to see if a hook was invoked. + */ + int *invoked_hook; }; #define RUN_HOOKS_OPT_INIT { \ @@ -90,6 +99,7 @@ struct hook_cb_data { const char *hook_name; struct hook *run_me; struct run_hooks_opt *options; + int *invoked_hook; }; void run_hooks_opt_clear(struct run_hooks_opt *o); diff --git a/sequencer.c b/sequencer.c index 77f809c00e46e6..f451e23d0c120d 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1203,7 +1203,7 @@ static int run_prepare_commit_msg_hook(struct repository *r, } else { arg1 = "message"; } - if (run_commit_hook(0, r->index_file, "prepare-commit-msg", name, + if (run_commit_hook(0, r->index_file, NULL, "prepare-commit-msg", name, arg1, arg2, NULL)) ret = error(_("'prepare-commit-msg' hook failed")); @@ -1533,7 +1533,7 @@ static int try_to_commit(struct repository *r, goto out; } - run_commit_hook(0, r->index_file, "post-commit", NULL); + run_commit_hook(0, r->index_file, NULL, "post-commit", NULL); if (flags & AMEND_MSG) commit_post_rewrite(r, current_head, oid); From 74aafa37dd7f4636b187255703579662e88121ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20Walle?= <oystwa@gmail.com> Date: Mon, 2 Aug 2021 19:49:44 +0200 Subject: [PATCH 164/198] clone: Allow combining --bare and --origin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The constraint on passing both these options simultaneously has been present since long before clone was ported to C. At the time no configuration referencing the remote repository was written at all in bare clones. 
Since df61c88979 (clone: also configure url for bare clones, 2010-03-29) the remote repository is mentioned in the configuration file also for bare repos, so it makes sense to allow the user to rename it if they wish. Signed-off-by: Øystein Walle <oystwa@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/clone.c | 3 --- t/t5606-clone-options.sh | 9 +++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 66fe66679c8498..70ec72ea854508 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1014,9 +1014,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) option_bare = 1; if (option_bare) { - if (option_origin) - die(_("--bare and --origin %s options are incompatible."), - option_origin); if (real_git_dir) die(_("--bare and --separate-git-dir are incompatible.")); option_no_checkout = 1; diff --git a/t/t5606-clone-options.sh b/t/t5606-clone-options.sh index 3a595c0f82c704..fd35b6d06b3420 100755 --- a/t/t5606-clone-options.sh +++ b/t/t5606-clone-options.sh @@ -30,11 +30,12 @@ test_expect_success 'rejects invalid -o/--origin' ' ' -test_expect_success 'disallows --bare with --origin' ' +test_expect_success '--bare works with -o/--origin' ' - test_must_fail git clone -o foo --bare parent clone-bare-o 2>err && - test_debug "cat err" && - test_i18ngrep -e "--bare and --origin foo options are incompatible" err + git clone --bare --origin=somewhere parent clone-bare-o && + url="$(git -C clone-bare-o config --local remote.somewhere.url)" && + test -n "$url" && + test_must_fail git -C clone-bare-o config --local remote.origin.url ' From 4849f5ddf19b0a4d0c9583444518258b9022f138 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 5 Aug 2021 12:37:24 +0200 Subject: [PATCH 165/198] test-lib tests: get rid of copy/pasted mock test code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now 
that we've split up the write_sub_test_lib_test*() and run_sub_test_lib_test*() functions let's fix those tests in t0000-basic.sh that were verbosely copy/pasting earlier tests. In a subsequent commit we'll add an assertion to check whether we caught all of the copy/pasting. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0000-basic.sh | 228 +++++++++++++---------------------------------- 1 file changed, 60 insertions(+), 168 deletions(-) diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 6fdd5f43caecc3..a0b99d8334965d 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -226,18 +226,13 @@ test_expect_success 'subtest: --verbose option' ' ' test_expect_success 'subtest: --verbose-only option' ' - write_and_run_sub_test_lib_test_err \ - t2345-verbose-only-2 \ - --verbose-only=2 <<-\EOF && - test_expect_success "passing test" true - test_expect_success "test with output" "echo foo" - test_expect_success "failing test" false - test_done - EOF - check_sub_test_lib_test t2345-verbose-only-2 <<-\EOF + run_sub_test_lib_test_err \ + t1234-verbose \ + --verbose-only=2 && + check_sub_test_lib_test t1234-verbose <<-\EOF > ok 1 - passing test > Z - > expecting success of 2345.2 '\''test with output'\'': echo foo + > expecting success of 1234.2 '\''test with output'\'': echo foo > foo > ok 2 - test with output > Z @@ -250,15 +245,9 @@ test_expect_success 'subtest: --verbose-only option' ' test_expect_success 'subtest: skip one with GIT_SKIP_TESTS' ' ( - write_and_run_sub_test_lib_test git-skip-tests-basic \ - --skip="git.2" <<-\EOF && - for i in 1 2 3 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test git-skip-tests-basic <<-\EOF + run_sub_test_lib_test full-pass \ + --skip="full.2" && + check_sub_test_lib_test full-pass <<-\EOF > ok 1 - passing test #1 > ok 2 # skip passing test #2 (GIT_SKIP_TESTS) > ok 3 - passing test #3 @@ -293,15 +282,9 @@ 
test_expect_success 'subtest: skip several with GIT_SKIP_TESTS' ' test_expect_success 'subtest: sh pattern skipping with GIT_SKIP_TESTS' ' ( - write_and_run_sub_test_lib_test git-skip-tests-sh-pattern \ - --skip="git.[2-5]" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test git-skip-tests-sh-pattern <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --skip="git.[2-5]" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 # skip passing test #2 (GIT_SKIP_TESTS) > ok 3 # skip passing test #3 (GIT_SKIP_TESTS) @@ -316,15 +299,10 @@ test_expect_success 'subtest: sh pattern skipping with GIT_SKIP_TESTS' ' test_expect_success 'subtest: skip entire test suite with GIT_SKIP_TESTS' ' ( - write_and_run_sub_test_lib_test git-skip-tests-entire-suite \ - --skip="git" <<-\EOF && - for i in 1 2 3 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test git-skip-tests-entire-suite <<-\EOF + GIT_SKIP_TESTS="git" && export GIT_SKIP_TESTS && + run_sub_test_lib_test git-skip-tests-several \ + --skip="git" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > 1..0 # SKIP skip all tests in git EOF ) @@ -332,15 +310,10 @@ test_expect_success 'subtest: skip entire test suite with GIT_SKIP_TESTS' ' test_expect_success 'subtest: GIT_SKIP_TESTS does not skip unmatched suite' ' ( - write_and_run_sub_test_lib_test git-skip-tests-unmatched-suite \ - --skip="notgit" <<-\EOF && - for i in 1 2 3 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test git-skip-tests-unmatched-suite <<-\EOF + GIT_SKIP_TESTS="notgit" && export GIT_SKIP_TESTS && + run_sub_test_lib_test full-pass \ + --skip="notfull" && + check_sub_test_lib_test full-pass <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 - passing test #3 @@ -351,14 +324,8 @@ test_expect_success 'subtest: 
GIT_SKIP_TESTS does not skip unmatched suite' ' ' test_expect_success 'subtest: --run basic' ' - write_and_run_sub_test_lib_test run-basic --run="1,3,5" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-basic <<-\EOF + run_sub_test_lib_test git-skip-tests-several --run="1,3,5" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 # skip passing test #2 (--run) > ok 3 - passing test #3 @@ -371,15 +338,9 @@ test_expect_success 'subtest: --run basic' ' ' test_expect_success 'subtest: --run with a range' ' - write_and_run_sub_test_lib_test run-range \ - --run="1-3" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-range <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="1-3" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 - passing test #3 @@ -392,15 +353,9 @@ test_expect_success 'subtest: --run with a range' ' ' test_expect_success 'subtest: --run with two ranges' ' - write_and_run_sub_test_lib_test run-two-ranges \ - --run="1-2,5-6" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-two-ranges <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="1-2,5-6" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 # skip passing test #3 (--run) @@ -413,15 +368,9 @@ test_expect_success 'subtest: --run with two ranges' ' ' test_expect_success 'subtest: --run with a left open range' ' - write_and_run_sub_test_lib_test run-left-open-range \ - --run="-3" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-left-open-range 
<<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="-3" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 - passing test #3 @@ -434,15 +383,9 @@ test_expect_success 'subtest: --run with a left open range' ' ' test_expect_success 'subtest: --run with a right open range' ' - write_and_run_sub_test_lib_test run-right-open-range \ - --run="4-" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-right-open-range <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="4-" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 # skip passing test #1 (--run) > ok 2 # skip passing test #2 (--run) > ok 3 # skip passing test #3 (--run) @@ -455,15 +398,9 @@ test_expect_success 'subtest: --run with a right open range' ' ' test_expect_success 'subtest: --run with basic negation' ' - write_and_run_sub_test_lib_test run-basic-neg \ - --run="!3" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-basic-neg <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="!3" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 # skip passing test #3 (--run) @@ -476,15 +413,9 @@ test_expect_success 'subtest: --run with basic negation' ' ' test_expect_success 'subtest: --run with two negations' ' - write_and_run_sub_test_lib_test run-two-neg \ - --run="!3,!6" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-two-neg <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="!3,!6" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 # skip passing test #3 (--run) @@ -497,15 +428,9 @@ 
test_expect_success 'subtest: --run with two negations' ' ' test_expect_success 'subtest: --run a range and negation' ' - write_and_run_sub_test_lib_test run-range-and-neg \ - --run="-4,!2" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-range-and-neg <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="-4,!2" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 # skip passing test #2 (--run) > ok 3 - passing test #3 @@ -518,15 +443,9 @@ test_expect_success 'subtest: --run a range and negation' ' ' test_expect_success 'subtest: --run range negation' ' - write_and_run_sub_test_lib_test run-range-neg \ - --run="!1-3" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-range-neg <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="!1-3" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 # skip passing test #1 (--run) > ok 2 # skip passing test #2 (--run) > ok 3 # skip passing test #3 (--run) @@ -539,15 +458,9 @@ test_expect_success 'subtest: --run range negation' ' ' test_expect_success 'subtest: --run include, exclude and include' ' - write_and_run_sub_test_lib_test run-inc-neg-inc \ - --run="1-5,!1-3,2" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-inc-neg-inc <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="1-5,!1-3,2" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 # skip passing test #1 (--run) > ok 2 - passing test #2 > ok 3 # skip passing test #3 (--run) @@ -560,15 +473,9 @@ test_expect_success 'subtest: --run include, exclude and include' ' ' test_expect_success 'subtest: --run include, exclude and include, comma separated' ' - write_and_run_sub_test_lib_test 
run-inc-neg-inc-comma \ - --run=1-5,!1-3,2 <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-inc-neg-inc-comma <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run=1-5,!1-3,2 && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 # skip passing test #1 (--run) > ok 2 - passing test #2 > ok 3 # skip passing test #3 (--run) @@ -581,15 +488,9 @@ test_expect_success 'subtest: --run include, exclude and include, comma separate ' test_expect_success 'subtest: --run exclude and include' ' - write_and_run_sub_test_lib_test run-neg-inc \ - --run="!3-,5" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-neg-inc <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="!3-,5" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 - passing test #2 > ok 3 # skip passing test #3 (--run) @@ -602,15 +503,9 @@ test_expect_success 'subtest: --run exclude and include' ' ' test_expect_success 'subtest: --run empty selectors' ' - write_and_run_sub_test_lib_test run-empty-sel \ - --run="1,,3,,,5" <<-\EOF && - for i in 1 2 3 4 5 6 - do - test_expect_success "passing test #$i" "true" - done - test_done - EOF - check_sub_test_lib_test run-empty-sel <<-\EOF + run_sub_test_lib_test git-skip-tests-several \ + --run="1,,3,,,5" && + check_sub_test_lib_test git-skip-tests-several <<-\EOF > ok 1 - passing test #1 > ok 2 # skip passing test #2 (--run) > ok 3 - passing test #3 @@ -660,12 +555,9 @@ test_expect_success 'subtest: --run keyword selection' ' ' test_expect_success 'subtest: --run invalid range end' ' - write_and_run_sub_test_lib_test_err run-inv-range-end \ - --run="1-z" <<-\EOF && - test_expect_success "passing test #1" "true" - test_done - EOF - check_sub_test_lib_test_err run-inv-range-end \ + run_sub_test_lib_test_err 
run-inv-range-start \ + --run="1-z" && + check_sub_test_lib_test_err run-inv-range-start \ <<-\EOF_OUT 3<<-EOF_ERR > FATAL: Unexpected exit with code 1 EOF_OUT From 96a4473d78a9cfa224abe986f8d3275e10228e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 5 Aug 2021 12:37:25 +0200 Subject: [PATCH 166/198] test-lib tests: assert no copy/pasted mock test code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the preceding commit we got rid of a bunch of copy/pasted test code from t0000-basic.sh in favor of re-using earlier already set up tests. Since copy/pasting the tests is likely to be something done by mistake in the future, let's add an assertion that detects whether this has happened. Now that we don't provide a unique test_description="" to these tests we can rely on the content written out being the same in this case. Let's use the object store to hash it, and attempt to create a "blob-<CONTENT_OID>" tag. If another test has already written out the same content we'll fail, and the tag envelope will provide an error message pointing us in the right direction.. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/lib-subtest.sh | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index cdbc2d933206bb..6857afdaa5d45b 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -1,3 +1,34 @@ +_assert_unique_sub_test () { + name=$1 && + + # Alert about the copy/paste programming + hash=$(git hash-object -w "$name") && + cat >tag.sig <<-EOF && + object $hash + type blob + tag $hash + tagger . <> 0 +0000 + + duplicate script detected! + + This test script was already written as: + + $name + + You can just re-use its test code with your own + run_sub_test_lib_test*() + EOF + + tag=$(git mktag <tag.sig) && + if ! 
git update-ref refs/tags/blob-$hash $tag $(test_oid zero) 2>/dev/null + then + msg=$(git for-each-ref refs/tags/blob-$hash \ + --format='%(contents)' refs/tags/blob-$hash) + error "on write of $name: $msg" + return 1 + fi +} + write_sub_test_lib_test () { name="$1" # stdin is the body of the test code mkdir "$name" && @@ -7,7 +38,8 @@ write_sub_test_lib_test () { # Point to the t/test-lib.sh, which isn't in ../ as usual . "\$TEST_DIRECTORY"/test-lib.sh EOF - cat >>"$name/$name.sh" + cat >>"$name/$name.sh" && + _assert_unique_sub_test "$name/$name.sh" } _run_sub_test_lib_test_common () { From e52e72f6208eaf1dbde94cb1843af65c513ea64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 5 Aug 2021 12:37:26 +0200 Subject: [PATCH 167/198] test-lib tests: avoid subshell for "test_cmp" for readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of a sub-shell for running the test_cmp of stdout/stderr for the test author was introduced in this form in 565b6fa87bb (tests: refactor mechanics of testing in a sub test-lib, 2012-12-16), but from looking at the history that seemed to have diligently copied my original ad-hoc implementation in 7b905119703 (t/t0000-basic.sh: Run the passing TODO test inside its own test-lib, 2010-08-19). There's no reason to use a subshell here, we try to avoid it in general. It also improves readability, if the test fails we print out the relative path in the trash directory that needs to be looked at. Before that was mostly obscured, since the "write_sub_test_lib_test" will pick the directory for you from the test name. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/lib-subtest.sh | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index 6857afdaa5d45b..529f3a4d08bc50 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -111,22 +111,16 @@ run_sub_test_lib_test_err () { check_sub_test_lib_test () { name="$1" # stdin is the expected output from the test - ( - cd "$name" && - test_must_be_empty err && - sed -e 's/^> //' -e 's/Z$//' >expect && - test_cmp expect out - ) + test_must_be_empty "$name"/err && + sed -e 's/^> //' -e 's/Z$//' >"$name"/expect && + test_cmp "$name/"expect "$name"/out } check_sub_test_lib_test_err () { name="$1" # stdin is the expected output from the test # expected error output is in descriptor 3 - ( - cd "$name" && - sed -e 's/^> //' -e 's/Z$//' >expect.out && - test_cmp expect.out out && - sed -e 's/^> //' -e 's/Z$//' <&3 >expect.err && - test_cmp expect.err err - ) + sed -e 's/^> //' -e 's/Z$//' >"$name"/expect.out && + test_cmp "$name"/expect.out "$name"/out && + sed -e 's/^> //' -e 's/Z$//' <&3 >"$name"/expect.err && + test_cmp "$name"/expect.err "$name"/err } From dda14ce6f6a22f263ceab94fd6c3a2f59a667734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 5 Aug 2021 12:37:27 +0200 Subject: [PATCH 168/198] test-lib tests: refactor common part of check_sub_test_lib_test*() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the two check_sub_test_lib_test*() functions to avoid duplicating the same comparison they did of stdout. This duplication was initially added when check_sub_test_lib_test_err() was added in 0445e6f0a12 (test-lib: '--run' to run only specific tests, 2014-04-30). 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/lib-subtest.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index 529f3a4d08bc50..0b9ecdb6dbf438 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -109,18 +109,22 @@ run_sub_test_lib_test_err () { _run_sub_test_lib_test_common '!' "$@" } +_check_sub_test_lib_test_common () { + name="$1" && + sed -e 's/^> //' -e 's/Z$//' >"$name"/expect.out && + test_cmp "$name"/expect.out "$name"/out +} + check_sub_test_lib_test () { name="$1" # stdin is the expected output from the test - test_must_be_empty "$name"/err && - sed -e 's/^> //' -e 's/Z$//' >"$name"/expect && - test_cmp "$name/"expect "$name"/out + _check_sub_test_lib_test_common "$name" && + test_must_be_empty "$name"/err } check_sub_test_lib_test_err () { name="$1" # stdin is the expected output from the test + _check_sub_test_lib_test_common "$name" && # expected error output is in descriptor 3 - sed -e 's/^> //' -e 's/Z$//' >"$name"/expect.out && - test_cmp "$name"/expect.out "$name"/out && sed -e 's/^> //' -e 's/Z$//' <&3 >"$name"/expect.err && test_cmp "$name"/expect.err "$name"/err } From 8161050e923bbeca8a47372064c6dffa9022b529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 5 Aug 2021 12:37:28 +0200 Subject: [PATCH 169/198] test-lib tests: assert 1 exit code, not non-zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve the testing for test-lib.sh itself to assert that we have an exit code of 1, not any non-zero. Improves code added in 0445e6f0a12 (test-lib: '--run' to run only specific tests, 2014-04-30). 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/lib-subtest.sh | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/t/lib-subtest.sh b/t/lib-subtest.sh index 0b9ecdb6dbf438..d6b625d397fdac 100644 --- a/t/lib-subtest.sh +++ b/t/lib-subtest.sh @@ -43,8 +43,8 @@ write_sub_test_lib_test () { } _run_sub_test_lib_test_common () { - neg="$1" name="$2" # stdin is the body of the test code - shift 2 + cmp_op="$1" want_code="$2" name="$3" # stdin is the body of the test code + shift 3 # intercept pseudo-options at the front of the argument list that we # will not pass to child script @@ -80,33 +80,30 @@ _run_sub_test_lib_test_common () { GIT_SKIP_TESTS=$skip && export GIT_SKIP_TESTS && sane_unset GIT_TEST_FAIL_PREREQS && - if test -z "$neg" - then - ./"$name.sh" "$@" >out 2>err - else - ! ./"$name.sh" "$@" >out 2>err - fi + ./"$name.sh" "$@" >out 2>err; + ret=$? && + test "$ret" "$cmp_op" "$want_code" ) } write_and_run_sub_test_lib_test () { name="$1" descr="$2" # stdin is the body of the test code write_sub_test_lib_test "$@" || return 1 - _run_sub_test_lib_test_common '' "$@" + _run_sub_test_lib_test_common -eq 0 "$@" } write_and_run_sub_test_lib_test_err () { name="$1" descr="$2" # stdin is the body of the test code write_sub_test_lib_test "$@" || return 1 - _run_sub_test_lib_test_common '!' "$@" + _run_sub_test_lib_test_common -eq 1 "$@" } run_sub_test_lib_test () { - _run_sub_test_lib_test_common '' "$@" + _run_sub_test_lib_test_common -eq 0 "$@" } run_sub_test_lib_test_err () { - _run_sub_test_lib_test_common '!' 
"$@" + _run_sub_test_lib_test_common -eq 1 "$@" } _check_sub_test_lib_test_common () { From 45f2aeb194fd6425bb56f3d0ebf61bf39883a496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com> Date: Thu, 5 Aug 2021 13:01:11 +0200 Subject: [PATCH 170/198] commit-graph: fix bogus counter in "Scanning merged commits" progress line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The final value of the counter of the "Scanning merged commits" progress line is always one less than its expected total, e.g.: Scanning merged commits: 83% (5/6), done. This happens because while iterating over an array the loop variable is passed to display_progress() as-is, but while C arrays (and thus the loop variable) start at 0 and end at N-1, the progress counter must end at N. Fix this by passing 'i + 1' to display_progress(), like most other callsites do. There's an RFC series to add a GIT_TEST_CHECK_PROGRESS=1 mode[1] which catches this issue in the 'fetch.writeCommitGraph' and 'fetch.writeCommitGraph with submodules' tests in 't5510-fetch.sh'. The GIT_TEST_CHECK_PROGRESS=1 mode is not part of this series, but future changes to progress.c may add it or similar assertions to catch this and similar bugs elsewhere. 1. 
https://lore.kernel.org/git/20210620200303.2328957-1-szeder.dev@gmail.com/ Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- commit-graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index 1a2602da618c9b..918061f207cf86 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2096,7 +2096,7 @@ static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx) ctx->num_extra_edges = 0; for (i = 0; i < ctx->commits.nr; i++) { - display_progress(ctx->progress, i); + display_progress(ctx->progress, i + 1); if (i && oideq(&ctx->commits.list[i - 1]->object.oid, &ctx->commits.list[i]->object.oid)) { From 2a943937dda6d49827f020ee79287a08f28afa64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 5 Aug 2021 13:01:12 +0200 Subject: [PATCH 171/198] midx: don't provide a total for QSORT() progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The quicksort algorithm can be anywhere between O(n) and O(n^2), so providing a "num objects" as a total means that in some cases we're going to go past 100%. This fixes a logic error in 5ae18df9d8e (midx: during verify group objects by packfile to speed verification, 2019-03-21), which in turn seems to have been diligently copied from my own logic error in the commit-graph.c code, see 890226ccb57 (commit-graph write: add itermediate progress, 2019-01-19). That commit-graph code of mine was removed in 1cbdbf3bef7 (commit-graph: drop count_distinct_commits() function, 2020-12-07), so we don't need to fix that too. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- midx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 9a35b0255d5c04..eaae75ab195402 100644 --- a/midx.c +++ b/midx.c @@ -1291,7 +1291,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag if (flags & MIDX_PROGRESS) progress = start_sparse_progress(_("Sorting objects by packfile"), - m->num_objects); + 0); display_progress(progress, 0); /* TODO: Measure QSORT() progress */ QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); stop_progress(&progress); From da660bd15ba6df415a26d9a3ff8df4e0cbfd5148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com> Date: Thu, 5 Aug 2021 13:01:13 +0200 Subject: [PATCH 172/198] entry: show finer-grained counter in "Filtering content" progress line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Filtering content" progress in entry.c:finish_delayed_checkout() is unusual because of how it calculates the progress count and because it shows the progress of a nested loop. It works basically like this: start_delayed_progress(p, nr_of_paths_to_filter) for_each_filter { display_progress(p, nr_of_paths_to_filter - nr_of_paths_still_left_to_filter) for_each_path_handled_by_the_current_filter { checkout_entry() } } stop_progress(p) There are two issues with this approach: - The work done by the last filter (or the only filter if there is only one) is never counted, so if the last filter still has some paths to process, then the counter shown in the "done" progress line will not match the expected total. The partially-RFC series to add a GIT_TEST_CHECK_PROGRESS=1 mode[1] helps spot this issue. Under it the 'missing file in delayed checkout' and 'invalid file in delayed checkout' tests in 't0021-conversion.sh' fail, because both use only one filter. 
(The test 'delayed checkout in process filter' uses two filters but the first one does all the work, so that test already happens to succeed even with GIT_TEST_CHECK_PROGRESS=1.) - The progress counter is updated only once per filter, not once per processed path, so if a filter has a lot of paths to process, then the counter might stay unchanged for a long while and then make a big jump (though the user still gets a sense of progress, because we call display_throughput() after each processed path to show the amount of processed data). Move the display_progress() call to the inner loop, right next to that checkout_entry() call that does the hard work for each path, and use a dedicated counter variable that is incremented upon processing each path. After this change the 'invalid file in delayed checkout' in 't0021-conversion.sh' would succeed with the GIT_TEST_CHECK_PROGRESS=1 assertion discussed above, but the 'missing file in delayed checkout' test would still fail. It'll fail because its purposefully buggy filter doesn't process any paths, so we won't execute that inner loop at all, see [2] for how to spot that issue without GIT_TEST_CHECK_PROGRESS=1. It's not straightforward to fix it with the current progress.c library (see [3] for an attempt), so let's leave it for now. 1. https://lore.kernel.org/git/20210620200303.2328957-1-szeder.dev@gmail.com/ 2. http://lore.kernel.org/git/20210802214827.GE23408@szeder.dev 3. 
https://lore.kernel.org/git/20210620200303.2328957-7-szeder.dev@gmail.com/ Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- entry.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/entry.c b/entry.c index 125fabdbd52c4d..d92dd020b3dd94 100644 --- a/entry.c +++ b/entry.c @@ -162,7 +162,7 @@ static int remove_available_paths(struct string_list_item *item, void *cb_data) int finish_delayed_checkout(struct checkout *state, int *nr_checkouts) { int errs = 0; - unsigned delayed_object_count; + unsigned processed_paths = 0; off_t filtered_bytes = 0; struct string_list_item *filter, *path; struct progress *progress; @@ -172,12 +172,10 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts) return errs; dco->state = CE_RETRY; - delayed_object_count = dco->paths.nr; - progress = start_delayed_progress(_("Filtering content"), delayed_object_count); + progress = start_delayed_progress(_("Filtering content"), dco->paths.nr); while (dco->filters.nr > 0) { for_each_string_list_item(filter, &dco->filters) { struct string_list available_paths = STRING_LIST_INIT_NODUP; - display_progress(progress, delayed_object_count - dco->paths.nr); if (!async_query_available_blobs(filter->string, &available_paths)) { /* Filter reported an error */ @@ -224,6 +222,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts) ce = index_file_exists(state->istate, path->string, strlen(path->string), 0); if (ce) { + display_progress(progress, ++processed_paths); errs |= checkout_entry(ce, state, NULL, nr_checkouts); filtered_bytes += ce->ce_stat_data.sd_size; display_throughput(progress, filtered_bytes); From 232290273b8b817312fcd1f5a88e154343e62b92 Mon Sep 17 00:00:00 2001 From: Jeff King <peff@peff.net> Date: Thu, 5 Aug 2021 16:47:54 -0400 Subject: [PATCH 173/198] diff: drop unused options parameter from cmp_in_block_with_wsd() 
When 8e809cbb2f (diff --color-moved-ws=allow-indentation-change: simplify and optimize, 2021-07-20) stopped looking at o->emitted_symbols and instead took the symbol as a parameter, we no longer need to look at the diff_options struct at all. Dropping the unused parameter makes it clear that the function is independent of the diff options. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- diff.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/diff.c b/diff.c index 2956c8f7103c2a..164af13b4f428a 100644 --- a/diff.c +++ b/diff.c @@ -863,8 +863,7 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a, return a_width - b_width; } -static int cmp_in_block_with_wsd(const struct diff_options *o, - const struct moved_entry *cur, +static int cmp_in_block_with_wsd(const struct moved_entry *cur, const struct emitted_diff_symbol *l, struct moved_block *pmb) { @@ -1016,7 +1015,7 @@ static void pmb_advance_or_null(struct diff_options *o, if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) match = cur && - !cmp_in_block_with_wsd(o, cur, l, &pmb[i]); + !cmp_in_block_with_wsd(cur, l, &pmb[i]); else match = cur && cur->es->id == l->id; From 8a41badcffff4479a1317bd668451d13741bf49e Mon Sep 17 00:00:00 2001 From: Jeff King <peff@peff.net> Date: Thu, 5 Aug 2021 20:46:51 -0400 Subject: [PATCH 174/198] refs: drop unused "flags" parameter to lock_ref_oid_basic() Commit 1578215dab (refs/files: remove unused REF_DELETING in lock_ref_oid_basic(), 2021-07-20), dropped the last use of this flag parameter. All of the callers do pass REF_NO_DEREF, but that has been ignored completely since 7a418f3a17 (lock_ref_sha1_basic(): only handle REF_NODEREF mode, 2016-04-22). So we can simply get rid of the parameter entirely. 
Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- refs/files-backend.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 5d12003471e586..9a20d2bfc6ef45 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -876,8 +876,7 @@ static int create_reflock(const char *path, void *cb) * On failure errno is set to something meaningful. */ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, - const char *refname, - unsigned int flags, int *type, + const char *refname, int *type, struct strbuf *err) { struct strbuf ref_file = STRBUF_INIT; @@ -1345,7 +1344,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store, logmoved = log; - lock = lock_ref_oid_basic(refs, newrefname, REF_NO_DEREF, NULL, &err); + lock = lock_ref_oid_basic(refs, newrefname, NULL, &err); if (!lock) { if (copy) error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf); @@ -1367,7 +1366,7 @@ static int files_copy_or_rename_ref(struct ref_store *ref_store, goto out; rollback: - lock = lock_ref_oid_basic(refs, oldrefname, REF_NO_DEREF, NULL, &err); + lock = lock_ref_oid_basic(refs, oldrefname, NULL, &err); if (!lock) { error("unable to lock %s for rollback: %s", oldrefname, err.buf); strbuf_release(&err); @@ -1774,7 +1773,7 @@ static int files_create_symref(struct ref_store *ref_store, struct ref_lock *lock; int ret; - lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, NULL, &err); + lock = lock_ref_oid_basic(refs, refname, NULL, &err); if (!lock) { error("%s", err.buf); strbuf_release(&err); @@ -2992,7 +2991,7 @@ static int files_reflog_expire(struct ref_store *ref_store, * reference itself, plus we might need to update the * reference if --updateref was specified: */ - lock = lock_ref_oid_basic(refs, refname, REF_NO_DEREF, &type, &err); + lock = lock_ref_oid_basic(refs, refname, &type, &err); if (!lock) { error("cannot lock 
ref '%s': %s", refname, err.buf); strbuf_release(&err); From d44ac73b5de38268a60ab91f9b483782b2454592 Mon Sep 17 00:00:00 2001 From: Noah Pendleton <noah.pendleton@gmail.com> Date: Sun, 8 Aug 2021 13:48:47 -0400 Subject: [PATCH 175/198] blame: add config `blame.ignoreRevsFileIsOptional` Setting the config option `blame.ignoreRevsFile` globally to eg `.git-blame-ignore-revs` causes `git blame` to error when the file doesn't exist in the current repository: ``` fatal: could not open object name list: .git-blame-ignore-revs ``` Add a new config option, `blame.ignoreRevsFileIsOptional`, that when set to true, `git blame` will silently ignore any missing ignoreRevsFile's. Signed-off-by: Noah Pendleton <noah.pendleton@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/blame-options.txt | 3 ++- Documentation/config/blame.txt | 5 +++++ builtin/blame.c | 7 ++++++- t/t8013-blame-ignore-revs.sh | 14 ++++++++++---- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt index 117f4cf80645e1..199a28ab79c47e 100644 --- a/Documentation/blame-options.txt +++ b/Documentation/blame-options.txt @@ -134,7 +134,8 @@ take effect. `fsck.skipList`. This option may be repeated, and these files will be processed after any files specified with the `blame.ignoreRevsFile` config option. An empty file name, `""`, will clear the list of revs from - previously processed files. + previously processed files. If `blame.ignoreRevsFileIsOptional` is true, + missing files will be silently ignored. -h:: Show help message. diff --git a/Documentation/config/blame.txt b/Documentation/config/blame.txt index 4d047c17908cd6..2aae851e4b76e3 100644 --- a/Documentation/config/blame.txt +++ b/Documentation/config/blame.txt @@ -27,6 +27,11 @@ blame.ignoreRevsFile:: file names will reset the list of ignored revisions. This option will be handled before the command line option `--ignore-revs-file`. 
+blame.ignoreRevsFileIsOptional:: + Silently skip missing files specified by ignoreRevsFile or the command line + option `--ignore-revs-file`. If unset, or set to false, missing files will + cause a nonrecoverable error. + blame.markUnblamableLines:: Mark lines that were changed by an ignored revision that we could not attribute to another commit with a '*' in the output of diff --git a/builtin/blame.c b/builtin/blame.c index 641523ff9af693..df132b34cedc2b 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -56,6 +56,7 @@ static int coloring_mode; static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP; static int mark_unblamable_lines; static int mark_ignored_lines; +static int ignorerevsfileisoptional; static struct date_mode blame_date_mode = { DATE_ISO8601 }; static size_t blame_date_width; @@ -715,6 +716,9 @@ static int git_blame_config(const char *var, const char *value, void *cb) string_list_insert(&ignore_revs_file_list, str); return 0; } + if (!strcmp(var, "blame.ignorerevsfileisoptional")) { + ignorerevsfileisoptional = git_config_bool(var, value); + } if (!strcmp(var, "blame.markunblamablelines")) { mark_unblamable_lines = git_config_bool(var, value); return 0; @@ -835,7 +839,8 @@ static void build_ignorelist(struct blame_scoreboard *sb, for_each_string_list_item(i, ignore_revs_file_list) { if (!strcmp(i->string, "")) oidset_clear(&sb->ignore_list); - else + /* skip non-existent files if ignorerevsfileisoptional is set */ + else if (!ignorerevsfileisoptional || file_exists(i->string)) oidset_parse_file_carefully(&sb->ignore_list, i->string, peel_to_commit_oid, sb); } diff --git a/t/t8013-blame-ignore-revs.sh b/t/t8013-blame-ignore-revs.sh index b18633dee1bfb2..f789426cbf2aee 100755 --- a/t/t8013-blame-ignore-revs.sh +++ b/t/t8013-blame-ignore-revs.sh @@ -127,18 +127,24 @@ test_expect_success override_ignore_revs_file ' grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual && test_cmp expect actual ' -test_expect_success 
bad_files_and_revs ' +test_expect_success bad_revs ' test_must_fail git blame file --ignore-rev NOREV 2>err && test_i18ngrep "cannot find revision NOREV to ignore" err && - test_must_fail git blame file --ignore-revs-file NOFILE 2>err && - test_i18ngrep "could not open.*: NOFILE" err && - echo NOREV >ignore_norev && test_must_fail git blame file --ignore-revs-file ignore_norev 2>err && test_i18ngrep "invalid object name: NOREV" err ' +# Non-existent ignore-revs-file should fail unless +# blame.ignoreRevsFileIsOptional is set +test_expect_success bad_file ' + test_must_fail git blame file --ignore-revs-file NOFILE && + + git config --add blame.ignorerevsfileisoptional true && + git blame file --ignore-revs-file NOFILE +' + # For ignored revs that have added 'unblamable' lines, mark those lines with a # '*' # A--B--X--Y From 7a672febd47e1c25c69732adda62fdfb481f8fb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com> Date: Sun, 8 Aug 2021 18:38:33 -0700 Subject: [PATCH 176/198] ci: run a pedantic build as part of the GitHub workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit similar to the recently added sparse task, it is nice to know as early as possible. add a dockerized build using fedora (that usually has the latest gcc) to be ahead of the curve and avoid older ISO C issues at the same time. 
Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- .github/workflows/main.yml | 2 ++ ci/install-docker-dependencies.sh | 4 ++++ ci/run-build-and-tests.sh | 10 +++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 73856bafc9debd..b93561978d3f4d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -324,6 +324,8 @@ jobs: image: alpine - jobname: Linux32 image: daald/ubuntu32:xenial + - jobname: pedantic + image: fedora env: jobname: ${{matrix.vector.jobname}} runs-on: ubuntu-latest diff --git a/ci/install-docker-dependencies.sh b/ci/install-docker-dependencies.sh index 26a6689766d7f1..07a8c6b199d39c 100755 --- a/ci/install-docker-dependencies.sh +++ b/ci/install-docker-dependencies.sh @@ -15,4 +15,8 @@ linux-musl) apk add --update build-base curl-dev openssl-dev expat-dev gettext \ pcre2-dev python3 musl-libintl perl-utils ncurses >/dev/null ;; +pedantic) + dnf -yq update >/dev/null && + dnf -yq install make gcc findutils diffutils perl python3 gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null + ;; esac diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 3ce81ffee941b2..f3aba5d6cbb931 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -10,6 +10,11 @@ windows*) cmd //c mklink //j t\\.prove "$(cygpath -aw "$cache_dir/.prove")";; *) ln -s "$cache_dir/.prove" t/.prove;; esac +if test "$jobname" = "pedantic" +then + export DEVOPTS=pedantic +fi + make case "$jobname" in linux-gcc) @@ -35,10 +40,9 @@ linux-clang) export GIT_TEST_DEFAULT_HASH=sha256 make test ;; -linux-gcc-4.8) +linux-gcc-4.8|pedantic) # Don't run the tests; we only care about whether Git can be - # built with GCC 4.8, as it errors out on some undesired (C99) - # constructs that newer compilers seem to quietly accept. 
+ # built with GCC 4.8 or with pedantic ;; *) make test From 2d12b1d828e07a46db7ae4bef46a4726d6cad0dc Mon Sep 17 00:00:00 2001 From: Kim Altintop <kim@eagain.st> Date: Mon, 9 Aug 2021 17:56:45 +0000 Subject: [PATCH 177/198] t5703: introduce fetch command helper Assembling a "raw" fetch command to be fed directly to "test-tool serve-v2" is extracted into a test helper. Suggested-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Kim Altintop <kim@eagain.st> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5703-upload-pack-ref-in-want.sh | 107 ++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 33 deletions(-) diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh index e9e471621d59a6..cd4744b016944f 100755 --- a/t/t5703-upload-pack-ref-in-want.sh +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -40,6 +40,54 @@ write_command () { fi } +# Write a complete fetch command to stdout, suitable for use with `test-tool +# pkt-line`. "want-ref", "want", and "have" values can be given in this order, +# with sections separated by "--". 
+# +# Examples: +# +# write_fetch_command refs/heads/main +# +# write_fetch_command \ +# refs/heads/main \ +# -- \ +# -- \ +# $(git rev-parse x) +# +# write_fetch_command \ +# -- +# $(git rev-parse a) \ +# -- +# $(git rev-parse b) +write_fetch_command () { + write_command fetch && + echo "0001" && + echo "no-progress" || return + while : + do + case $# in 0) break ;; esac && + case "$1" in --) shift; break ;; esac && + echo "want-ref $1" && + shift || return + done && + while : + do + case $# in 0) break ;; esac && + case "$1" in --) shift; break ;; esac && + echo "want $1" && + shift || return + done && + while : + do + case $# in 0) break ;; esac && + case "$1" in --) shift; break ;; esac && + echo "have $1" && + shift || return + done && + echo "done" && + echo "0000" +} + # c(o/foo) d(o/bar) # \ / # b e(baz) f(main) @@ -97,15 +145,13 @@ test_expect_success 'basic want-ref' ' EOF git rev-parse f >expected_commits && - oid=$(git rev-parse a) && test-tool pkt-line pack >in <<-EOF && - $(write_command fetch) - 0001 - no-progress - want-ref refs/heads/main - have $oid - done - 0000 + $(write_fetch_command \ + refs/heads/main \ + -- \ + -- \ + $(git rev-parse a) \ + ) EOF test-tool serve-v2 --stateless-rpc >out <in && @@ -121,16 +167,14 @@ test_expect_success 'multiple want-ref lines' ' EOF git rev-parse c d >expected_commits && - oid=$(git rev-parse b) && test-tool pkt-line pack >in <<-EOF && - $(write_command fetch) - 0001 - no-progress - want-ref refs/heads/o/foo - want-ref refs/heads/o/bar - have $oid - done - 0000 + $(write_fetch_command \ + refs/heads/o/foo \ + refs/heads/o/bar \ + -- \ + -- \ + $(git rev-parse b) \ + ) EOF test-tool serve-v2 --stateless-rpc >out <in && @@ -145,14 +189,13 @@ test_expect_success 'mix want and want-ref' ' git rev-parse e f >expected_commits && test-tool pkt-line pack >in <<-EOF && - $(write_command fetch) - 0001 - no-progress - want-ref refs/heads/main - want $(git rev-parse e) - have $(git rev-parse a) - done - 0000 + 
$(write_fetch_command \ + refs/heads/main \ + -- \ + $(git rev-parse e) \ + -- \ + $(git rev-parse a) \ + ) EOF test-tool serve-v2 --stateless-rpc >out <in && @@ -166,15 +209,13 @@ test_expect_success 'want-ref with ref we already have commit for' ' EOF >expected_commits && - oid=$(git rev-parse c) && test-tool pkt-line pack >in <<-EOF && - $(write_command fetch) - 0001 - no-progress - want-ref refs/heads/o/foo - have $oid - done - 0000 + $(write_fetch_command \ + refs/heads/o/foo \ + -- \ + -- \ + $(git rev-parse c) \ + ) EOF test-tool serve-v2 --stateless-rpc >out <in && From f8121e671acb141acfb2e1770353b339942e9125 Mon Sep 17 00:00:00 2001 From: Kim Altintop <kim@eagain.st> Date: Mon, 9 Aug 2021 17:57:09 +0000 Subject: [PATCH 178/198] upload-pack.c: treat want-ref relative to namespace When 'upload-pack' runs within the context of a git namespace, treat any 'want-ref' lines the client sends as relative to that namespace. Also check if the wanted ref is hidden via 'hideRefs'. If it is hidden, respond with an error as if the ref didn't exist. 
Helped-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Kim Altintop <kim@eagain.st> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t5703-upload-pack-ref-in-want.sh | 129 +++++++++++++++++++++++++++++ upload-pack.c | 18 ++-- 2 files changed, 140 insertions(+), 7 deletions(-) diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh index cd4744b016944f..3e1e0b8fe17d97 100755 --- a/t/t5703-upload-pack-ref-in-want.sh +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -339,6 +339,135 @@ test_expect_success 'fetching with wildcard that matches multiple refs' ' grep "want-ref refs/heads/o/bar" log ' +REPO="$(pwd)/repo-ns" + +test_expect_success 'setup namespaced repo' ' + ( + git init -b main "$REPO" && + cd "$REPO" && + test_commit a && + test_commit b && + git checkout a && + test_commit c && + git checkout a && + test_commit d && + git update-ref refs/heads/ns-no b && + git update-ref refs/namespaces/ns/refs/heads/ns-yes c && + git update-ref refs/namespaces/ns/refs/heads/hidden d + ) && + git -C "$REPO" config uploadpack.allowRefInWant true +' + +test_expect_success 'with namespace: want-ref is considered relative to namespace' ' + wanted_ref=refs/heads/ns-yes && + + oid=$(git -C "$REPO" rev-parse "refs/namespaces/ns/$wanted_ref") && + cat >expected_refs <<-EOF && + $oid $wanted_ref + EOF + cat >expected_commits <<-EOF && + $oid + $(git -C "$REPO" rev-parse a) + EOF + + test-tool pkt-line pack >in <<-EOF && + $(write_fetch_command $wanted_ref) + EOF + + GIT_NAMESPACE=ns test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in && + check_output +' + +test_expect_success 'with namespace: want-ref outside namespace is unknown' ' + wanted_ref=refs/heads/ns-no && + + test-tool pkt-line pack >in <<-EOF && + $(write_fetch_command $wanted_ref) + EOF + + test_must_fail env GIT_NAMESPACE=ns \ + test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in && + grep "unknown ref" out +' + +# Cross-check refs/heads/ns-no indeed exists 
+test_expect_success 'without namespace: want-ref outside namespace succeeds' ' + wanted_ref=refs/heads/ns-no && + + oid=$(git -C "$REPO" rev-parse $wanted_ref) && + cat >expected_refs <<-EOF && + $oid $wanted_ref + EOF + cat >expected_commits <<-EOF && + $oid + $(git -C "$REPO" rev-parse a) + EOF + + test-tool pkt-line pack >in <<-EOF && + $(write_fetch_command $wanted_ref) + EOF + + test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in && + check_output +' + +test_expect_success 'with namespace: hideRefs is matched, relative to namespace' ' + wanted_ref=refs/heads/hidden && + git -C "$REPO" config transfer.hideRefs $wanted_ref && + + test-tool pkt-line pack >in <<-EOF && + $(write_fetch_command $wanted_ref) + EOF + + test_must_fail env GIT_NAMESPACE=ns \ + test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in && + grep "unknown ref" out +' + +# Cross-check refs/heads/hidden indeed exists +test_expect_success 'with namespace: want-ref succeeds if hideRefs is removed' ' + wanted_ref=refs/heads/hidden && + git -C "$REPO" config --unset transfer.hideRefs $wanted_ref && + + oid=$(git -C "$REPO" rev-parse "refs/namespaces/ns/$wanted_ref") && + cat >expected_refs <<-EOF && + $oid $wanted_ref + EOF + cat >expected_commits <<-EOF && + $oid + $(git -C "$REPO" rev-parse a) + EOF + + test-tool pkt-line pack >in <<-EOF && + $(write_fetch_command $wanted_ref) + EOF + + GIT_NAMESPACE=ns test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in && + check_output +' + +test_expect_success 'without namespace: relative hideRefs does not match' ' + wanted_ref=refs/namespaces/ns/refs/heads/hidden && + git -C "$REPO" config transfer.hideRefs refs/heads/hidden && + + oid=$(git -C "$REPO" rev-parse $wanted_ref) && + cat >expected_refs <<-EOF && + $oid $wanted_ref + EOF + cat >expected_commits <<-EOF && + $oid + $(git -C "$REPO" rev-parse a) + EOF + + test-tool pkt-line pack >in <<-EOF && + $(write_fetch_command $wanted_ref) + EOF + + test-tool -C "$REPO" serve-v2 --stateless-rpc >out <in 
&& + check_output +' + + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd diff --git a/upload-pack.c b/upload-pack.c index 297b76fcb43679..6ce07231d3dc34 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1417,21 +1417,25 @@ static int parse_want_ref(struct packet_writer *writer, const char *line, struct string_list *wanted_refs, struct object_array *want_obj) { - const char *arg; - if (skip_prefix(line, "want-ref ", &arg)) { + const char *refname_nons; + if (skip_prefix(line, "want-ref ", &refname_nons)) { struct object_id oid; struct string_list_item *item; struct object *o; + struct strbuf refname = STRBUF_INIT; - if (read_ref(arg, &oid)) { - packet_writer_error(writer, "unknown ref %s", arg); - die("unknown ref %s", arg); + strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons); + if (ref_is_hidden(refname_nons, refname.buf) || + read_ref(refname.buf, &oid)) { + packet_writer_error(writer, "unknown ref %s", refname_nons); + die("unknown ref %s", refname_nons); } + strbuf_release(&refname); - item = string_list_append(wanted_refs, arg); + item = string_list_append(wanted_refs, refname_nons); item->util = oiddup(&oid); - o = parse_object_or_die(&oid, arg); + o = parse_object_or_die(&oid, refname_nons); if (!(o->flags & WANTED)) { o->flags |= WANTED; add_object_array(o, NULL, want_obj); From 24a6e5bc4e084e9e5e3b8981e98d2df8bc4c2e1a Mon Sep 17 00:00:00 2001 From: Kim Altintop <kim@eagain.st> Date: Mon, 9 Aug 2021 17:57:24 +0000 Subject: [PATCH 179/198] docs: clarify the interaction of transfer.hideRefs and namespaces Expand the section about namespaces in the documentation of `transfer.hideRefs` to point out the subtle differences between `upload-pack` and `receive-pack`. 9bedd82017 (upload-pack.c: treat want-ref relative to namespace, 2021-07-30) taught `upload-pack` to reject `want-ref`s for hidden refs, which is now documented. 
Signed-off-by: Kim Altintop <kim@eagain.st> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/transfer.txt | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt index 505126a7802319..09ebb399ceb225 100644 --- a/Documentation/config/transfer.txt +++ b/Documentation/config/transfer.txt @@ -52,13 +52,16 @@ If you have multiple hideRefs values, later entries override earlier ones (and entries in more-specific config files override less-specific ones). + If a namespace is in use, the namespace prefix is stripped from each -reference before it is matched against `transfer.hiderefs` patterns. -For example, if `refs/heads/master` is specified in `transfer.hideRefs` and -the current namespace is `foo`, then `refs/namespaces/foo/refs/heads/master` -is omitted from the advertisements but `refs/heads/master` and -`refs/namespaces/bar/refs/heads/master` are still advertised as so-called -"have" lines. In order to match refs before stripping, add a `^` in front of -the ref name. If you combine `!` and `^`, `!` must be specified first. +reference before it is matched against `transfer.hiderefs` patterns. For +example, if `refs/heads/master` is specified in `transfer.hideRefs` and the +current namespace is `foo`, then `refs/namespaces/foo/refs/heads/master` is +omitted from the advertisements. If `uploadpack.allowRefInWant` is set, +`upload-pack` will treat `want-ref refs/heads/master` in a protocol v2 +`fetch` command as if `refs/heads/master` was unknown. Note, however, that +`receive-pack` will still advertise the object id `refs/heads/master` is +pointing to, but will conceal the name of the ref. In order to match refs +before stripping, add a `^` in front of the ref name. If you combine `!` and +`^`, `!` must be specified first.
+ Even if you hide refs, a client may still be able to steal the target objects via the techniques described in the "SECURITY" section of the From 6a39a348cd4dae9935029d4fc86f25cdb86c68c3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Mon, 9 Aug 2021 14:55:48 -0700 Subject: [PATCH 180/198] ### match next From 86d6ca5886bdfaf92b4b5d1315a2664cc1387fbd Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:06 +0000 Subject: [PATCH 181/198] t7519: rewrite sparse index test The sparse index is tested with the FS Monitor hook and extension since f8fe49e (fsmonitor: integrate with sparse index, 2021-07-14). This test was very fragile because it shared an index across sparse and non-sparse behavior. Since that expansion and contraction could cause the index to lose its FS Monitor bitmap and token, behavior is fragile to changes in 'git sparse-checkout set'. Rewrite the test to use two clones of the original repo: full and sparse. This allows us to also keep the test files (actual, expect, trace2.txt) out of the repos we are testing with 'git status'. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7519-status-fsmonitor.sh | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index deea88d4431d23..6f2cf306f66588 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -389,43 +389,47 @@ test_expect_success 'status succeeds after staging/unstaging' ' # If "!" is supplied, then we verify that we do not call ensure_full_index # during a call to 'git status'. Otherwise, we verify that we _do_ call it. 
check_sparse_index_behavior () { - git status --porcelain=v2 >expect && - git sparse-checkout init --cone --sparse-index && - git sparse-checkout set dir1 dir2 && + git -C full status --porcelain=v2 >expect && GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ - git status --porcelain=v2 >actual && + git -C sparse status --porcelain=v2 >actual && test_region $1 index ensure_full_index trace2.txt && test_region fsm_hook query trace2.txt && test_cmp expect actual && - rm trace2.txt && - git sparse-checkout disable + rm trace2.txt } test_expect_success 'status succeeds with sparse index' ' - git reset --hard && + git clone . full && + git clone . sparse && + git -C sparse sparse-checkout init --cone --sparse-index && + git -C sparse sparse-checkout set dir1 dir2 && - test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" && - check_sparse_index_behavior ! && - - write_script .git/hooks/fsmonitor-test<<-\EOF && + write_script .git/hooks/fsmonitor-test <<-\EOF && printf "last_update_token\0" EOF - git config core.fsmonitor .git/hooks/fsmonitor-test && + git -C full config core.fsmonitor ../.git/hooks/fsmonitor-test && + git -C sparse config core.fsmonitor ../.git/hooks/fsmonitor-test && check_sparse_index_behavior ! && - write_script .git/hooks/fsmonitor-test<<-\EOF && + write_script .git/hooks/fsmonitor-test <<-\EOF && printf "last_update_token\0" printf "dir1/modified\0" EOF check_sparse_index_behavior ! && - cp -r dir1 dir1a && - git add dir1a && - git commit -m "add dir1a" && + git -C sparse sparse-checkout add dir1a && + + for repo in full sparse + do + cp -r $repo/dir1 $repo/dir1a && + git -C $repo add dir1a && + git -C $repo commit -m "add dir1a" || return 1 + done && + git -C sparse sparse-checkout set dir1 dir2 && # This one modifies outside the sparse-checkout definition # and hence we expect to expand the sparse-index. 
- write_script .git/hooks/fsmonitor-test<<-\EOF && + write_script .git/hooks/fsmonitor-test <<-\EOF && printf "last_update_token\0" printf "dir1a/modified\0" EOF From 160eea76a2d34b343d189a4386cb667e177663fd Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:07 +0000 Subject: [PATCH 182/198] sparse-index: silently return when not using cone-mode patterns While the sparse-index is only enabled when core.sparseCheckoutCone is also enabled, it is possible for the user to modify the sparse-checkout file manually in a way that does not match cone-mode patterns. In this case, we should refuse to convert an index into a sparse index, since the sparse_checkout_patterns will not be initialized with recursive and parent path hashsets. Also silently return if there are no cache entries, which is a simple case: there are no paths to make sparse! Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- sparse-index.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index c6b4feec413a8f..bc5900eae35b45 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -130,7 +130,8 @@ static int index_has_unmerged_entries(struct index_state *istate) int convert_to_sparse(struct index_state *istate) { int test_env; - if (istate->split_index || istate->sparse_index || + + if (istate->split_index || istate->sparse_index || !istate->cache_nr || !core_apply_sparse_checkout || !core_sparse_checkout_cone) return 0; @@ -158,10 +159,16 @@ int convert_to_sparse(struct index_state *istate) return 0; } - if (!istate->sparse_checkout_patterns->use_cone_patterns) { - warning(_("attempting to use sparse-index without cone mode")); - return -1; - } + /* + * We need cone-mode patterns to use sparse-index. 
If a user edits + * their sparse-checkout file manually, then we can detect during + * parsing that they are not actually using cone-mode patterns and + * hence we need to abort this conversion _without error_. Warnings + * already exist in the pattern parsing to inform the user of their + * bad patterns. + */ + if (!istate->sparse_checkout_patterns->use_cone_patterns) + return 0; /* * NEEDSWORK: If we have unmerged entries, then stay full. From 3cfe2b83264a4fedfa266e58698322a806d8f16c Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:08 +0000 Subject: [PATCH 183/198] sparse-index: silently return when cache tree fails If cache_tree_update() returns a non-zero value, then it could not create the cache tree. This is likely due to a path having a merge conflict. Since we are already returning early, let's return silently to avoid making it seem like we failed to write the index at all. If we remove our dependence on the cache tree within convert_to_sparse(), then we could still recover from this scenario and have a sparse index. When constructing the cache-tree extension in convert_to_sparse(), it is possible that we construct a tree object that is new to the object database. Without the WRITE_TREE_MISSING_OK flag, this results in an error that halts our conversion to a sparse index. Add this flag to remove this limitation. 
Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- sparse-index.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index bc5900eae35b45..b6e90417556afc 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -179,10 +179,15 @@ int convert_to_sparse(struct index_state *istate) /* Clear and recompute the cache-tree */ cache_tree_free(&istate->cache_tree); - if (cache_tree_update(istate, 0)) { - warning(_("unable to update cache-tree, staying full")); - return -1; - } + /* + * Silently return if there is a problem with the cache tree update, + * which might just be due to a conflict state in some entry. + * + * This might create new tree objects, so be sure to use + * WRITE_TREE_MISSING_OK. + */ + if (cache_tree_update(istate, WRITE_TREE_MISSING_OK)) + return 0; remove_fsmonitor(istate); From 8390e19b518cc8829ef623741937a43087f2b032 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:09 +0000 Subject: [PATCH 184/198] unpack-trees: fix nested sparse-dir search The iterated search in find_cache_entry() was recently modified to include a loop that searches backwards for a sparse directory entry that matches the given traverse_info and name_entry. However, the string comparison failed to actually concatenate those two strings, so this failed to find a sparse directory when it was not a top-level directory. This caused some errors in rare cases where a 'git checkout' spanned a diff that modified files within the sparse directory entry, but we could not correctly find the entry. 
Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- unpack-trees.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index 5786645f315d51..df1f44377232c0 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1255,9 +1255,10 @@ static int sparse_dir_matches_path(const struct cache_entry *ce, static struct cache_entry *find_cache_entry(struct traverse_info *info, const struct name_entry *p) { - struct cache_entry *ce; + struct cache_entry *ce = NULL; int pos = find_cache_pos(info, p->path, p->pathlen); struct unpack_trees_options *o = info->data; + struct strbuf full_path = STRBUF_INIT; if (0 <= pos) return o->src_index->cache[pos]; @@ -1273,6 +1274,10 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info, if (pos < 0 || pos >= o->src_index->cache_nr) return NULL; + strbuf_addstr(&full_path, info->traverse_path); + strbuf_add(&full_path, p->path, p->pathlen); + strbuf_addch(&full_path, '/'); + /* * Due to lexicographic sorting and sparse directory * entries ending with a trailing slash, our path as a @@ -1283,17 +1288,20 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info, while (pos >= 0) { ce = o->src_index->cache[pos]; - if (strncmp(ce->name, p->path, p->pathlen)) - return NULL; + if (strncmp(ce->name, full_path.buf, full_path.len)) { + ce = NULL; + break; + } if (S_ISSPARSEDIR(ce->ce_mode) && sparse_dir_matches_path(ce, info, p)) - return ce; + break; pos--; } - return NULL; + strbuf_release(&full_path); + return ce; } static void debug_path(struct traverse_info *info) From d55b4bcbccb0302a36898277d4735a9e2a0078f0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:10 +0000 Subject: [PATCH 185/198] sparse-checkout: create helper methods As we integrate the sparse index into more builtins, we occasionally need to check the sparse-checkout patterns to see 
if a path is within the sparse-checkout cone. Create some helper methods that help initialize the patterns and check for pattern matching to make this easier. The existing callers of commands like get_sparse_checkout_patterns() use a custom 'struct pattern_list' that is not necessarily the one in the 'struct index_state', so there are not many previous uses that could adopt these helpers. There are just two in builtin/add.c and sparse-index.c that can use path_in_sparse_checkout(). Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/add.c | 8 ++------ dir.c | 29 +++++++++++++++++++++++++++++ dir.h | 6 ++++++ sparse-index.c | 12 +++--------- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 17528e8f922693..f675bdeae4a5f8 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -190,8 +190,6 @@ static int refresh(int verbose, const struct pathspec *pathspec) struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP; int flags = REFRESH_IGNORE_SKIP_WORKTREE | (verbose ? 
REFRESH_IN_PORCELAIN : REFRESH_QUIET); - struct pattern_list pl = { 0 }; - int sparse_checkout_enabled = !get_sparse_checkout_patterns(&pl); seen = xcalloc(pathspec->nr, 1); refresh_index(&the_index, flags, pathspec, seen, @@ -199,12 +197,10 @@ static int refresh(int verbose, const struct pathspec *pathspec) for (i = 0; i < pathspec->nr; i++) { if (!seen[i]) { const char *path = pathspec->items[i].original; - int dtype = DT_REG; if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) || - (sparse_checkout_enabled && - !path_matches_pattern_list(path, strlen(path), NULL, - &dtype, &pl, &the_index))) { + (core_apply_sparse_checkout && + path_in_sparse_checkout(path, &the_index) == NOT_MATCHED)) { string_list_append(&only_match_skip_worktree, pathspec->items[i].original); } else { diff --git a/dir.c b/dir.c index 03c4d212672bc4..7027bdfa068266 100644 --- a/dir.c +++ b/dir.c @@ -1439,6 +1439,35 @@ enum pattern_match_result path_matches_pattern_list( return result; } +int init_sparse_checkout_patterns(struct index_state *istate) +{ + if (istate->sparse_checkout_patterns) + return 0; + + CALLOC_ARRAY(istate->sparse_checkout_patterns, 1); + + if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) { + FREE_AND_NULL(istate->sparse_checkout_patterns); + return -1; + } + + return 0; +} + +enum pattern_match_result path_in_sparse_checkout(const char *path, + struct index_state *istate) +{ + int dtype = DT_REG; + init_sparse_checkout_patterns(istate); + + if (!istate->sparse_checkout_patterns) + return MATCHED; + + return path_matches_pattern_list(path, strlen(path), NULL, &dtype, + istate->sparse_checkout_patterns, + istate); +} + static struct path_pattern *last_matching_pattern_from_lists( struct dir_struct *dir, struct index_state *istate, const char *pathname, int pathlen, diff --git a/dir.h b/dir.h index b3e1a54a97145d..9af2e8c4ba4d45 100644 --- a/dir.h +++ b/dir.h @@ -394,6 +394,12 @@ enum pattern_match_result path_matches_pattern_list(const char 
*pathname, const char *basename, int *dtype, struct pattern_list *pl, struct index_state *istate); + +int init_sparse_checkout_patterns(struct index_state *state); + +enum pattern_match_result path_in_sparse_checkout(const char *path, + struct index_state *istate); + struct dir_entry *dir_add_ignored(struct dir_struct *dir, struct index_state *istate, const char *pathname, int len); diff --git a/sparse-index.c b/sparse-index.c index b6e90417556afc..2efc9fd4910844 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -34,17 +34,14 @@ static int convert_to_sparse_rec(struct index_state *istate, int i, can_convert = 1; int start_converted = num_converted; enum pattern_match_result match; - int dtype = DT_UNKNOWN; struct strbuf child_path = STRBUF_INIT; - struct pattern_list *pl = istate->sparse_checkout_patterns; /* * Is the current path outside of the sparse cone? * Then check if the region can be replaced by a sparse * directory entry (everything is sparse and merged). */ - match = path_matches_pattern_list(ct_path, ct_pathlen, - NULL, &dtype, pl, istate); + match = path_in_sparse_checkout(ct_path, istate); if (match != NOT_MATCHED) can_convert = 0; @@ -153,11 +150,8 @@ int convert_to_sparse(struct index_state *istate) if (!istate->repo->settings.sparse_index) return 0; - if (!istate->sparse_checkout_patterns) { - istate->sparse_checkout_patterns = xcalloc(1, sizeof(struct pattern_list)); - if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) - return 0; - } + if (init_sparse_checkout_patterns(istate) < 0) + return 0; /* * We need cone-mode patterns to use sparse-index. 
If a user edits From e311574ccf9f6032203d570d6a3a3b3be8ba0404 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:11 +0000 Subject: [PATCH 186/198] attr: be careful about sparse directories Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- attr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/attr.c b/attr.c index d029e681f2880a..a1009f7802900b 100644 --- a/attr.c +++ b/attr.c @@ -14,6 +14,7 @@ #include "utf8.h" #include "quote.h" #include "thread-utils.h" +#include "dir.h" const char git_attr__true[] = "(builtin)true"; const char git_attr__false[] = "\0(builtin)false"; @@ -744,6 +745,19 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate, if (!istate) return NULL; + /* + * In the case of cone-mode sparse-checkout, getting the + * .gitattributes file from a directory is meaningless: all + * contained paths will be sparse if the .gitattributes is also + * sparse. In the case of a sparse index, it is critical that we + * don't go looking for one as it will expand the index. + */ + init_sparse_checkout_patterns(istate); + if (istate->sparse_checkout_patterns && + istate->sparse_checkout_patterns->use_cone_patterns && + path_in_sparse_checkout(path, istate) == NOT_MATCHED) + return NULL; + buf = read_blob_data_from_index(istate, path, NULL); if (!buf) return NULL; From 7aa0a9109b8df6fab5eb9ce818b13045d9db59a6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:12 +0000 Subject: [PATCH 187/198] sparse-index: add SPARSE_INDEX_IGNORE_CONFIG flag The convert_to_sparse() method checks for the GIT_TEST_SPARSE_INDEX environment variable or the "index.sparse" config setting before converting the index to a sparse one. This is for ease of use since all current consumers are preparing to compress the index before writing it to disk. 
If these settings are not enabled, then convert_to_sparse() silently returns without doing anything. We will add a consumer in the next change that wants to use the sparse index as an in-memory data structure, regardless of whether the on-disk format should be sparse. To that end, create the SPARSE_INDEX_IGNORE_CONFIG flag that will skip these config checks when enabled. All current consumers are modified to pass '0' in the new 'flags' parameter. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- read-cache.c | 4 ++-- sparse-index.c | 30 ++++++++++++++++-------------- sparse-index.h | 3 ++- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/read-cache.c b/read-cache.c index 9048ef9e905251..f5d4385c4080a1 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3069,7 +3069,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l int ret; int was_full = !istate->sparse_index; - ret = convert_to_sparse(istate); + ret = convert_to_sparse(istate, 0); if (ret) { warning(_("failed to convert to a sparse-index")); @@ -3182,7 +3182,7 @@ static int write_shared_index(struct index_state *istate, int ret, was_full = !istate->sparse_index; move_cache_to_base_index(istate); - convert_to_sparse(istate); + convert_to_sparse(istate, 0); trace2_region_enter_printf("index", "shared/do_write_index", the_repository, "%s", get_tempfile_path(*temp)); diff --git a/sparse-index.c b/sparse-index.c index 2efc9fd4910844..532fd11787ea7a 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -124,7 +124,7 @@ static int index_has_unmerged_entries(struct index_state *istate) return 0; } -int convert_to_sparse(struct index_state *istate) +int convert_to_sparse(struct index_state *istate, int flags) { int test_env; @@ -135,20 +135,22 @@ int convert_to_sparse(struct index_state *istate) if (!istate->repo) istate->repo = the_repository; - /* - * The GIT_TEST_SPARSE_INDEX environment variable triggers the - * 
index.sparse config variable to be on. - */ - test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1); - if (test_env >= 0) - set_sparse_index_config(istate->repo, test_env); + if (!(flags & SPARSE_INDEX_IGNORE_CONFIG)) { + /* + * The GIT_TEST_SPARSE_INDEX environment variable triggers the + * index.sparse config variable to be on. + */ + test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1); + if (test_env >= 0) + set_sparse_index_config(istate->repo, test_env); - /* - * Only convert to sparse if index.sparse is set. - */ - prepare_repo_settings(istate->repo); - if (!istate->repo->settings.sparse_index) - return 0; + /* + * Only convert to sparse if index.sparse is set. + */ + prepare_repo_settings(istate->repo); + if (!istate->repo->settings.sparse_index) + return 0; + } if (init_sparse_checkout_patterns(istate) < 0) return 0; diff --git a/sparse-index.h b/sparse-index.h index 1115a0d7dd984b..475f4f0f8dae73 100644 --- a/sparse-index.h +++ b/sparse-index.h @@ -2,7 +2,8 @@ #define SPARSE_INDEX_H__ struct index_state; -int convert_to_sparse(struct index_state *istate); +#define SPARSE_INDEX_IGNORE_CONFIG (1 << 0) +int convert_to_sparse(struct index_state *istate, int flags); /* * Some places in the codebase expect to search for a specific path. From 8f3a3a287fa8266b18a697398237d5f5fd7be5c1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Tue, 10 Aug 2021 19:50:13 +0000 Subject: [PATCH 188/198] sparse-checkout: clear tracked sparse dirs When changing the scope of a sparse-checkout using cone mode, we might have some tracked directories go out of scope. The current logic removes the tracked files from within those directories, but leaves the ignored files within those directories. This is a bit unexpected to users who have given input to Git saying they don't need those directories anymore. This is something that is new to the cone mode pattern type: the user has explicitly said "I want these directories and _not_ those directories." 
The typical sparse-checkout patterns more generally apply to "I want files with these patterns" so it is natural to leave ignored files as they are. This focus on directories in cone mode provides us an opportunity to change the behavior. Leaving these ignored files in the sparse directories makes it impossible to gain performance benefits in the sparse index. When we track into these directories, we need to know if the files are ignored or not, which might depend on the _tracked_ .gitignore file(s) within the sparse directory. This depends on the indexed version of the file, so the sparse directory must be expanded. By deleting the sparse directories when changing scope (or running 'git sparse-checkout reapply') we regain these performance benefits as if the repository was in a clean state. Since these ignored files are frequently build output or helper files from IDEs, the users should not need the files now that the tracked files are removed. If the tracked files reappear, then they will have newer timestamps than the build artifacts, so the artifacts will need to be regenerated anyway. Use the sparse-index as a data structure in order to find the sparse directories that can be safely deleted. Re-expand the index to a full one if it was full before. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/git-sparse-checkout.txt | 8 +++ builtin/sparse-checkout.c | 95 +++++++++++++++++++++++++++ t/t1091-sparse-checkout-builtin.sh | 59 +++++++++++++++++ 3 files changed, 162 insertions(+) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index fdcf43f87cb373..f9022b9d5554bd 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -210,6 +210,14 @@ case-insensitive check. This corrects for case mismatched filenames in the 'git sparse-checkout set' command to reflect the expected cone in the working directory.
+The cone mode sparse-checkout patterns will also remove ignored files that +are not within the sparse-checkout definition. This is important behavior +to preserve the performance of the sparse index, but also matches that +cone mode patterns care about directories, not files. If there exist files +that are untracked and not ignored, then Git will not delete files within +that directory other than the tracked files that are now out of scope. +These files should be removed manually to ensure Git can behave optimally. + SUBMODULES ---------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 8ba9f13787b058..b06c8f885ac4ba 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -100,6 +100,99 @@ static int sparse_checkout_list(int argc, const char **argv) return 0; } +static void clean_tracked_sparse_directories(struct repository *r) +{ + int i, was_full = 0; + struct strbuf path = STRBUF_INIT; + size_t pathlen; + struct string_list_item *item; + struct string_list sparse_dirs = STRING_LIST_INIT_DUP; + + /* + * If we are not using cone mode patterns, then we cannot + * delete directories outside of the sparse cone. + */ + if (!r || !r->index || !r->worktree) + return; + init_sparse_checkout_patterns(r->index); + if (!r->index->sparse_checkout_patterns || + !r->index->sparse_checkout_patterns->use_cone_patterns) + return; + + /* + * Use the sparse index as a data structure to assist finding + * directories that are safe to delete. This conversion to a + * sparse index will not delete directories that contain + * conflicted entries or submodules. + */ + if (!r->index->sparse_index) { + /* + * If something, such as a merge conflict or other concern, + * prevents us from converting to a sparse index, then do + * not try deleting files. 
+ */ + if (convert_to_sparse(r->index, SPARSE_INDEX_IGNORE_CONFIG)) + return; + was_full = 1; + } + + strbuf_addstr(&path, r->worktree); + strbuf_complete(&path, '/'); + pathlen = path.len; + + /* + * Collect directories that have gone out of scope but also + * exist on disk, so there is some work to be done. We need to + * store the entries in a list before exploring, since that might + * expand the sparse-index again. + */ + for (i = 0; i < r->index->cache_nr; i++) { + struct cache_entry *ce = r->index->cache[i]; + + if (S_ISSPARSEDIR(ce->ce_mode) && + repo_file_exists(r, ce->name)) + string_list_append(&sparse_dirs, ce->name); + } + + for_each_string_list_item(item, &sparse_dirs) { + struct dir_struct dir = DIR_INIT; + struct pathspec p = { 0 }; + struct strvec s = STRVEC_INIT; + + strbuf_setlen(&path, pathlen); + strbuf_addstr(&path, item->string); + + dir.flags |= DIR_SHOW_IGNORED_TOO; + + setup_standard_excludes(&dir); + strvec_push(&s, path.buf); + + parse_pathspec(&p, PATHSPEC_GLOB, 0, NULL, s.v); + fill_directory(&dir, r->index, &p); + + if (dir.nr) { + warning(_("directory '%s' contains untracked files," + " but is not in the sparse-checkout cone"), + item->string); + } else if (remove_dir_recursively(&path, 0)) { + /* + * Removal is "best effort". If something blocks + * the deletion, then continue with a warning. 
+ */ + warning(_("failed to remove directory '%s'"), + item->string); + } + + dir_clear(&dir); + } + + string_list_clear(&sparse_dirs, 0); + strbuf_release(&path); + + if (was_full) + ensure_full_index(r->index); +} + static int update_working_directory(struct pattern_list *pl) { enum update_sparsity_result result; @@ -141,6 +234,8 @@ static int update_working_directory(struct pattern_list *pl) else rollback_lock_file(&lock_file); + clean_tracked_sparse_directories(r); + r->index->sparse_checkout_patterns = NULL; return result; } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 38fc8340f5c9b7..71236981e6484f 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -642,4 +642,63 @@ test_expect_success MINGW 'cone mode replaces backslashes with slashes' ' check_files repo/deep a deeper1 ' +test_expect_success 'cone mode clears ignored subdirectories' ' + rm repo/.git/info/sparse-checkout && + + git -C repo sparse-checkout init --cone && + git -C repo sparse-checkout set deep/deeper1 && + + cat >repo/.gitignore <<-\EOF && + obj/ + *.o + EOF + + git -C repo add .gitignore && + git -C repo commit -m ".gitignore" && + + mkdir -p repo/obj repo/folder1/obj repo/deep/deeper2/obj && + for file in folder1/obj/a obj/a folder1/file.o folder1.o \ + deep/deeper2/obj/a deep/deeper2/file.o file.o + do + echo ignored >repo/$file || return 1 + done && + + git -C repo status --porcelain=v2 >out && + test_must_be_empty out && + + git -C repo sparse-checkout reapply && + test_path_is_missing repo/folder1 && + test_path_is_missing repo/deep/deeper2 && + test_path_is_dir repo/obj && + test_path_is_file repo/file.o && + + git -C repo status --porcelain=v2 >out && + test_must_be_empty out && + + git -C repo sparse-checkout set deep/deeper2 && + test_path_is_missing repo/deep/deeper1 && + test_path_is_dir repo/deep/deeper2 && + test_path_is_dir repo/obj && + test_path_is_file repo/file.o && + + 
>repo/deep/deeper2/ignored.o && + >repo/deep/deeper2/untracked && + + # When an untracked file is in the way, all untracked files + # (even ignored files) are preserved. + git -C repo sparse-checkout set folder1 2>err && + grep "contains untracked files" err && + test_path_is_file repo/deep/deeper2/ignored.o && + test_path_is_file repo/deep/deeper2/untracked && + + # The rest of the cone matches expectation + test_path_is_missing repo/deep/deeper1 && + test_path_is_dir repo/obj && + test_path_is_file repo/file.o && + + git -C repo status --porcelain=v2 >out && + echo "? deep/deeper2/untracked" >expect && + test_cmp expect out +' + test_done From 65d8c5252d0ab231bef03cf21d312713f9261a0c Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:39 -0700 Subject: [PATCH 189/198] submodule: lazily add submodule ODBs as alternates Teach Git to add submodule ODBs as alternates to the object store of the_repository only upon the first access of an object not in the_repository, and not when add_submodule_odb() is called. This provides a means of gradually migrating from accessing a submodule's object through alternates to accessing a submodule's object by explicitly passing its repository object. Any Git command can declare that it might access submodule objects by calling add_submodule_odb() (as they do now), but the submodule ODBs themselves will not be added until needed, so individual commands and/or combinations of arguments can be migrated one by one. [The advantage of explicit repository-object passing is code clarity (it is clear which repository an object read is from), performance (there is no need to linearly search through all submodule ODBs whenever an object is accessed from any repository, whether superproject or submodule), and the possibility of future features like partial clone submodules (which right now is not possible because if an object is missing, we do not know which repository to lazy-fetch into).] 
This commit also introduces an environment variable that a test may set to make the actual registration of alternates fatal, in order to demonstrate that its codepaths do not need this registration. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- object-file.c | 5 +++++ submodule.c | 20 +++++++++++++++++++- submodule.h | 7 +++++++ t/README | 10 ++++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 3d27dc8dea3337..621b121bcb90e1 100644 --- a/object-file.c +++ b/object-file.c @@ -32,6 +32,7 @@ #include "packfile.h" #include "object-store.h" #include "promisor-remote.h" +#include "submodule.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -1592,6 +1593,10 @@ static int do_oid_object_info_extended(struct repository *r, break; } + if (register_all_submodule_odb_as_alternates()) + /* We added some alternates; retry */ + continue; + /* Check if it is a missing object */ if (fetch_if_missing && repo_has_promisor_remote(r) && !already_retried && diff --git a/submodule.c b/submodule.c index 8e611fe1dbf1f7..8fde90e9063b3a 100644 --- a/submodule.c +++ b/submodule.c @@ -165,6 +165,8 @@ void stage_updated_gitmodules(struct index_state *istate) die(_("staging updated .gitmodules failed")); } +static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP; + /* TODO: remove this function, use repo_submodule_init instead. 
*/ int add_submodule_odb(const char *path) { @@ -178,12 +180,28 @@ int add_submodule_odb(const char *path) ret = -1; goto done; } - add_to_alternates_memory(objects_directory.buf); + string_list_insert(&added_submodule_odb_paths, + strbuf_detach(&objects_directory, NULL)); done: strbuf_release(&objects_directory); return ret; } +int register_all_submodule_odb_as_alternates(void) +{ + int i; + int ret = added_submodule_odb_paths.nr; + + for (i = 0; i < added_submodule_odb_paths.nr; i++) + add_to_alternates_memory(added_submodule_odb_paths.items[i].string); + if (ret) { + string_list_clear(&added_submodule_odb_paths, 0); + if (git_env_bool("GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB", 0)) + BUG("register_all_submodule_odb_as_alternates() called"); + } + return ret; +} + void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path) { diff --git a/submodule.h b/submodule.h index 84640c49c1149d..c252784bc2fabe 100644 --- a/submodule.h +++ b/submodule.h @@ -97,7 +97,14 @@ int submodule_uses_gitfile(const char *path); #define SUBMODULE_REMOVAL_IGNORE_IGNORED_UNTRACKED (1<<2) int bad_to_remove_submodule(const char *path, unsigned flags); +/* + * Call add_submodule_odb() to add the submodule at the given path to a list. + * When register_all_submodule_odb_as_alternates() is called, the object stores + * of all submodules in that list will be added as alternates in + * the_repository. + */ int add_submodule_odb(const char *path); +int register_all_submodule_odb_as_alternates(void); /* * Checks if there are submodule changes in a..b. If a is the null OID, diff --git a/t/README b/t/README index 9e701223020380..8b67b4f00b5a9e 100644 --- a/t/README +++ b/t/README @@ -448,6 +448,16 @@ GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting to <n> and 'checkout.thresholdForParallelism' to 0, forcing the execution of the parallel-checkout code. 
+GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=<boolean>, when true, makes +registering submodule ODBs as alternates a fatal action. Support for +this environment variable can be removed once the migration to +explicitly providing repositories when accessing submodule objects is +complete (in which case we might want to replace this with a trace2 +call so that users can make it visible if accessing submodule objects +without an explicit repository still happens) or needs to be abandoned +for whatever reason (in which case the migrated codepaths still retain +their performance benefits). + Naming Tests ------------ From e37cfff2abd87c3e8c038be8ff3b2a7402a3b03c Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:40 -0700 Subject: [PATCH 190/198] grep: use submodule-ODB-as-alternate lazy-addition In the parent commit, Git was taught to add submodule ODBs as alternates lazily, but grep does not use this because it computes the path to add directly, not going through add_submodule_odb(). Add an equivalent to add_submodule_odb() that takes the exact ODB path and teach grep to use it. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 2 +- submodule.c | 5 +++++ submodule.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index 7d2f8e5adb69c4..87bcb934a24c34 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -450,7 +450,7 @@ static int grep_submodule(struct grep_opt *opt, * store is no longer global and instead is a member of the repository * object. 
*/ - add_to_alternates_memory(subrepo.objects->odb->path); + add_submodule_odb_by_path(subrepo.objects->odb->path); obj_read_unlock(); memcpy(&subopt, opt, sizeof(subopt)); diff --git a/submodule.c b/submodule.c index 8fde90e9063b3a..8de1aecaeb0311 100644 --- a/submodule.c +++ b/submodule.c @@ -187,6 +187,11 @@ int add_submodule_odb(const char *path) return ret; } +void add_submodule_odb_by_path(const char *path) +{ + string_list_insert(&added_submodule_odb_paths, xstrdup(path)); +} + int register_all_submodule_odb_as_alternates(void) { int i; diff --git a/submodule.h b/submodule.h index c252784bc2fabe..17a06cc43bf3d9 100644 --- a/submodule.h +++ b/submodule.h @@ -104,6 +104,7 @@ int bad_to_remove_submodule(const char *path, unsigned flags); * the_repository. */ int add_submodule_odb(const char *path); +void add_submodule_odb_by_path(const char *path); int register_all_submodule_odb_as_alternates(void); /* From 30a95d5f1339978f31ed53ed8b847ffa4f011173 Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:41 -0700 Subject: [PATCH 191/198] grep: typesafe versions of grep_source_init grep_source_init() can create "struct grep_source" objects and, depending on the value of the type passed, some void-pointer parameters have different meanings. Because one of these types (GREP_SOURCE_OID) will require an additional parameter in a subsequent patch, take the opportunity to increase clarity and type safety by replacing this function with individual functions for each type. 
Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 4 ++-- grep.c | 43 +++++++++++++++++++++++++++---------------- grep.h | 8 +++++--- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 87bcb934a24c34..e454335e9d8fee 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -333,7 +333,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, struct grep_source gs; grep_source_name(opt, filename, tree_name_len, &pathbuf); - grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid); + grep_source_init_oid(&gs, pathbuf.buf, path, oid); strbuf_release(&pathbuf); if (num_threads > 1) { @@ -359,7 +359,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) struct grep_source gs; grep_source_name(opt, filename, 0, &buf); - grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename); + grep_source_init_file(&gs, buf.buf, filename); strbuf_release(&buf); if (num_threads > 1) { diff --git a/grep.c b/grep.c index 424a39591b05e5..ba3711dc567d9b 100644 --- a/grep.c +++ b/grep.c @@ -1830,7 +1830,7 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) struct grep_source gs; int r; - grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL); + grep_source_init_buf(&gs); gs.buf = buf; gs.size = size; @@ -1840,28 +1840,39 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) return r; } -void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const char *path, - const void *identifier) +void grep_source_init_file(struct grep_source *gs, const char *name, + const char *path) { - gs->type = type; + gs->type = GREP_SOURCE_FILE; gs->name = xstrdup_or_null(name); gs->path = xstrdup_or_null(path); gs->buf = NULL; gs->size = 0; gs->driver = NULL; + gs->identifier = xstrdup(path); +} - switch (type) { - case GREP_SOURCE_FILE: - gs->identifier = 
xstrdup(identifier); - break; - case GREP_SOURCE_OID: - gs->identifier = oiddup(identifier); - break; - case GREP_SOURCE_BUF: - gs->identifier = NULL; - break; - } +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid) +{ + gs->type = GREP_SOURCE_OID; + gs->name = xstrdup_or_null(name); + gs->path = xstrdup_or_null(path); + gs->buf = NULL; + gs->size = 0; + gs->driver = NULL; + gs->identifier = oiddup(oid); +} + +void grep_source_init_buf(struct grep_source *gs) +{ + gs->type = GREP_SOURCE_BUF; + gs->name = NULL; + gs->path = NULL; + gs->buf = NULL; + gs->size = 0; + gs->driver = NULL; + gs->identifier = NULL; } void grep_source_clear(struct grep_source *gs) diff --git a/grep.h b/grep.h index 72f82b1e302397..f4a3090f1c8f29 100644 --- a/grep.h +++ b/grep.h @@ -195,9 +195,11 @@ struct grep_source { struct userdiff_driver *driver; }; -void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const char *path, - const void *identifier); +void grep_source_init_file(struct grep_source *gs, const char *name, + const char *path); +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid); +void grep_source_init_buf(struct grep_source *gs); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); void grep_source_load_driver(struct grep_source *gs, From f53d26f101d12f9ab6456f4af99ab830023b1f95 Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:42 -0700 Subject: [PATCH 192/198] grep: read submodule entry with explicit repo Replace an existing parse_object_or_die() call (which implicitly works on the_repository) with a function call that allows a repository to be passed in. There is no such direct equivalent to parse_object_or_die(), but we only need the type of the object, so replace with oid_object_info(). 
Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index e454335e9d8fee..9e61c7c9936c8a 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -457,27 +457,27 @@ static int grep_submodule(struct grep_opt *opt, subopt.repo = &subrepo; if (oid) { - struct object *object; + enum object_type object_type; struct tree_desc tree; void *data; unsigned long size; struct strbuf base = STRBUF_INIT; obj_read_lock(); - object = parse_object_or_die(oid, NULL); + object_type = oid_object_info(&subrepo, oid, NULL); obj_read_unlock(); data = read_object_with_reference(&subrepo, - &object->oid, tree_type, + oid, tree_type, &size, NULL); if (!data) - die(_("unable to read tree (%s)"), oid_to_hex(&object->oid)); + die(_("unable to read tree (%s)"), oid_to_hex(oid)); strbuf_addstr(&base, filename); strbuf_addch(&base, '/'); init_tree_desc(&tree, data, size); hit = grep_tree(&subopt, pathspec, &tree, &base, base.len, - object->type == OBJ_COMMIT); + object_type == OBJ_COMMIT); strbuf_release(&base); free(data); } else { From f601ca4f06670fb57fa097b78eb3bfcd2a58183b Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:43 -0700 Subject: [PATCH 193/198] grep: allocate subrepos on heap Currently, struct repository objects corresponding to submodules are allocated on the stack in grep_submodule(). This currently works because they will not be used once grep_submodule() exits, but a subsequent patch will require these structs to be accessible for longer (perhaps even in another thread). Allocate them on the heap and clear them only at the very end. 
Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 9e61c7c9936c8a..5a40e18e477395 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -65,6 +65,9 @@ static int todo_done; /* Has all work items been added? */ static int all_work_added; +static struct repository **repos_to_free; +static size_t repos_to_free_nr, repos_to_free_alloc; + /* This lock protects all the variables above. */ static pthread_mutex_t grep_mutex; @@ -168,6 +171,17 @@ static void work_done(struct work_item *w) grep_unlock(); } +static void free_repos(void) +{ + int i; + + for (i = 0; i < repos_to_free_nr; i++) { + repo_clear(repos_to_free[i]); + free(repos_to_free[i]); + } + free(repos_to_free); +} + static void *run(void *arg) { int hit = 0; @@ -415,19 +429,24 @@ static int grep_submodule(struct grep_opt *opt, const struct object_id *oid, const char *filename, const char *path, int cached) { - struct repository subrepo; + struct repository *subrepo; struct repository *superproject = opt->repo; const struct submodule *sub; struct grep_opt subopt; - int hit; + int hit = 0; sub = submodule_from_path(superproject, null_oid(), path); if (!is_submodule_active(superproject, path)) return 0; - if (repo_submodule_init(&subrepo, superproject, sub)) + subrepo = xmalloc(sizeof(*subrepo)); + if (repo_submodule_init(subrepo, superproject, sub)) { + free(subrepo); return 0; + } + ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc); + repos_to_free[repos_to_free_nr++] = subrepo; /* * NEEDSWORK: repo_read_gitmodules() might call @@ -438,7 +457,7 @@ static int grep_submodule(struct grep_opt *opt, * subrepo's odbs to the in-memory alternates list. 
*/ obj_read_lock(); - repo_read_gitmodules(&subrepo, 0); + repo_read_gitmodules(subrepo, 0); /* * NEEDSWORK: This adds the submodule's object directory to the list of @@ -450,11 +469,11 @@ static int grep_submodule(struct grep_opt *opt, * store is no longer global and instead is a member of the repository * object. */ - add_submodule_odb_by_path(subrepo.objects->odb->path); + add_submodule_odb_by_path(subrepo->objects->odb->path); obj_read_unlock(); memcpy(&subopt, opt, sizeof(subopt)); - subopt.repo = &subrepo; + subopt.repo = subrepo; if (oid) { enum object_type object_type; @@ -464,9 +483,9 @@ static int grep_submodule(struct grep_opt *opt, struct strbuf base = STRBUF_INIT; obj_read_lock(); - object_type = oid_object_info(&subrepo, oid, NULL); + object_type = oid_object_info(subrepo, oid, NULL); obj_read_unlock(); - data = read_object_with_reference(&subrepo, + data = read_object_with_reference(subrepo, oid, tree_type, &size, NULL); if (!data) @@ -484,7 +503,6 @@ static int grep_submodule(struct grep_opt *opt, hit = grep_cache(&subopt, pathspec, cached); } - repo_clear(&subrepo); return hit; } @@ -1182,5 +1200,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) run_pager(&opt, prefix); clear_pathspec(&pathspec); free_grep_patterns(&opt); + free_repos(); return !hit; } From b9d5dec89df646df030aa6a95b3a70e034a0ba7a Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:44 -0700 Subject: [PATCH 194/198] grep: add repository to OID grep sources Record the repository whenever an OID grep source is created, and teach the worker threads to explicitly provide the repository when accessing objects. 
Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 2 +- grep.c | 7 +++++-- grep.h | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 5a40e18e477395..ea6df6dca4ccd9 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -347,7 +347,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, struct grep_source gs; grep_source_name(opt, filename, tree_name_len, &pathbuf); - grep_source_init_oid(&gs, pathbuf.buf, path, oid); + grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo); strbuf_release(&pathbuf); if (num_threads > 1) { diff --git a/grep.c b/grep.c index ba3711dc567d9b..14c677b4ae1a77 100644 --- a/grep.c +++ b/grep.c @@ -1853,7 +1853,8 @@ void grep_source_init_file(struct grep_source *gs, const char *name, } void grep_source_init_oid(struct grep_source *gs, const char *name, - const char *path, const struct object_id *oid) + const char *path, const struct object_id *oid, + struct repository *repo) { gs->type = GREP_SOURCE_OID; gs->name = xstrdup_or_null(name); @@ -1862,6 +1863,7 @@ void grep_source_init_oid(struct grep_source *gs, const char *name, gs->size = 0; gs->driver = NULL; gs->identifier = oiddup(oid); + gs->repo = repo; } void grep_source_init_buf(struct grep_source *gs) @@ -1901,7 +1903,8 @@ static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; - gs->buf = read_object_file(gs->identifier, &type, &gs->size); + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, + &gs->size); if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, diff --git a/grep.h b/grep.h index f4a3090f1c8f29..c5234f9b38fd08 100644 --- a/grep.h +++ b/grep.h @@ -187,6 +187,7 @@ struct grep_source { GREP_SOURCE_BUF, } type; void *identifier; + struct repository *repo; /* if GREP_SOURCE_OID */ char *buf; unsigned long size; @@ -198,7 +199,8 @@ struct grep_source { void 
grep_source_init_file(struct grep_source *gs, const char *name, const char *path); void grep_source_init_oid(struct grep_source *gs, const char *name, - const char *path, const struct object_id *oid); + const char *path, const struct object_id *oid, + struct repository *repo); void grep_source_init_buf(struct grep_source *gs); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); From 8ac00fb3ce03bd8066bd5ae8257e8456355f8976 Mon Sep 17 00:00:00 2001 From: Jonathan Tan <jonathantanmy@google.com> Date: Tue, 10 Aug 2021 11:28:45 -0700 Subject: [PATCH 195/198] t7814: show lack of alternate ODB-adding The previous patches have made "git grep" no longer need to add submodule ODBs as alternates, at least for the code paths tested in t7814. Demonstrate this by making adding a submodule ODB as an alternate fatal in this test. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7814-grep-recurse-submodules.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh index 828cb3ba5818fd..3172f5b936489e 100755 --- a/t/t7814-grep-recurse-submodules.sh +++ b/t/t7814-grep-recurse-submodules.sh @@ -8,6 +8,9 @@ submodules. . ./test-lib.sh +GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=1 +export GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB + test_expect_success 'setup directory structure and submodule' ' echo "(1|2)d(3|4)" >a && mkdir b && From 96a547145144849918c64f7144ae090ec595dbcf Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Tue, 10 Aug 2021 15:12:01 -0700 Subject: [PATCH 196/198] userdiff: comment on the builtin patterns Remind developers that they do not need to go overboard to implement patterns to prepare for invalid constructs. They only have to be sufficiently permissive, assuming that the payload is syntactically correct. Text stolen mostly from Johannes Sixt. 
Signed-off-by: Junio C Hamano <gitster@pobox.com> --- userdiff.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/userdiff.c b/userdiff.c index 3c3bbe38b048db..29adef7d6dce3e 100644 --- a/userdiff.c +++ b/userdiff.c @@ -13,6 +13,16 @@ static int drivers_alloc; #define IPATTERN(name, pattern, word_regex) \ { name, NULL, -1, { pattern, REG_EXTENDED | REG_ICASE }, \ word_regex "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+" } + +/* + * Built-in drivers for various languages, sorted by their names + * (except that the "default" is left at the end). + * + * When writing or updating patterns, assume that the contents these + * patterns are applied to are syntactically correct. You do not have + * to implement all syntactical corner cases---the patterns have to be + * sufficiently permissive. + */ static struct userdiff_driver builtin_drivers[] = { IPATTERN("ada", "!^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n" From d393e45f8cccb270eef82e34cc744c1428f77998 Mon Sep 17 00:00:00 2001 From: Josh Steadmon <steadmon@google.com> Date: Tue, 10 Aug 2021 12:20:38 -0700 Subject: [PATCH 197/198] sequencer: advise if skipping cherry-picked commit Silently skipping commits when rebasing with --no-reapply-cherry-picks (currently the default behavior) can cause user confusion. Issue advice in this case so that users are aware of what's happening. 
Signed-off-by: Josh Steadmon <steadmon@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/advice.txt | 3 +++ advice.c | 3 +++ advice.h | 1 + sequencer.c | 22 ++++++++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt index 8b2849ff7b3f5c..063eec2511d37e 100644 --- a/Documentation/config/advice.txt +++ b/Documentation/config/advice.txt @@ -44,6 +44,9 @@ advice.*:: Shown when linkgit:git-push[1] rejects a forced update of a branch when its remote-tracking ref has updates that we do not have locally. + skippedCherryPicks:: + Shown when linkgit:git-rebase[1] skips a commit that has already + been cherry-picked onto the upstream branch. statusAheadBehind:: Shown when linkgit:git-status[1] computes the ahead/behind counts for a local ref compared to its remote tracking ref, diff --git a/advice.c b/advice.c index 0b9c89c48ab996..629e594252eb57 100644 --- a/advice.c +++ b/advice.c @@ -34,6 +34,7 @@ int advice_checkout_ambiguous_remote_branch_name = 1; int advice_submodule_alternate_error_strategy_die = 1; int advice_add_ignored_file = 1; int advice_add_empty_pathspec = 1; +int advice_skipped_cherry_picks = 1; static int advice_use_color = -1; static char advice_colors[][COLOR_MAXLEN] = { @@ -96,6 +97,7 @@ static struct { { "submoduleAlternateErrorStrategyDie", &advice_submodule_alternate_error_strategy_die }, { "addIgnoredFile", &advice_add_ignored_file }, { "addEmptyPathspec", &advice_add_empty_pathspec }, + { "skippedCherryPicks", &advice_skipped_cherry_picks }, /* make this an alias for backward compatibility */ { "pushNonFastForward", &advice_push_update_rejected } @@ -139,6 +141,7 @@ static struct { [ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE] = { "submoduleAlternateErrorStrategyDie", 1 }, [ADVICE_UPDATE_SPARSE_PATH] = { "updateSparsePath", 1 }, [ADVICE_WAITING_FOR_EDITOR] = { "waitingForEditor", 1 }, + [ADVICE_SKIPPED_CHERRY_PICKS] = { 
"skippedCherryPicks", 1 }, }; static const char turn_off_instructions[] = diff --git a/advice.h b/advice.h index 9f8ffc73546b39..d705bf164c36ca 100644 --- a/advice.h +++ b/advice.h @@ -75,6 +75,7 @@ extern int advice_add_empty_pathspec; ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE, ADVICE_UPDATE_SPARSE_PATH, ADVICE_WAITING_FOR_EDITOR, + ADVICE_SKIPPED_CHERRY_PICKS, }; int git_default_advice_config(const char *var, const char *value); diff --git a/sequencer.c b/sequencer.c index 7f07cd00f3f20a..1235f61c9d19dd 100644 --- a/sequencer.c +++ b/sequencer.c @@ -5099,6 +5099,7 @@ static int make_script_with_merges(struct pretty_print_context *pp, int keep_empty = flags & TODO_LIST_KEEP_EMPTY; int rebase_cousins = flags & TODO_LIST_REBASE_COUSINS; int root_with_onto = flags & TODO_LIST_ROOT_WITH_ONTO; + int skipped_commit = 0; struct strbuf buf = STRBUF_INIT, oneline = STRBUF_INIT; struct strbuf label = STRBUF_INIT; struct commit_list *commits = NULL, **tail = &commits, *iter; @@ -5149,8 +5150,13 @@ static int make_script_with_merges(struct pretty_print_context *pp, oidset_insert(&interesting, &commit->object.oid); is_empty = is_original_commit_empty(commit); - if (!is_empty && (commit->object.flags & PATCHSAME)) + if (!is_empty && (commit->object.flags & PATCHSAME)) { + advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS, + _("skipped previously applied commit %s"), + short_commit_name(commit)); + skipped_commit = 1; continue; + } if (is_empty && !keep_empty) continue; @@ -5214,6 +5220,9 @@ static int make_script_with_merges(struct pretty_print_context *pp, oidcpy(&entry->entry.oid, &commit->object.oid); oidmap_put(&commit2todo, entry); } + if (skipped_commit) + advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS, + _("use --reapply-cherry-picks to include skipped commits")); /* * Second phase: @@ -5334,6 +5343,7 @@ int sequencer_make_script(struct repository *r, struct strbuf *out, int argc, const char *insn = flags & TODO_LIST_ABBREVIATE_CMDS ? 
"p" : "pick"; int rebase_merges = flags & TODO_LIST_REBASE_MERGES; int reapply_cherry_picks = flags & TODO_LIST_REAPPLY_CHERRY_PICKS; + int skipped_commit = 0; repo_init_revisions(r, &revs, NULL); revs.verbose_header = 1; @@ -5369,8 +5379,13 @@ int sequencer_make_script(struct repository *r, struct strbuf *out, int argc, while ((commit = get_revision(&revs))) { int is_empty = is_original_commit_empty(commit); - if (!is_empty && (commit->object.flags & PATCHSAME)) + if (!is_empty && (commit->object.flags & PATCHSAME)) { + advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS, + _("skipped previously applied commit %s"), + short_commit_name(commit)); + skipped_commit = 1; continue; + } if (is_empty && !keep_empty) continue; strbuf_addf(out, "%s %s ", insn, @@ -5380,6 +5395,9 @@ int sequencer_make_script(struct repository *r, struct strbuf *out, int argc, strbuf_addf(out, " %c empty", comment_line_char); strbuf_addch(out, '\n'); } + if (skipped_commit) + advise_if_enabled(ADVICE_SKIPPED_CHERRY_PICKS, + _("use --reapply-cherry-picks to include skipped commits")); return 0; } From 193625e6b26e0c4947e2165cac12e13d3d8c2fe8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Tue, 10 Aug 2021 15:42:57 -0700 Subject: [PATCH 198/198] SQUASH??? --- advice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/advice.c b/advice.c index 629e594252eb57..efc92d33af74d8 100644 --- a/advice.c +++ b/advice.c @@ -34,7 +34,7 @@ int advice_checkout_ambiguous_remote_branch_name = 1; int advice_submodule_alternate_error_strategy_die = 1; int advice_add_ignored_file = 1; int advice_add_empty_pathspec = 1; -int advice_skipped_cherry_picks = 1; +static int advice_skipped_cherry_picks = 1; static int advice_use_color = -1; static char advice_colors[][COLOR_MAXLEN] = {