From 61bec0c32f2e6507adf8ec72220387671fffc5c7 Mon Sep 17 00:00:00 2001 From: "Daniel A. Wozniak" Date: Thu, 11 Jun 2026 17:23:39 -0700 Subject: [PATCH] Fix archive.extracted ownership on tar archives with no directory members PR #61896 added a common-prefix fallback to archive.list so that archive.extracted could enforce user/group ownership on top-level directories that aren't listed as their own member in the tarball. That fallback only consulted ret["dirs"], so archives whose members are all files - such as Oracle's GraalVM JDK tarballs, which contain ``graalvm-jdk-21.0.8+12.1/LICENSE.txt`` and friends but no entry for the ``graalvm-jdk-21.0.8+12.1/`` directory itself - still returned ``top_level_dirs=[]`` and silently skipped the ownership enforcement loop in salt/states/archive.py. Include files and links in the common-prefix computation so the top-level directory is discovered regardless of which member types the archive happens to contain. Fixes #68227 --- changelog/68227.fixed.md | 1 + salt/modules/archive.py | 9 ++++- .../functional/modules/test_archive.py | 38 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 changelog/68227.fixed.md diff --git a/changelog/68227.fixed.md b/changelog/68227.fixed.md new file mode 100644 index 000000000000..8a93f6afc7f6 --- /dev/null +++ b/changelog/68227.fixed.md @@ -0,0 +1 @@ +Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is also applied when all members share a single top-level directory that has no entry of its own.
diff --git a/salt/modules/archive.py b/salt/modules/archive.py index e4dfa5a82b2e..dda47329af68 100644 --- a/salt/modules/archive.py +++ b/salt/modules/archive.py @@ -423,8 +423,13 @@ def _unsupported_format(archive_format): } top_level_dirs = [x for x in ret["dirs"] if x.count("/") == 1] # the common_prefix logic handles scenarios where the TLD - # isn't listed as an archive member on its own - common_prefix = os.path.commonprefix(ret["dirs"]) + # isn't listed as an archive member on its own. Consider files + # and links in addition to dirs so that archives which contain + # only file members (e.g. Oracle's GraalVM JDK tarballs) still + # surface their shared top-level directory. + common_prefix = os.path.commonprefix( + ret["dirs"] + ret["files"] + ret["links"] + ) if "/" in common_prefix: common_prefix = common_prefix.split("/")[0] + "/" if common_prefix not in top_level_dirs: diff --git a/tests/pytests/functional/modules/test_archive.py b/tests/pytests/functional/modules/test_archive.py index 1ec0a4e092fb..81f7383420c1 100644 --- a/tests/pytests/functional/modules/test_archive.py +++ b/tests/pytests/functional/modules/test_archive.py @@ -393,3 +393,41 @@ def test_tar_list_with_similar_top_level_dirs(archive, tmp_path): str(tmp_path / "tld_test.tar.gz"), archive_format="tar", verbose=True ) assert ret == expected + + +@pytest.mark.skip_on_windows +def test_tar_list_no_directory_members_only_files_68227(archive, tmp_path): + """ + Regression test for issue 68227. + + Some tar archives (e.g. Oracle's GraalVM JDK distribution) contain only + file members and no explicit directory members, even though all files + share a common top-level directory prefix. ``archive.list`` should still + report that common prefix as the top-level directory so that + ``archive.extracted`` can enforce user/group ownership on it. 
+ """ + import io + import tarfile + + archive_path = tmp_path / "no_dir_members.tar.gz" + # Build the archive directly with tarfile so we can guarantee that no + # directory members are written. ``tar -cvzf`` run on a directory also + # emits an entry for the directory itself. + with tarfile.open(str(archive_path), "w:gz") as tar: + for rel in ("LICENSE.txt", "GRAALVM-README.md", "bin/java"): + data = b"x" + info = tarfile.TarInfo(name="graalvm-jdk-21.0.8+12.1/" + rel) + info.size = len(data) + tar.addfile(info, fileobj=io.BytesIO(data)) + + # Sanity-check the archive only has file members + with tarfile.open(str(archive_path)) as tar: + assert not any(m.isdir() for m in tar.getmembers()) + + ret = archive.list(str(archive_path), archive_format="tar", verbose=True) + assert ret["dirs"] == [] + assert "graalvm-jdk-21.0.8+12.1/" in ret["top_level_dirs"], ( + "Expected the common file prefix to be reported as a top-level " + "directory so that archive.extracted can enforce ownership on it; " + f"got top_level_dirs={ret['top_level_dirs']!r}" + )