diff --git a/changelog/68227.fixed.md b/changelog/68227.fixed.md new file mode 100644 index 00000000000..8a93f6afc7f --- /dev/null +++ b/changelog/68227.fixed.md @@ -0,0 +1 @@ +Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is applied to the extracted top-level directory in all cases. diff --git a/salt/modules/archive.py b/salt/modules/archive.py index e4dfa5a82b2..dda47329af6 100644 --- a/salt/modules/archive.py +++ b/salt/modules/archive.py @@ -423,8 +423,13 @@ def _unsupported_format(archive_format): } top_level_dirs = [x for x in ret["dirs"] if x.count("/") == 1] # the common_prefix logic handles scenarios where the TLD - # isn't listed as an archive member on its own - common_prefix = os.path.commonprefix(ret["dirs"]) + # isn't listed as an archive member on its own. Consider files + # and links in addition to dirs so that archives which contain + # only file members (e.g. Oracle's GraalVM JDK tarballs) still + # surface their shared top-level directory. + common_prefix = os.path.commonprefix( + ret["dirs"] + ret["files"] + ret["links"] + ) if "/" in common_prefix: common_prefix = common_prefix.split("/")[0] + "/" if common_prefix not in top_level_dirs: diff --git a/tests/pytests/functional/modules/test_archive.py b/tests/pytests/functional/modules/test_archive.py index 1ec0a4e092f..81f7383420c 100644 --- a/tests/pytests/functional/modules/test_archive.py +++ b/tests/pytests/functional/modules/test_archive.py @@ -393,3 +393,41 @@ def test_tar_list_with_similar_top_level_dirs(archive, tmp_path): str(tmp_path / "tld_test.tar.gz"), archive_format="tar", verbose=True ) assert ret == expected + + +@pytest.mark.skip_on_windows +def test_tar_list_no_directory_members_only_files_68227(archive, tmp_path): + """ + Regression test for issue 68227. + + Some tar archives (e.g. Oracle's GraalVM JDK distribution) contain only + file members and no explicit directory members, even though all files + share a common top-level directory prefix. ``archive.list`` should still + report that common prefix as the top-level directory so that + ``archive.extracted`` can enforce user/group ownership on it. + """ + import io + import tarfile + + archive_path = tmp_path / "no_dir_members.tar.gz" + # Build the archive directly with tarfile so we can guarantee that no + # directory members are written. ``tar -cvzf`` always emits an entry for + # the containing directory. + with tarfile.open(str(archive_path), "w:gz") as tar: + for rel in ("LICENSE.txt", "GRAALVM-README.md", "bin/java"): + data = b"x" + info = tarfile.TarInfo(name="graalvm-jdk-21.0.8+12.1/" + rel) + info.size = len(data) + tar.addfile(info, fileobj=io.BytesIO(data)) + + # Sanity-check the archive only has file members + with tarfile.open(str(archive_path)) as tar: + assert not any(m.isdir() for m in tar.getmembers()) + + ret = archive.list(str(archive_path), archive_format="tar", verbose=True) + assert ret["dirs"] == [] + assert "graalvm-jdk-21.0.8+12.1/" in ret["top_level_dirs"], ( + "Expected the common file prefix to be reported as a top-level " + "directory so that archive.extracted can enforce ownership on it; " + f"got top_level_dirs={ret['top_level_dirs']!r}" + )