From bf032c279c98858d5cb9a7d3c61f5d10b885bf0e Mon Sep 17 00:00:00 2001 From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:14:49 +0000 Subject: [PATCH 1/6] Test invocation needed for repeat publish --- tests/test_publish_directory.py | 79 +++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py index 5909b18..4f9f005 100644 --- a/tests/test_publish_directory.py +++ b/tests/test_publish_directory.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . import json +import shutil from pathlib import Path, PurePath from unittest.mock import MagicMock, patch @@ -71,7 +72,8 @@ def test_npg_publish_tree_compatibility_ultima( monkeypatch: MonkeyPatch, ): # Arrange - src = Path("./tests/data/ultima/minimal").absolute() + src = tmp_path / "minimal" + shutil.copytree("./tests/data/ultima/minimal", src) # empty_collection stands in for $ZONE/ultimagen/runs # SOP: Destination collection doesn't exist dest = ( @@ -85,39 +87,53 @@ def test_npg_publish_tree_compatibility_ultima( # Public root_metadata = tmp_path / "root_metadata.json" root_metadata.write_text(json.dumps([{"attribute": "a1", "value": "v1"}])) - self._main( - [ - str(src), - str(dest), - "--group", - "public", - "--exclude", - f"{src}/000001-", - "--exclude", - ".md5", - "--metadata-file", - str(root_metadata), - ] - ) + root_args = [ + str(src), + str(dest), + "--group", + "public", + "--exclude", + f"{src}/000001-", + "--exclude", + ".md5", + "--metadata-file", + str(root_metadata), + "--force" + ] + self._main(root_args) # Study sample_metadata = tmp_path / "sample_metadata.json" sample_metadata.write_text(json.dumps([{"attribute": "a2", "value": "v2"}])) - self._main( - [ - str(src / "000001-a"), - str(dest / "000001-a"), - "--group", - "ss_1000#testZone", - "--exclude", - ".md5", - "--metadata-file", - str(sample_metadata), - ] - ) + sample_dir_args = [ + str(src / "000001-a"), + str(dest / "000001-a"), + "--group", + "ss_1000#testZone", + "--exclude", + ".md5", + "--metadata-file", + str(sample_metadata), + "--force" + ] + self._main(sample_dir_args) # Private - self._main([str(src / "000001-d"), str(dest / "000001-d")]) + private_dir_args = [str(src / "000001-d"), str(dest / "000001-d"), "--force"] + self._main(private_dir_args) + + # Repeated publish + self._main(root_args) + self._main(sample_dir_args) + self._main(private_dir_args) + + # Repeated publish: New file + (src / "c.txt").write_text("new") + self._main(root_args) + + # Repeated publish: Modified file + (src / "c.txt").write_text("modified") + self._main(root_args) # Assert assert is_inheritance_enabled( @@ -142,9 +158,11 @@ def test_npg_publish_tree_compatibility_ultima( Collection(dest / "000001-d"), DataObject(dest / "000001_a.txt"), DataObject(dest / "b.txt"), + DataObject(dest / "c.txt"), DataObject(dest / "000001-a" / "000002-c.txt"), DataObject(dest / "000001-d" / "000001-d.txt"), ] + assert DataObject(dest / "c.txt").read() == "modified" assert DataObject(dest / "000001_a.txt").acl() == [ ADMIN_AC, @@ -156,6 +174,11 @@ def test_npg_publish_tree_compatibility_ultima( PUBLIC_AC, UNMANAGED_AC, ] + assert DataObject(dest / "c.txt").acl() == [ + ADMIN_AC, + PUBLIC_AC, + UNMANAGED_AC, + ] assert is_inheritance_enabled(dest / "000001-a") assert Collection(dest / "000001-a").acl() == [ADMIN_AC, STUDY_AC, UNMANAGED_AC] From 4766a0741509c4d1b20778829321db6485bd6295 Mon Sep 17 00:00:00 2001 From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:47:54 +0000 Subject: [PATCH 2/6] Align docs with code --- src/npg_irods/publish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/npg_irods/publish.py b/src/npg_irods/publish.py index 688a1d1..ef7d8fd 100644 --- a/src/npg_irods/publish.py +++ b/src/npg_irods/publish.py @@ -67,7 +67,7 @@ def publish_directory( exists, the operation is skipped for that path. See DataObject.put() for more information. The default is False. force: If a data object is written, overwrite any data object already present - in iRODS. The default is True. + in iRODS. The default is False. handle_exceptions: Report a count of any errors encountered during publishing, rather than raising an exception. The default is True. If False and one or more errors are encountered, a PublishingError is raised from the first From 21b0439d626f7fb62fc9137895825de1973bef44 Mon Sep 17 00:00:00 2001 From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com> Date: Tue, 27 Jan 2026 16:05:05 +0000 Subject: [PATCH 3/6] Test repeat publish edge cases - changed/new files, changed/new/deleted metadata, changed group --- .../data/ultima/minimal/000001-a/000002-c.txt | 1 + .../ultima/minimal/000001-a/000002-c.txt.md5 | 1 + .../data/ultima/minimal/000001-d/000001-d.txt | 1 + tests/data/ultima/minimal/000001_a.txt | 1 + tests/data/ultima/minimal/000001_a.txt.md5 | 1 + tests/data/ultima/minimal/b.txt | 1 + tests/data/ultima/minimal/b.txt.md5 | 1 + tests/helpers/__init__.py | 1 + tests/test_publish_directory.py | 103 ++++++++++++++++-- 9 files changed, 102 insertions(+), 9 deletions(-) diff --git a/tests/data/ultima/minimal/000001-a/000002-c.txt b/tests/data/ultima/minimal/000001-a/000002-c.txt index e69de29..b66da45 100644 --- a/tests/data/ultima/minimal/000001-a/000002-c.txt +++ b/tests/data/ultima/minimal/000001-a/000002-c.txt @@ -0,0 +1 @@ +ee305ec7-dd05-43be-b266-f7bf5e21800c \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 b/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 index e69de29..2ea3cf7 100644 --- a/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 +++ b/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 @@ -0,0 +1 @@ +ac06fd24fc0edc84761c799c975d73c3 \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001-d/000001-d.txt b/tests/data/ultima/minimal/000001-d/000001-d.txt index e69de29..0f63a86 100644 --- a/tests/data/ultima/minimal/000001-d/000001-d.txt +++ b/tests/data/ultima/minimal/000001-d/000001-d.txt @@ -0,0 +1 @@ +c18dee80-d2d4-44dc-8b8f-68161862fe12 \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001_a.txt b/tests/data/ultima/minimal/000001_a.txt index e69de29..b8807e2 100644 --- a/tests/data/ultima/minimal/000001_a.txt +++ b/tests/data/ultima/minimal/000001_a.txt @@ -0,0 +1 @@ +615b338a-2e3f-4e41-861c-c8b03e11cf19 \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001_a.txt.md5 b/tests/data/ultima/minimal/000001_a.txt.md5 index e69de29..a14e8da 100644 --- a/tests/data/ultima/minimal/000001_a.txt.md5 +++ b/tests/data/ultima/minimal/000001_a.txt.md5 @@ -0,0 +1 @@ +6e785a5236e0b5480025469d312b2aeb \ No newline at end of file diff --git a/tests/data/ultima/minimal/b.txt b/tests/data/ultima/minimal/b.txt index e69de29..2a2bff7 100644 --- a/tests/data/ultima/minimal/b.txt +++ b/tests/data/ultima/minimal/b.txt @@ -0,0 +1 @@ +0879abcc-5755-4620-9b43-f52f0c5f69ea \ No newline at end of file diff --git a/tests/data/ultima/minimal/b.txt.md5 b/tests/data/ultima/minimal/b.txt.md5 index e69de29..51b8b90 100644 --- a/tests/data/ultima/minimal/b.txt.md5 +++ b/tests/data/ultima/minimal/b.txt.md5 @@ -0,0 +1 @@ +2b1c6c7095b550e86230c4996d1595e4 \ No newline at end of file diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index b83b363..8f2d015 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -70,6 +70,7 @@ ADMIN_AC = AC("irods", Permission.OWN, ZONE) PUBLIC_AC = AC("public", Permission.READ, ZONE) STUDY_AC = AC("ss_1000", Permission.READ, ZONE) +STUDY2_AC = AC("ss_2000", Permission.READ, ZONE) UNMANAGED_AC = AC("unmanaged", Permission.READ, ZONE) """e.g. dnap-ro""" diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py index 4f9f005..29a521b 100644 --- a/tests/test_publish_directory.py +++ b/tests/test_publish_directory.py @@ -16,10 +16,17 @@ # along with this program. If not, see . import json import shutil -from pathlib import Path, PurePath +from pathlib import PurePath from unittest.mock import MagicMock, patch -from helpers import is_inheritance_enabled, ADMIN_AC, PUBLIC_AC, STUDY_AC, UNMANAGED_AC +from helpers import ( + is_inheritance_enabled, + ADMIN_AC, + PUBLIC_AC, + UNMANAGED_AC, + STUDY2_AC, + history_in_meta, +) from npg_irods.cli import publish_directory from pytest import LogCaptureFixture, MonkeyPatch from pytest import mark as m @@ -90,6 +97,7 @@ def test_npg_publish_tree_compatibility_ultima( root_args = [ str(src), str(dest), + "--force", "--group", "public", "--exclude", @@ -98,23 +106,26 @@ def test_npg_publish_tree_compatibility_ultima( ".md5", "--metadata-file", str(root_metadata), - "--force" ] self._main(root_args) # Study sample_metadata = tmp_path / "sample_metadata.json" - sample_metadata.write_text(json.dumps([{"attribute": "a2", "value": "v2"}])) + sample_metadata.write_text( + json.dumps( + [{"attribute": "a2", "value": "v2"}, {"attribute": "a3", "value": "v3"}] + ) + ) sample_dir_args = [ str(src / "000001-a"), str(dest / "000001-a"), + "--force", "--group", "ss_1000#testZone", "--exclude", ".md5", "--metadata-file", str(sample_metadata), - "--force" ] self._main(sample_dir_args) @@ -122,7 +133,7 @@ def test_npg_publish_tree_compatibility_ultima( private_dir_args = [str(src / "000001-d"), str(dest / "000001-d"), "--force"] self._main(private_dir_args) - # Repeated publish + # Repeated publish: No changes self._main(root_args) self._main(sample_dir_args) self._main(private_dir_args) @@ -130,10 +141,46 @@ def test_npg_publish_tree_compatibility_ultima( # Repeated publish: New file (src / "c.txt").write_text("new") self._main(root_args) + created_values = [ + x + for x in DataObject(dest / "c.txt").metadata() + if x.attribute == "dcterms:created" + ] + assert len(created_values) == 1 + c_created = created_values[0] # Repeated publish: Modified file (src / "c.txt").write_text("modified") self._main(root_args) + created_values = [ + x + for x in DataObject(dest / "c.txt").metadata() + if x.attribute == "dcterms:created" + ] + assert len(created_values) == 1 + c_created_updated = created_values[0] + + # Repeated publish: Different group, different metadata, no file changes + sample_metadata_updated = tmp_path / "sample_metadata_updated.json" + # a2: Deleted + # a3: Updated + # a4: Added + sample_metadata_updated.write_text( + json.dumps( + [ + {"attribute": "a3", "value": "v3_updated"}, + {"attribute": "a4", "value": "v4"}, + ] + ) + ) + sample_dir_updated_args = sample_dir_args.copy() + sample_dir_updated_args[sample_dir_updated_args.index("ss_1000#testZone")] = ( + "ss_2000#testZone" + ) + sample_dir_updated_args[sample_dir_updated_args.index(str(sample_metadata))] = ( + str(sample_metadata_updated) + ) + self._main(sample_dir_updated_args) # Assert assert is_inheritance_enabled( @@ -162,6 +209,21 @@ def test_npg_publish_tree_compatibility_ultima( DataObject(dest / "000001-a" / "000002-c.txt"), DataObject(dest / "000001-d" / "000001-d.txt"), ] + assert [x.attribute for x in DataObject(dest / "b.txt").metadata()] == [ + "dcterms:created", + "dcterms:creator", + "md5", + "type", + ] + assert [x.attribute for x in DataObject(dest / "c.txt").metadata()] == [ + "dcterms:created", + "dcterms:creator", + "md5", + "md5", + "type", + ] + assert c_created_updated == c_created + assert DataObject(dest / "c.txt").read() == "modified" assert DataObject(dest / "000001_a.txt").acl() == [ @@ -181,13 +243,36 @@ def test_npg_publish_tree_compatibility_ultima( ] assert is_inheritance_enabled(dest / "000001-a") - assert Collection(dest / "000001-a").acl() == [ADMIN_AC, STUDY_AC, UNMANAGED_AC] - assert Collection(dest / "000001-a").metadata() == [AVU("a2", "v2")] + assert Collection(dest / "000001-a").acl() == [ + ADMIN_AC, + STUDY2_AC, + UNMANAGED_AC, + ] + assert [ + x + for x in Collection(dest / "000001-a").metadata() + if x.attribute != "a3_history" + ] == [ + AVU("a2", "v2"), # Earlier metadata preserved + AVU("a3", "v3_updated"), + AVU("a4", "v4"), + ] + assert history_in_meta( + AVU.history(AVU("a3", "v3")), Collection(dest / "000001-a").metadata() + ) + assert len(Collection(dest / "000001-a").metadata()) == 4 + assert DataObject(dest / "000001-a" / "000002-c.txt").acl() == [ ADMIN_AC, - STUDY_AC, + STUDY2_AC, UNMANAGED_AC, ] + assert [x.attribute for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata()] == [ + "dcterms:created", + "dcterms:creator", + "md5", + "type", + ] assert is_inheritance_enabled(dest / "000001-d") assert Collection(dest / "000001-d").acl() == [ From a3d76e7d4b56c88740dc968a56d68da067ecc3e8 Mon Sep 17 00:00:00 2001 From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com> Date: Tue, 27 Jan 2026 16:14:04 +0000 Subject: [PATCH 4/6] Format --- tests/test_publish_directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py index 29a521b..6023898 100644 --- a/tests/test_publish_directory.py +++ b/tests/test_publish_directory.py @@ -267,7 +267,10 @@ def test_npg_publish_tree_compatibility_ultima( STUDY2_AC, UNMANAGED_AC, ] - assert [x.attribute for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata()] == [ + assert [ + x.attribute + for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata() + ] == [ "dcterms:created", "dcterms:creator", "md5", From ac2f77da7f5683827bcacadae324d5af6ad699e4 Mon Sep 17 00:00:00 2001 From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com> Date: Wed, 4 Feb 2026 09:59:10 +0000 Subject: [PATCH 5/6] Code review --- tests/helpers/__init__.py | 8 ++++++-- tests/test_publish_directory.py | 28 +++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 8f2d015..4abd587 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -18,11 +18,11 @@ # @author Keith James """Helper functions for testing.""" - +import hashlib import os import subprocess from datetime import datetime -from pathlib import PurePath +from pathlib import PurePath, Path from os import PathLike import pytest @@ -242,3 +242,7 @@ def _run(cmd: list[str]): return raise RodsError(completed.stderr.decode("utf-8").strip()) + + +def get_md5(path: Path): + return hashlib.md5(path.read_bytes()).hexdigest() diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py index 6023898..422c58e 100644 --- a/tests/test_publish_directory.py +++ b/tests/test_publish_directory.py @@ -26,6 +26,7 @@ UNMANAGED_AC, STUDY2_AC, history_in_meta, + get_md5, ) from npg_irods.cli import publish_directory from pytest import LogCaptureFixture, MonkeyPatch @@ -140,6 +141,7 @@ def test_npg_publish_tree_compatibility_ultima( # Repeated publish: New file (src / "c.txt").write_text("new") + c_txt_original_md5 = get_md5(src / "c.txt") self._main(root_args) created_values = [ x @@ -147,10 +149,11 @@ def test_npg_publish_tree_compatibility_ultima( if x.attribute == "dcterms:created" ] assert len(created_values) == 1 - c_created = created_values[0] + c_txt_original_dcterms_created = created_values[0] # Repeated publish: Modified file (src / "c.txt").write_text("modified") + c_txt_modified_md5 = get_md5(src / "c.txt") self._main(root_args) created_values = [ x @@ -158,7 +161,10 @@ def test_npg_publish_tree_compatibility_ultima( if x.attribute == "dcterms:created" ] assert len(created_values) == 1 - c_created_updated = created_values[0] + c_txt_modified_dcterms_created = created_values[0] + assert ( + c_txt_modified_dcterms_created == c_txt_original_dcterms_created + ), "Original file creation date should not change when we update contents of the file" # Repeated publish: Different group, different metadata, no file changes sample_metadata_updated = tmp_path / "sample_metadata_updated.json" @@ -215,6 +221,7 @@ def test_npg_publish_tree_compatibility_ultima( "md5", "type", ] + assert [x.attribute for x in DataObject(dest / "c.txt").metadata()] == [ "dcterms:created", "dcterms:creator", @@ -222,9 +229,20 @@ def test_npg_publish_tree_compatibility_ultima( "md5", "type", ] - assert c_created_updated == c_created - - assert DataObject(dest / "c.txt").read() == "modified" + assert [ + x.value + for x in DataObject(dest / "c.txt").metadata() + if x.attribute == "md5" + ] == [ + c_txt_original_md5, + c_txt_modified_md5, + ], "New md5 attribute should be added, previous preserved to provide history" + assert ( + DataObject(dest / "c.txt").read() == "modified" + ), "Contents of data object in iRODS should have updated" + assert ( + DataObject(dest / "c.txt").checksum() == c_txt_modified_md5 + ), "Checksum should have updated" assert DataObject(dest / "000001_a.txt").acl() == [ ADMIN_AC, From 5d2c74038acff3943608367ca0a0925cc9fc8696 Mon Sep 17 00:00:00 2001 From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com> Date: Wed, 4 Feb 2026 10:23:57 +0000 Subject: [PATCH 6/6] Updated black --- tests/helpers/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 4abd587..db5df15 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -18,6 +18,7 @@ # @author Keith James """Helper functions for testing.""" + import hashlib import os import subprocess