diff --git a/src/npg_irods/publish.py b/src/npg_irods/publish.py index 688a1d1d..ef7d8fd1 100644 --- a/src/npg_irods/publish.py +++ b/src/npg_irods/publish.py @@ -67,7 +67,7 @@ def publish_directory( exists, the operation is skipped for that path. See DataObject.put() for more information. The default is False. force: If a data object is written, overwrite any data object already present - in iRODS. The default is True. + in iRODS. The default is False. handle_exceptions: Report a count of any errors encountered during publishing, rather than raising an exception. The default is True. If False and one or more errors are encountered, a PublishingError is raised from the first diff --git a/tests/data/ultima/minimal/000001-a/000002-c.txt b/tests/data/ultima/minimal/000001-a/000002-c.txt index e69de29b..b66da45c 100644 --- a/tests/data/ultima/minimal/000001-a/000002-c.txt +++ b/tests/data/ultima/minimal/000001-a/000002-c.txt @@ -0,0 +1 @@ +ee305ec7-dd05-43be-b266-f7bf5e21800c \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 b/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 index e69de29b..2ea3cf76 100644 --- a/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 +++ b/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 @@ -0,0 +1 @@ +ac06fd24fc0edc84761c799c975d73c3 \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001-d/000001-d.txt b/tests/data/ultima/minimal/000001-d/000001-d.txt index e69de29b..0f63a868 100644 --- a/tests/data/ultima/minimal/000001-d/000001-d.txt +++ b/tests/data/ultima/minimal/000001-d/000001-d.txt @@ -0,0 +1 @@ +c18dee80-d2d4-44dc-8b8f-68161862fe12 \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001_a.txt b/tests/data/ultima/minimal/000001_a.txt index e69de29b..b8807e21 100644 --- a/tests/data/ultima/minimal/000001_a.txt +++ b/tests/data/ultima/minimal/000001_a.txt @@ -0,0 +1 @@ +615b338a-2e3f-4e41-861c-c8b03e11cf19 \ No newline at end of file diff --git a/tests/data/ultima/minimal/000001_a.txt.md5 b/tests/data/ultima/minimal/000001_a.txt.md5 index e69de29b..a14e8da4 100644 --- a/tests/data/ultima/minimal/000001_a.txt.md5 +++ b/tests/data/ultima/minimal/000001_a.txt.md5 @@ -0,0 +1 @@ +6e785a5236e0b5480025469d312b2aeb \ No newline at end of file diff --git a/tests/data/ultima/minimal/b.txt b/tests/data/ultima/minimal/b.txt index e69de29b..2a2bff7e 100644 --- a/tests/data/ultima/minimal/b.txt +++ b/tests/data/ultima/minimal/b.txt @@ -0,0 +1 @@ +0879abcc-5755-4620-9b43-f52f0c5f69ea \ No newline at end of file diff --git a/tests/data/ultima/minimal/b.txt.md5 b/tests/data/ultima/minimal/b.txt.md5 index e69de29b..51b8b90e 100644 --- a/tests/data/ultima/minimal/b.txt.md5 +++ b/tests/data/ultima/minimal/b.txt.md5 @@ -0,0 +1 @@ +2b1c6c7095b550e86230c4996d1595e4 \ No newline at end of file diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index b83b3631..db5df15e 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -19,10 +19,11 @@ """Helper functions for testing.""" +import hashlib import os import subprocess from datetime import datetime -from pathlib import PurePath +from pathlib import PurePath, Path from os import PathLike import pytest @@ -70,6 +71,7 @@ ADMIN_AC = AC("irods", Permission.OWN, ZONE) PUBLIC_AC = AC("public", Permission.READ, ZONE) STUDY_AC = AC("ss_1000", Permission.READ, ZONE) +STUDY2_AC = AC("ss_2000", Permission.READ, ZONE) UNMANAGED_AC = AC("unmanaged", Permission.READ, ZONE) """e.g. dnap-ro""" @@ -241,3 +243,7 @@ def _run(cmd: list[str]): return raise RodsError(completed.stderr.decode("utf-8").strip()) + + +def get_md5(path: Path): + return hashlib.md5(path.read_bytes()).hexdigest() diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py index 5909b18a..422c58e5 100644 --- a/tests/test_publish_directory.py +++ b/tests/test_publish_directory.py @@ -15,10 +15,19 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . import json -from pathlib import Path, PurePath +import shutil +from pathlib import PurePath from unittest.mock import MagicMock, patch -from helpers import is_inheritance_enabled, ADMIN_AC, PUBLIC_AC, STUDY_AC, UNMANAGED_AC +from helpers import ( + is_inheritance_enabled, + ADMIN_AC, + PUBLIC_AC, + UNMANAGED_AC, + STUDY2_AC, + history_in_meta, + get_md5, +) from npg_irods.cli import publish_directory from pytest import LogCaptureFixture, MonkeyPatch from pytest import mark as m @@ -71,7 +80,8 @@ def test_npg_publish_tree_compatibility_ultima( monkeypatch: MonkeyPatch, ): # Arrange - src = Path("./tests/data/ultima/minimal").absolute() + src = tmp_path / "minimal" + shutil.copytree("./tests/data/ultima/minimal", src) # empty_collection stands in for $ZONE/ultimagen/runs # SOP: Destination collection doesn't exist dest = ( @@ -85,39 +95,98 @@ def test_npg_publish_tree_compatibility_ultima( # Public root_metadata = tmp_path / "root_metadata.json" root_metadata.write_text(json.dumps([{"attribute": "a1", "value": "v1"}])) - self._main( - [ - str(src), - str(dest), - "--group", - "public", - "--exclude", - f"{src}/000001-", - "--exclude", - ".md5", - "--metadata-file", - str(root_metadata), - ] - ) + root_args = [ + str(src), + str(dest), + "--force", + "--group", + "public", + "--exclude", + f"{src}/000001-", + "--exclude", + ".md5", + "--metadata-file", + str(root_metadata), + ] + self._main(root_args) # Study sample_metadata = tmp_path / "sample_metadata.json" - sample_metadata.write_text(json.dumps([{"attribute": "a2", "value": "v2"}])) - self._main( - [ - str(src / "000001-a"), - str(dest / "000001-a"), - "--group", - "ss_1000#testZone", - "--exclude", - ".md5", - "--metadata-file", - str(sample_metadata), - ] + sample_metadata.write_text( + json.dumps( + [{"attribute": "a2", "value": "v2"}, {"attribute": "a3", "value": "v3"}] + ) ) + sample_dir_args = [ + str(src / "000001-a"), + str(dest / "000001-a"), + "--force", + "--group", + "ss_1000#testZone", + "--exclude", + ".md5", + "--metadata-file", + str(sample_metadata), + ] + self._main(sample_dir_args) # Private - self._main([str(src / "000001-d"), str(dest / "000001-d")]) + private_dir_args = [str(src / "000001-d"), str(dest / "000001-d"), "--force"] + self._main(private_dir_args) + + # Repeated publish: No changes + self._main(root_args) + self._main(sample_dir_args) + self._main(private_dir_args) + + # Repeated publish: New file + (src / "c.txt").write_text("new") + c_txt_original_md5 = get_md5(src / "c.txt") + self._main(root_args) + created_values = [ + x + for x in DataObject(dest / "c.txt").metadata() + if x.attribute == "dcterms:created" + ] + assert len(created_values) == 1 + c_txt_original_dcterms_created = created_values[0] + + # Repeated publish: Modified file + (src / "c.txt").write_text("modified") + c_txt_modified_md5 = get_md5(src / "c.txt") + self._main(root_args) + created_values = [ + x + for x in DataObject(dest / "c.txt").metadata() + if x.attribute == "dcterms:created" + ] + assert len(created_values) == 1 + c_txt_modified_dcterms_created = created_values[0] + assert ( + c_txt_modified_dcterms_created == c_txt_original_dcterms_created + ), "Original file creation date should not change when we update contents of the file" + + # Repeated publish: Different group, different metadata, no file changes + sample_metadata_updated = tmp_path / "sample_metadata_updated.json" + # a2: Deleted + # a3: Updated + # a4: Added + sample_metadata_updated.write_text( + json.dumps( + [ + {"attribute": "a3", "value": "v3_updated"}, + {"attribute": "a4", "value": "v4"}, + ] + ) + ) + sample_dir_updated_args = sample_dir_args.copy() + sample_dir_updated_args[sample_dir_updated_args.index("ss_1000#testZone")] = ( + "ss_2000#testZone" + ) + sample_dir_updated_args[sample_dir_updated_args.index(str(sample_metadata))] = ( + str(sample_metadata_updated) + ) + self._main(sample_dir_updated_args) # Assert assert is_inheritance_enabled( @@ -142,9 +211,38 @@ def test_npg_publish_tree_compatibility_ultima( Collection(dest / "000001-d"), DataObject(dest / "000001_a.txt"), DataObject(dest / "b.txt"), + DataObject(dest / "c.txt"), DataObject(dest / "000001-a" / "000002-c.txt"), DataObject(dest / "000001-d" / "000001-d.txt"), ] + assert [x.attribute for x in DataObject(dest / "b.txt").metadata()] == [ + "dcterms:created", + "dcterms:creator", + "md5", + "type", + ] + + assert [x.attribute for x in DataObject(dest / "c.txt").metadata()] == [ + "dcterms:created", + "dcterms:creator", + "md5", + "md5", + "type", + ] + assert [ + x.value + for x in DataObject(dest / "c.txt").metadata() + if x.attribute == "md5" + ] == [ + c_txt_original_md5, + c_txt_modified_md5, + ], "New md5 attribute should be added, previous preserved to provide history" + assert ( + DataObject(dest / "c.txt").read() == "modified" + ), "Contents of data object in iRODS should have updated" + assert ( + DataObject(dest / "c.txt").checksum() == c_txt_modified_md5 + ), "Checksum should have updated" assert DataObject(dest / "000001_a.txt").acl() == [ ADMIN_AC, @@ -156,15 +254,46 @@ def test_npg_publish_tree_compatibility_ultima( PUBLIC_AC, UNMANAGED_AC, ] + assert DataObject(dest / "c.txt").acl() == [ + ADMIN_AC, + PUBLIC_AC, + UNMANAGED_AC, + ] assert is_inheritance_enabled(dest / "000001-a") - assert Collection(dest / "000001-a").acl() == [ADMIN_AC, STUDY_AC, UNMANAGED_AC] - assert Collection(dest / "000001-a").metadata() == [AVU("a2", "v2")] + assert Collection(dest / "000001-a").acl() == [ + ADMIN_AC, + STUDY2_AC, + UNMANAGED_AC, + ] + assert [ + x + for x in Collection(dest / "000001-a").metadata() + if x.attribute != "a3_history" + ] == [ + AVU("a2", "v2"), # Earlier metadata preserved + AVU("a3", "v3_updated"), + AVU("a4", "v4"), + ] + assert history_in_meta( + AVU.history(AVU("a3", "v3")), Collection(dest / "000001-a").metadata() + ) + assert len(Collection(dest / "000001-a").metadata()) == 4 + assert DataObject(dest / "000001-a" / "000002-c.txt").acl() == [ ADMIN_AC, - STUDY_AC, + STUDY2_AC, UNMANAGED_AC, ] + assert [ + x.attribute + for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata() + ] == [ + "dcterms:created", + "dcterms:creator", + "md5", + "type", + ] assert is_inheritance_enabled(dest / "000001-d") assert Collection(dest / "000001-d").acl() == [