From bf032c279c98858d5cb9a7d3c61f5d10b885bf0e Mon Sep 17 00:00:00 2001
From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com>
Date: Mon, 26 Jan 2026 15:14:49 +0000
Subject: [PATCH 1/6] Test invocation needed for repeat publish
---
tests/test_publish_directory.py | 79 +++++++++++++++++++++------------
1 file changed, 51 insertions(+), 28 deletions(-)
diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py
index 5909b18..4f9f005 100644
--- a/tests/test_publish_directory.py
+++ b/tests/test_publish_directory.py
@@ -15,6 +15,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import json
+import shutil
from pathlib import Path, PurePath
from unittest.mock import MagicMock, patch
@@ -71,7 +72,8 @@ def test_npg_publish_tree_compatibility_ultima(
monkeypatch: MonkeyPatch,
):
# Arrange
- src = Path("./tests/data/ultima/minimal").absolute()
+ src = tmp_path / "minimal"
+ shutil.copytree("./tests/data/ultima/minimal", src)
# empty_collection stands in for $ZONE/ultimagen/runs
# SOP: Destination collection doesn't exist
dest = (
@@ -85,39 +87,53 @@ def test_npg_publish_tree_compatibility_ultima(
# Public
root_metadata = tmp_path / "root_metadata.json"
root_metadata.write_text(json.dumps([{"attribute": "a1", "value": "v1"}]))
- self._main(
- [
- str(src),
- str(dest),
- "--group",
- "public",
- "--exclude",
- f"{src}/000001-",
- "--exclude",
- ".md5",
- "--metadata-file",
- str(root_metadata),
- ]
- )
+ root_args = [
+ str(src),
+ str(dest),
+ "--group",
+ "public",
+ "--exclude",
+ f"{src}/000001-",
+ "--exclude",
+ ".md5",
+ "--metadata-file",
+ str(root_metadata),
+ "--force"
+ ]
+ self._main(root_args)
# Study
sample_metadata = tmp_path / "sample_metadata.json"
sample_metadata.write_text(json.dumps([{"attribute": "a2", "value": "v2"}]))
- self._main(
- [
- str(src / "000001-a"),
- str(dest / "000001-a"),
- "--group",
- "ss_1000#testZone",
- "--exclude",
- ".md5",
- "--metadata-file",
- str(sample_metadata),
- ]
- )
+ sample_dir_args = [
+ str(src / "000001-a"),
+ str(dest / "000001-a"),
+ "--group",
+ "ss_1000#testZone",
+ "--exclude",
+ ".md5",
+ "--metadata-file",
+ str(sample_metadata),
+ "--force"
+ ]
+ self._main(sample_dir_args)
# Private
- self._main([str(src / "000001-d"), str(dest / "000001-d")])
+ private_dir_args = [str(src / "000001-d"), str(dest / "000001-d"), "--force"]
+ self._main(private_dir_args)
+
+ # Repeated publish
+ self._main(root_args)
+ self._main(sample_dir_args)
+ self._main(private_dir_args)
+
+ # Repeated publish: New file
+ (src / "c.txt").write_text("new")
+ self._main(root_args)
+
+ # Repeated publish: Modified file
+ (src / "c.txt").write_text("modified")
+ self._main(root_args)
# Assert
assert is_inheritance_enabled(
@@ -142,9 +158,11 @@ def test_npg_publish_tree_compatibility_ultima(
Collection(dest / "000001-d"),
DataObject(dest / "000001_a.txt"),
DataObject(dest / "b.txt"),
+ DataObject(dest / "c.txt"),
DataObject(dest / "000001-a" / "000002-c.txt"),
DataObject(dest / "000001-d" / "000001-d.txt"),
]
+ assert DataObject(dest / "c.txt").read() == "modified"
assert DataObject(dest / "000001_a.txt").acl() == [
ADMIN_AC,
@@ -156,6 +174,11 @@ def test_npg_publish_tree_compatibility_ultima(
PUBLIC_AC,
UNMANAGED_AC,
]
+ assert DataObject(dest / "c.txt").acl() == [
+ ADMIN_AC,
+ PUBLIC_AC,
+ UNMANAGED_AC,
+ ]
assert is_inheritance_enabled(dest / "000001-a")
assert Collection(dest / "000001-a").acl() == [ADMIN_AC, STUDY_AC, UNMANAGED_AC]
From 4766a0741509c4d1b20778829321db6485bd6295 Mon Sep 17 00:00:00 2001
From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com>
Date: Mon, 26 Jan 2026 16:47:54 +0000
Subject: [PATCH 2/6] Align docs with code
---
src/npg_irods/publish.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/npg_irods/publish.py b/src/npg_irods/publish.py
index 688a1d1..ef7d8fd 100644
--- a/src/npg_irods/publish.py
+++ b/src/npg_irods/publish.py
@@ -67,7 +67,7 @@ def publish_directory(
exists, the operation is skipped for that path. See DataObject.put() for
more information. The default is False.
force: If a data object is written, overwrite any data object already present
- in iRODS. The default is True.
+ in iRODS. The default is False.
handle_exceptions: Report a count of any errors encountered during publishing,
rather than raising an exception. The default is True. If False and one or
more errors are encountered, a PublishingError is raised from the first
From 21b0439d626f7fb62fc9137895825de1973bef44 Mon Sep 17 00:00:00 2001
From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com>
Date: Tue, 27 Jan 2026 16:05:05 +0000
Subject: [PATCH 3/6] Test repeat publish edge cases - changed/new files,
changed/new/deleted metadata, changed group
---
.../data/ultima/minimal/000001-a/000002-c.txt | 1 +
.../ultima/minimal/000001-a/000002-c.txt.md5 | 1 +
.../data/ultima/minimal/000001-d/000001-d.txt | 1 +
tests/data/ultima/minimal/000001_a.txt | 1 +
tests/data/ultima/minimal/000001_a.txt.md5 | 1 +
tests/data/ultima/minimal/b.txt | 1 +
tests/data/ultima/minimal/b.txt.md5 | 1 +
tests/helpers/__init__.py | 1 +
tests/test_publish_directory.py | 103 ++++++++++++++++--
9 files changed, 102 insertions(+), 9 deletions(-)
diff --git a/tests/data/ultima/minimal/000001-a/000002-c.txt b/tests/data/ultima/minimal/000001-a/000002-c.txt
index e69de29..b66da45 100644
--- a/tests/data/ultima/minimal/000001-a/000002-c.txt
+++ b/tests/data/ultima/minimal/000001-a/000002-c.txt
@@ -0,0 +1 @@
+ee305ec7-dd05-43be-b266-f7bf5e21800c
\ No newline at end of file
diff --git a/tests/data/ultima/minimal/000001-a/000002-c.txt.md5 b/tests/data/ultima/minimal/000001-a/000002-c.txt.md5
index e69de29..2ea3cf7 100644
--- a/tests/data/ultima/minimal/000001-a/000002-c.txt.md5
+++ b/tests/data/ultima/minimal/000001-a/000002-c.txt.md5
@@ -0,0 +1 @@
+ac06fd24fc0edc84761c799c975d73c3
\ No newline at end of file
diff --git a/tests/data/ultima/minimal/000001-d/000001-d.txt b/tests/data/ultima/minimal/000001-d/000001-d.txt
index e69de29..0f63a86 100644
--- a/tests/data/ultima/minimal/000001-d/000001-d.txt
+++ b/tests/data/ultima/minimal/000001-d/000001-d.txt
@@ -0,0 +1 @@
+c18dee80-d2d4-44dc-8b8f-68161862fe12
\ No newline at end of file
diff --git a/tests/data/ultima/minimal/000001_a.txt b/tests/data/ultima/minimal/000001_a.txt
index e69de29..b8807e2 100644
--- a/tests/data/ultima/minimal/000001_a.txt
+++ b/tests/data/ultima/minimal/000001_a.txt
@@ -0,0 +1 @@
+615b338a-2e3f-4e41-861c-c8b03e11cf19
\ No newline at end of file
diff --git a/tests/data/ultima/minimal/000001_a.txt.md5 b/tests/data/ultima/minimal/000001_a.txt.md5
index e69de29..a14e8da 100644
--- a/tests/data/ultima/minimal/000001_a.txt.md5
+++ b/tests/data/ultima/minimal/000001_a.txt.md5
@@ -0,0 +1 @@
+6e785a5236e0b5480025469d312b2aeb
\ No newline at end of file
diff --git a/tests/data/ultima/minimal/b.txt b/tests/data/ultima/minimal/b.txt
index e69de29..2a2bff7 100644
--- a/tests/data/ultima/minimal/b.txt
+++ b/tests/data/ultima/minimal/b.txt
@@ -0,0 +1 @@
+0879abcc-5755-4620-9b43-f52f0c5f69ea
\ No newline at end of file
diff --git a/tests/data/ultima/minimal/b.txt.md5 b/tests/data/ultima/minimal/b.txt.md5
index e69de29..51b8b90 100644
--- a/tests/data/ultima/minimal/b.txt.md5
+++ b/tests/data/ultima/minimal/b.txt.md5
@@ -0,0 +1 @@
+2b1c6c7095b550e86230c4996d1595e4
\ No newline at end of file
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
index b83b363..8f2d015 100644
--- a/tests/helpers/__init__.py
+++ b/tests/helpers/__init__.py
@@ -70,6 +70,7 @@
ADMIN_AC = AC("irods", Permission.OWN, ZONE)
PUBLIC_AC = AC("public", Permission.READ, ZONE)
STUDY_AC = AC("ss_1000", Permission.READ, ZONE)
+STUDY2_AC = AC("ss_2000", Permission.READ, ZONE)
UNMANAGED_AC = AC("unmanaged", Permission.READ, ZONE)
"""e.g. dnap-ro"""
diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py
index 4f9f005..29a521b 100644
--- a/tests/test_publish_directory.py
+++ b/tests/test_publish_directory.py
@@ -16,10 +16,17 @@
# along with this program. If not, see .
import json
import shutil
-from pathlib import Path, PurePath
+from pathlib import PurePath
from unittest.mock import MagicMock, patch
-from helpers import is_inheritance_enabled, ADMIN_AC, PUBLIC_AC, STUDY_AC, UNMANAGED_AC
+from helpers import (
+ is_inheritance_enabled,
+ ADMIN_AC,
+ PUBLIC_AC,
+ UNMANAGED_AC,
+ STUDY2_AC,
+ history_in_meta,
+)
from npg_irods.cli import publish_directory
from pytest import LogCaptureFixture, MonkeyPatch
from pytest import mark as m
@@ -90,6 +97,7 @@ def test_npg_publish_tree_compatibility_ultima(
root_args = [
str(src),
str(dest),
+ "--force",
"--group",
"public",
"--exclude",
@@ -98,23 +106,26 @@ def test_npg_publish_tree_compatibility_ultima(
".md5",
"--metadata-file",
str(root_metadata),
- "--force"
]
self._main(root_args)
# Study
sample_metadata = tmp_path / "sample_metadata.json"
- sample_metadata.write_text(json.dumps([{"attribute": "a2", "value": "v2"}]))
+ sample_metadata.write_text(
+ json.dumps(
+ [{"attribute": "a2", "value": "v2"}, {"attribute": "a3", "value": "v3"}]
+ )
+ )
sample_dir_args = [
str(src / "000001-a"),
str(dest / "000001-a"),
+ "--force",
"--group",
"ss_1000#testZone",
"--exclude",
".md5",
"--metadata-file",
str(sample_metadata),
- "--force"
]
self._main(sample_dir_args)
@@ -122,7 +133,7 @@ def test_npg_publish_tree_compatibility_ultima(
private_dir_args = [str(src / "000001-d"), str(dest / "000001-d"), "--force"]
self._main(private_dir_args)
- # Repeated publish
+ # Repeated publish: No changes
self._main(root_args)
self._main(sample_dir_args)
self._main(private_dir_args)
@@ -130,10 +141,46 @@ def test_npg_publish_tree_compatibility_ultima(
# Repeated publish: New file
(src / "c.txt").write_text("new")
self._main(root_args)
+ created_values = [
+ x
+ for x in DataObject(dest / "c.txt").metadata()
+ if x.attribute == "dcterms:created"
+ ]
+ assert len(created_values) == 1
+ c_created = created_values[0]
# Repeated publish: Modified file
(src / "c.txt").write_text("modified")
self._main(root_args)
+ created_values = [
+ x
+ for x in DataObject(dest / "c.txt").metadata()
+ if x.attribute == "dcterms:created"
+ ]
+ assert len(created_values) == 1
+ c_created_updated = created_values[0]
+
+ # Repeated publish: Different group, different metadata, no file changes
+ sample_metadata_updated = tmp_path / "sample_metadata_updated.json"
+ # a2: Deleted
+ # a3: Updated
+ # a4: Added
+ sample_metadata_updated.write_text(
+ json.dumps(
+ [
+ {"attribute": "a3", "value": "v3_updated"},
+ {"attribute": "a4", "value": "v4"},
+ ]
+ )
+ )
+ sample_dir_updated_args = sample_dir_args.copy()
+ sample_dir_updated_args[sample_dir_updated_args.index("ss_1000#testZone")] = (
+ "ss_2000#testZone"
+ )
+ sample_dir_updated_args[sample_dir_updated_args.index(str(sample_metadata))] = (
+ str(sample_metadata_updated)
+ )
+ self._main(sample_dir_updated_args)
# Assert
assert is_inheritance_enabled(
@@ -162,6 +209,21 @@ def test_npg_publish_tree_compatibility_ultima(
DataObject(dest / "000001-a" / "000002-c.txt"),
DataObject(dest / "000001-d" / "000001-d.txt"),
]
+ assert [x.attribute for x in DataObject(dest / "b.txt").metadata()] == [
+ "dcterms:created",
+ "dcterms:creator",
+ "md5",
+ "type",
+ ]
+ assert [x.attribute for x in DataObject(dest / "c.txt").metadata()] == [
+ "dcterms:created",
+ "dcterms:creator",
+ "md5",
+ "md5",
+ "type",
+ ]
+ assert c_created_updated == c_created
+
assert DataObject(dest / "c.txt").read() == "modified"
assert DataObject(dest / "000001_a.txt").acl() == [
@@ -181,13 +243,36 @@ def test_npg_publish_tree_compatibility_ultima(
]
assert is_inheritance_enabled(dest / "000001-a")
- assert Collection(dest / "000001-a").acl() == [ADMIN_AC, STUDY_AC, UNMANAGED_AC]
- assert Collection(dest / "000001-a").metadata() == [AVU("a2", "v2")]
+ assert Collection(dest / "000001-a").acl() == [
+ ADMIN_AC,
+ STUDY2_AC,
+ UNMANAGED_AC,
+ ]
+ assert [
+ x
+ for x in Collection(dest / "000001-a").metadata()
+ if x.attribute != "a3_history"
+ ] == [
+ AVU("a2", "v2"), # Earlier metadata preserved
+ AVU("a3", "v3_updated"),
+ AVU("a4", "v4"),
+ ]
+ assert history_in_meta(
+ AVU.history(AVU("a3", "v3")), Collection(dest / "000001-a").metadata()
+ )
+ assert len(Collection(dest / "000001-a").metadata()) == 4
+
assert DataObject(dest / "000001-a" / "000002-c.txt").acl() == [
ADMIN_AC,
- STUDY_AC,
+ STUDY2_AC,
UNMANAGED_AC,
]
+ assert [x.attribute for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata()] == [
+ "dcterms:created",
+ "dcterms:creator",
+ "md5",
+ "type",
+ ]
assert is_inheritance_enabled(dest / "000001-d")
assert Collection(dest / "000001-d").acl() == [
From a3d76e7d4b56c88740dc968a56d68da067ecc3e8 Mon Sep 17 00:00:00 2001
From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com>
Date: Tue, 27 Jan 2026 16:14:04 +0000
Subject: [PATCH 4/6] Format
---
tests/test_publish_directory.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py
index 29a521b..6023898 100644
--- a/tests/test_publish_directory.py
+++ b/tests/test_publish_directory.py
@@ -267,7 +267,10 @@ def test_npg_publish_tree_compatibility_ultima(
STUDY2_AC,
UNMANAGED_AC,
]
- assert [x.attribute for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata()] == [
+ assert [
+ x.attribute
+ for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata()
+ ] == [
"dcterms:created",
"dcterms:creator",
"md5",
From ac2f77da7f5683827bcacadae324d5af6ad699e4 Mon Sep 17 00:00:00 2001
From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com>
Date: Wed, 4 Feb 2026 09:59:10 +0000
Subject: [PATCH 5/6] Code review
---
tests/helpers/__init__.py | 8 ++++++--
tests/test_publish_directory.py | 28 +++++++++++++++++++++++-----
2 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
index 8f2d015..4abd587 100644
--- a/tests/helpers/__init__.py
+++ b/tests/helpers/__init__.py
@@ -18,11 +18,11 @@
# @author Keith James
"""Helper functions for testing."""
-
+import hashlib
import os
import subprocess
from datetime import datetime
-from pathlib import PurePath
+from pathlib import PurePath, Path
from os import PathLike
import pytest
@@ -242,3 +242,7 @@ def _run(cmd: list[str]):
return
raise RodsError(completed.stderr.decode("utf-8").strip())
+
+
+def get_md5(path: Path):
+ return hashlib.md5(path.read_bytes()).hexdigest()
diff --git a/tests/test_publish_directory.py b/tests/test_publish_directory.py
index 6023898..422c58e 100644
--- a/tests/test_publish_directory.py
+++ b/tests/test_publish_directory.py
@@ -26,6 +26,7 @@
UNMANAGED_AC,
STUDY2_AC,
history_in_meta,
+ get_md5,
)
from npg_irods.cli import publish_directory
from pytest import LogCaptureFixture, MonkeyPatch
@@ -140,6 +141,7 @@ def test_npg_publish_tree_compatibility_ultima(
# Repeated publish: New file
(src / "c.txt").write_text("new")
+ c_txt_original_md5 = get_md5(src / "c.txt")
self._main(root_args)
created_values = [
x
@@ -147,10 +149,11 @@ def test_npg_publish_tree_compatibility_ultima(
if x.attribute == "dcterms:created"
]
assert len(created_values) == 1
- c_created = created_values[0]
+ c_txt_original_dcterms_created = created_values[0]
# Repeated publish: Modified file
(src / "c.txt").write_text("modified")
+ c_txt_modified_md5 = get_md5(src / "c.txt")
self._main(root_args)
created_values = [
x
@@ -158,7 +161,10 @@ def test_npg_publish_tree_compatibility_ultima(
if x.attribute == "dcterms:created"
]
assert len(created_values) == 1
- c_created_updated = created_values[0]
+ c_txt_modified_dcterms_created = created_values[0]
+ assert (
+ c_txt_modified_dcterms_created == c_txt_original_dcterms_created
+ ), "Original file creation date should not change when we update contents of the file"
# Repeated publish: Different group, different metadata, no file changes
sample_metadata_updated = tmp_path / "sample_metadata_updated.json"
@@ -215,6 +221,7 @@ def test_npg_publish_tree_compatibility_ultima(
"md5",
"type",
]
+
assert [x.attribute for x in DataObject(dest / "c.txt").metadata()] == [
"dcterms:created",
"dcterms:creator",
@@ -222,9 +229,20 @@ def test_npg_publish_tree_compatibility_ultima(
"md5",
"type",
]
- assert c_created_updated == c_created
-
- assert DataObject(dest / "c.txt").read() == "modified"
+ assert [
+ x.value
+ for x in DataObject(dest / "c.txt").metadata()
+ if x.attribute == "md5"
+ ] == [
+ c_txt_original_md5,
+ c_txt_modified_md5,
+ ], "New md5 attribute should be added, previous preserved to provide history"
+ assert (
+ DataObject(dest / "c.txt").read() == "modified"
+ ), "Contents of data object in iRODS should have updated"
+ assert (
+ DataObject(dest / "c.txt").checksum() == c_txt_modified_md5
+ ), "Checksum should have updated"
assert DataObject(dest / "000001_a.txt").acl() == [
ADMIN_AC,
From 5d2c74038acff3943608367ca0a0925cc9fc8696 Mon Sep 17 00:00:00 2001
From: Calum Eadie <199819990+ce10-sanger@users.noreply.github.com>
Date: Wed, 4 Feb 2026 10:23:57 +0000
Subject: [PATCH 6/6] Updated black
---
tests/helpers/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
index 4abd587..db5df15 100644
--- a/tests/helpers/__init__.py
+++ b/tests/helpers/__init__.py
@@ -18,6 +18,7 @@
# @author Keith James
"""Helper functions for testing."""
+
import hashlib
import os
import subprocess