Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/npg_irods/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def publish_directory(
exists, the operation is skipped for that path. See DataObject.put() for
more information. The default is False.
force: If a data object is written, overwrite any data object already present
in iRODS. The default is True.
in iRODS. The default is False.
handle_exceptions: Report a count of any errors encountered during publishing,
rather than raising an exception. The default is True. If False and one or
more errors are encountered, a PublishingError is raised from the first
Expand Down
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/000001-a/000002-c.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ee305ec7-dd05-43be-b266-f7bf5e21800c
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/000001-a/000002-c.txt.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ac06fd24fc0edc84761c799c975d73c3
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/000001-d/000001-d.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
c18dee80-d2d4-44dc-8b8f-68161862fe12
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/000001_a.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
615b338a-2e3f-4e41-861c-c8b03e11cf19
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/000001_a.txt.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6e785a5236e0b5480025469d312b2aeb
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0879abcc-5755-4620-9b43-f52f0c5f69ea
1 change: 1 addition & 0 deletions tests/data/ultima/minimal/b.txt.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2b1c6c7095b550e86230c4996d1595e4
8 changes: 7 additions & 1 deletion tests/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@

"""Helper functions for testing."""

import hashlib
import os
import subprocess
from datetime import datetime
from pathlib import PurePath
from pathlib import PurePath, Path
from os import PathLike

import pytest
Expand Down Expand Up @@ -70,6 +71,7 @@
ADMIN_AC = AC("irods", Permission.OWN, ZONE)
PUBLIC_AC = AC("public", Permission.READ, ZONE)
STUDY_AC = AC("ss_1000", Permission.READ, ZONE)
STUDY2_AC = AC("ss_2000", Permission.READ, ZONE)
UNMANAGED_AC = AC("unmanaged", Permission.READ, ZONE)
"""e.g. dnap-ro"""

Expand Down Expand Up @@ -241,3 +243,7 @@ def _run(cmd: list[str]):
return

raise RodsError(completed.stderr.decode("utf-8").strip())


def get_md5(path: Path):
return hashlib.md5(path.read_bytes()).hexdigest()
195 changes: 162 additions & 33 deletions tests/test_publish_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,19 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
from pathlib import Path, PurePath
import shutil
from pathlib import PurePath
from unittest.mock import MagicMock, patch

from helpers import is_inheritance_enabled, ADMIN_AC, PUBLIC_AC, STUDY_AC, UNMANAGED_AC
from helpers import (
is_inheritance_enabled,
ADMIN_AC,
PUBLIC_AC,
UNMANAGED_AC,
STUDY2_AC,
history_in_meta,
get_md5,
)
from npg_irods.cli import publish_directory
from pytest import LogCaptureFixture, MonkeyPatch
from pytest import mark as m
Expand Down Expand Up @@ -71,7 +80,8 @@ def test_npg_publish_tree_compatibility_ultima(
monkeypatch: MonkeyPatch,
):
# Arrange
src = Path("./tests/data/ultima/minimal").absolute()
src = tmp_path / "minimal"
shutil.copytree("./tests/data/ultima/minimal", src)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really, test prep should be set up in a fixture (see conftest.py) so that the majority of the code in the tests is testing things, rather than building state.

# empty_collection stands in for $ZONE/ultimagen/runs
# SOP: Destination collection doesn't exist
dest = (
Expand All @@ -85,39 +95,98 @@ def test_npg_publish_tree_compatibility_ultima(
# Public
root_metadata = tmp_path / "root_metadata.json"
root_metadata.write_text(json.dumps([{"attribute": "a1", "value": "v1"}]))
self._main(
[
str(src),
str(dest),
"--group",
"public",
"--exclude",
f"{src}/000001-",
"--exclude",
".md5",
"--metadata-file",
str(root_metadata),
]
)
root_args = [
str(src),
str(dest),
"--force",
"--group",
"public",
"--exclude",
f"{src}/000001-",
"--exclude",
".md5",
"--metadata-file",
str(root_metadata),
]
self._main(root_args)

# Study
sample_metadata = tmp_path / "sample_metadata.json"
sample_metadata.write_text(json.dumps([{"attribute": "a2", "value": "v2"}]))
self._main(
[
str(src / "000001-a"),
str(dest / "000001-a"),
"--group",
"ss_1000#testZone",
"--exclude",
".md5",
"--metadata-file",
str(sample_metadata),
]
sample_metadata.write_text(
json.dumps(
[{"attribute": "a2", "value": "v2"}, {"attribute": "a3", "value": "v3"}]
)
)
sample_dir_args = [
str(src / "000001-a"),
str(dest / "000001-a"),
"--force",
"--group",
"ss_1000#testZone",
"--exclude",
".md5",
"--metadata-file",
str(sample_metadata),
]
self._main(sample_dir_args)

# Private
self._main([str(src / "000001-d"), str(dest / "000001-d")])
private_dir_args = [str(src / "000001-d"), str(dest / "000001-d"), "--force"]
self._main(private_dir_args)

# Repeated publish: No changes
self._main(root_args)
self._main(sample_dir_args)
self._main(private_dir_args)

# Repeated publish: New file
(src / "c.txt").write_text("new")
c_txt_original_md5 = get_md5(src / "c.txt")
self._main(root_args)
created_values = [
x
for x in DataObject(dest / "c.txt").metadata()
if x.attribute == "dcterms:created"
]
assert len(created_values) == 1
c_txt_original_dcterms_created = created_values[0]

# Repeated publish: Modified file
(src / "c.txt").write_text("modified")
c_txt_modified_md5 = get_md5(src / "c.txt")
self._main(root_args)
created_values = [
x
for x in DataObject(dest / "c.txt").metadata()
if x.attribute == "dcterms:created"
]
assert len(created_values) == 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assertion on line 225 should be moved here. However, I am not sure what is being tested on line 225. Are we testing that the original file creation date does not change when we asked to update the content of the file? Fair enough. We should also test that the content of the file in iRODS has been updated and a new md5 has been assigned iRODS. So we should capture md5 of the original file on disk and md5 of the updated file on disk, make sure that they differ and check that we get correct md5 and optionally get the content of the file from iRODS and compare to the updated file on disk.

c_txt_modified_dcterms_created = created_values[0]
assert (
c_txt_modified_dcterms_created == c_txt_original_dcterms_created
), "Original file creation date should not change when we update contents of the file"

# Repeated publish: Different group, different metadata, no file changes
sample_metadata_updated = tmp_path / "sample_metadata_updated.json"
# a2: Deleted
# a3: Updated
# a4: Added
sample_metadata_updated.write_text(
json.dumps(
[
{"attribute": "a3", "value": "v3_updated"},
{"attribute": "a4", "value": "v4"},
]
)
)
sample_dir_updated_args = sample_dir_args.copy()
sample_dir_updated_args[sample_dir_updated_args.index("ss_1000#testZone")] = (
"ss_2000#testZone"
)
sample_dir_updated_args[sample_dir_updated_args.index(str(sample_metadata))] = (
str(sample_metadata_updated)
)
self._main(sample_dir_updated_args)

# Assert
assert is_inheritance_enabled(
Expand All @@ -142,9 +211,38 @@ def test_npg_publish_tree_compatibility_ultima(
Collection(dest / "000001-d"),
DataObject(dest / "000001_a.txt"),
DataObject(dest / "b.txt"),
DataObject(dest / "c.txt"),
DataObject(dest / "000001-a" / "000002-c.txt"),
DataObject(dest / "000001-d" / "000001-d.txt"),
]
assert [x.attribute for x in DataObject(dest / "b.txt").metadata()] == [
"dcterms:created",
"dcterms:creator",
"md5",
"type",
]

assert [x.attribute for x in DataObject(dest / "c.txt").metadata()] == [
"dcterms:created",
"dcterms:creator",
"md5",
"md5",
"type",
]
assert [
x.value
for x in DataObject(dest / "c.txt").metadata()
if x.attribute == "md5"
] == [
c_txt_original_md5,
c_txt_modified_md5,
], "New md5 attribute should be added, previous preserved to provide history"
assert (
DataObject(dest / "c.txt").read() == "modified"
), "Contents of data object in iRODS should have updated"
assert (
DataObject(dest / "c.txt").checksum() == c_txt_modified_md5
), "Checksum should have updated"
Comment on lines +218 to +245
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Take care not to recapitulate tests that are already covered in partisan (which is responsible for what you're testing here).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cheers for the feedback Keith. Yea I've been wondering about the structure/approach here too - I'll tighten it up so we've got something tighter to discuss around, share context of what I'm trying to do here and then perhaps we can take a look at it together


assert DataObject(dest / "000001_a.txt").acl() == [
ADMIN_AC,
Expand All @@ -156,15 +254,46 @@ def test_npg_publish_tree_compatibility_ultima(
PUBLIC_AC,
UNMANAGED_AC,
]
assert DataObject(dest / "c.txt").acl() == [
ADMIN_AC,
PUBLIC_AC,
UNMANAGED_AC,
]

assert is_inheritance_enabled(dest / "000001-a")
assert Collection(dest / "000001-a").acl() == [ADMIN_AC, STUDY_AC, UNMANAGED_AC]
assert Collection(dest / "000001-a").metadata() == [AVU("a2", "v2")]
assert Collection(dest / "000001-a").acl() == [
ADMIN_AC,
STUDY2_AC,
UNMANAGED_AC,
]
assert [
x
for x in Collection(dest / "000001-a").metadata()
if x.attribute != "a3_history"
] == [
AVU("a2", "v2"), # Earlier metadata preserved
AVU("a3", "v3_updated"),
AVU("a4", "v4"),
]
assert history_in_meta(
AVU.history(AVU("a3", "v3")), Collection(dest / "000001-a").metadata()
)
assert len(Collection(dest / "000001-a").metadata()) == 4

assert DataObject(dest / "000001-a" / "000002-c.txt").acl() == [
ADMIN_AC,
STUDY_AC,
STUDY2_AC,
UNMANAGED_AC,
]
assert [
x.attribute
for x in DataObject(dest / "000001-a" / "000002-c.txt").metadata()
] == [
"dcterms:created",
"dcterms:creator",
"md5",
"type",
]

assert is_inheritance_enabled(dest / "000001-d")
assert Collection(dest / "000001-d").acl() == [
Expand Down