Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions packages/essnmx/src/ess/nmx/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
"small_nmx_nexus.hdf.zip": Entry(
alg="md5", chk="96877cddc9f6392c96890069657710ca", extractor="unzip"
),
"essnmx-reduced-26.4.1.hdf.zip": Entry(
alg="md5", chk="f7877cce3967dec0aa51461c1100824d", extractor="unzip"
),
},
)

Expand Down Expand Up @@ -75,3 +78,15 @@ def get_small_nmx_nexus() -> pathlib.Path:
"""Return the path to a small NMX NeXus file."""

return get_path("small_nmx_nexus.hdf.zip")


def get_small_nmx_reduced() -> pathlib.Path:
"""Return the path to a reduced NXlauetof file.

It is reduced from the small nmx nexus file
using specific version of essnmx.
It is frozen as reference file to check if the output
of essnmx-reduce changed.
"""

return get_path("essnmx-reduced-26.4.1.hdf.zip")
109 changes: 109 additions & 0 deletions packages/essnmx/tests/executable_output_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2026 Scipp contributors (https://github.com/scipp)
"""Compare the workflow output to be consistent with the reference output file.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if we could simplify this by saving the reference solution in the Scipp hdf5 format.
That way, loading and cmoparing to the reference would just be a

ref = sc.io.load_hdf5('reference.h5')
assert sc.identical(da, ref)

However, this would not be testing that the data in the NX lauetof format has not changed.
So if something gets updated in the reduced file writer that breaks things, it would not be caught by such an implementation...

Can we somehow think of a way to combine both the simplicity and full coverage?

Copy link
Copy Markdown
Member Author

@YooSunYoung YooSunYoung Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't really think of simpler solution than this...
and I think this test should stay naive and simple and hard-coded as possible so that anyone can understand what should not change.


This test detects any changes in the output file.
If anything must change, consult the IDS member
and update the frozen file in the `ess.nmx.data` registry.
"""

import pathlib

# The bitshuffle plugin needs to be imported at least once in the session
# so that h5py can use the plugin.
import bitshuffle.h5 # noqa: F401
import h5py
import numpy as np
import pytest

from ess.nmx.configurations import (
InputConfig,
OutputConfig,
ReductionConfig,
WorkflowConfig,
)
from ess.nmx.data import get_small_nmx_nexus, get_small_nmx_reduced
from ess.nmx.executables import reduction
from ess.nmx.types import Compression


def assert_h5_attrs_equal(
attrs_left: h5py.AttributeManager,
attrs_right: h5py.AttributeManager,
cur_path: pathlib.Path,
) -> None:

assert attrs_left.keys() == attrs_right.keys()
for attr_key, attr in attrs_left.items():
if not isinstance(attr, str) and hasattr(attr, '__len__'):
assert all(attr == attrs_right[attr_key]), cur_path
else:
assert attr == attrs_right[attr_key], cur_path


def assert_h5obj_equal(
obj_left: h5py.Group | h5py.Dataset,
obj_right: h5py.Group | h5py.Dataset,
_cur_path: pathlib.Path = pathlib.Path('/'),
excluded_paths: tuple[pathlib.Path, ...] = (),
) -> None:
assert type(obj_left) is type(obj_right)
assert_h5_attrs_equal(obj_left.attrs, obj_right.attrs, _cur_path)
if isinstance(obj_left, h5py.Group) and isinstance(obj_right, h5py.Group):
for key in obj_left.keys():
cur_sub_path = _cur_path / key
if cur_sub_path in excluded_paths:
continue
else:
assert key in obj_right
assert_h5obj_equal(
obj_left[key],
obj_right[key],
_cur_path=cur_sub_path,
excluded_paths=excluded_paths,
)
else:
values_left = obj_left[()]
values_right = obj_right[()]
assert type(values_left) is type(values_right)
if not isinstance(values_left, np.ndarray) or not isinstance(
values_right, np.ndarray
):
assert values_left == values_right, _cur_path
else:
assert values_left.shape == values_right.shape, _cur_path
# We use np.allclose instead of np.all(left==right)
# since some reduction operations might give a small difference every time.
# Tolerance of 1e-13 was decided since assertion starts failing at 1e-15
assert np.allclose(values_left, values_right, rtol=1e-13, atol=1e-13), (
_cur_path
)


def test_compare_output_file_with_frozen(tmp_path: pathlib.Path):
"""Test that the executable runs and returns the expected output."""

# Make a new output file from current implementation.
input_file = get_small_nmx_nexus()
output_file = tmp_path / "scipp_output_current.h5"
assert not output_file.exists()
config = ReductionConfig(
inputs=InputConfig(input_file=[input_file.as_posix()]),
workflow=WorkflowConfig(),
output=OutputConfig(
output_file=output_file.as_posix(),
compression=Compression.NONE,
skip_file_output=False,
),
)
with pytest.warns(RuntimeWarning, match="No crystal rotation*"):
_ = reduction(config=config)

entry_path = pathlib.Path('/entry')
excluded_paths = (
entry_path / 'reducer/program', # version should be different
)
ref_file_path = get_small_nmx_reduced()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would really like to find a good way to make updating reference solutions easy.
This has been one of the main reasons why I have not been motivated to introduce a whole load of regression tests, because of past experience where we just spent so much time constantly updating reference solutions.

Something like a flag when running pytest that, instead of failing the test, it would make a new file and suggest a patch to the data registry to make updating less cumbersome.

But this was mostly just to get a discussion going. It is beyond the scope of this PR.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally agree...

with h5py.File(output_file) as cur_file:
with h5py.File(ref_file_path) as reference_file:
assert_h5obj_equal(reference_file, cur_file, excluded_paths=excluded_paths)
Loading