From d16dc2b9ad2211bd732a8fc34407f72b69dc82c6 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 11 Feb 2026 13:25:33 +0000
Subject: [PATCH 1/5] Fixes to dataset equivalence testing on xarray loads.

---
 .../test_xarray_load_and_save_equivalence.py  | 41 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py
index 0030dca..8b532f0 100644
--- a/tests/integration/test_xarray_load_and_save_equivalence.py
+++ b/tests/integration/test_xarray_load_and_save_equivalence.py
@@ -6,6 +6,7 @@
 (2) check equivalence of files : xarray -> file VS xarray->ncdata->file
 """
 
+import numpy as np
 import pytest
 import xarray
 
@@ -13,7 +14,6 @@
 from ncdata.threadlock_sharing import lockshare_context
 from ncdata.utils import dataset_differences
 from ncdata.xarray import from_xarray, to_xarray
-
 from tests.data_testcase_schemas import (
     BAD_LOADSAVE_TESTCASES,
     session_testdir,
@@ -38,6 +38,35 @@ def use_xarraylock():
         yield
 
 
+def equivalence_fix_datasets(
+    ds_from: xarray.Dataset, ds_to: xarray.Dataset
+) -> (xarray.Dataset, xarray.Dataset):
+    """
+    Modify datasets in legitimate ways to make "ds_from.identical(ds_to)".
+
+    The key differences are due to coordinates remaining lazy in loading via ncdata, but
+    have data fetched in the "normal" load.
+    The coordinates apparently remain 'identical', but it affects the dataset indexes.
+
+    Minimum found necessary : where in 'ds_from' we find a lazy coordinate, which is a
+    real one in 'ds_to', remove the associated index from 'ds_to'.
+    """
+    drop_indices = []
+    for varname, var in ds_from.variables.items():
+        if hasattr(var.data, "compute"):
+            var_other = ds_to.variables.get(varname, None)
+            if isinstance(var_other.data, np.ndarray):
+                # This is lazy, but the reference var is real :  replace with real data.
+                if varname in ds_to.indexes:
+                    drop_indices.append(varname)
+
+    # NB drop_indexes is *not* an inplace operation!
+    # So replace returned 'ds_to' with new dataset.
+    ds_to = ds_to.drop_indexes(drop_indices)
+    # NB: as it currently is, we do *not* ever have to modify/replace 'ds_from'.
+    return ds_from, ds_to
+
+
 def test_load_direct_vs_viancdata(standard_testcase, use_xarraylock, tmp_path):
     source_filepath = standard_testcase.filepath
     ncdata = from_nc4(source_filepath)
@@ -52,7 +81,15 @@ def test_load_direct_vs_viancdata(standard_testcase, use_xarraylock, tmp_path):
     # Load same, via ncdata
     xr_ncdata_ds = to_xarray(ncdata)
 
-    # Treat as OK if it passes xarray comparison
+    # Check that datasets are "equal" : but NB this only compares values
+    assert xr_ds.equals(xr_ncdata_ds)
+
+    # 'Fix' equivalence, by making lazy vars real + removing missing indices.
+    # These are the expected differences due to ncdata passing lazy arrays.
+    # This should then make "Dataset.identical" true.
+    xr_ncdata_ds, xr_ds = equivalence_fix_datasets(
+        ds_from=xr_ncdata_ds, ds_to=xr_ds
+    )
     assert xr_ds.identical(xr_ncdata_ds)
 
 

From 08879a0d520e1dc242c5cb30fdba8f5c3b5114c4 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 11 Feb 2026 16:00:32 +0000
Subject: [PATCH 2/5] Added towncrier fragment.

---
 docs/changelog_fragments/195.dev.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 docs/changelog_fragments/195.dev.rst

diff --git a/docs/changelog_fragments/195.dev.rst b/docs/changelog_fragments/195.dev.rst
new file mode 100644
index 0000000..e8a68a4
--- /dev/null
+++ b/docs/changelog_fragments/195.dev.rst
@@ -0,0 +1 @@
+Fixed the xarray load tests to cope with the new behaviour of ``xarray.Dataset.identical``.

From 38861c25db0d72bd1d38cec03b78a6d7b2c0d934 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 12 Feb 2026 17:12:48 +0000
Subject: [PATCH 3/5] Use own concept of 'dataset equivalence'.

---
 .../test_xarray_load_and_save_equivalence.py  | 81 +++++++++++--------
 1 file changed, 48 insertions(+), 33 deletions(-)

diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py
index 8b532f0..9290b62 100644
--- a/tests/integration/test_xarray_load_and_save_equivalence.py
+++ b/tests/integration/test_xarray_load_and_save_equivalence.py
@@ -38,33 +38,57 @@ def use_xarraylock():
         yield
 
 
-def equivalence_fix_datasets(
-    ds_from: xarray.Dataset, ds_to: xarray.Dataset
-) -> (xarray.Dataset, xarray.Dataset):
+def check_load_equivalence(ds1: xarray.Dataset, ds2: xarray.Dataset):
     """
-    Modify datasets in legitimate ways to make "ds_from.identical(ds_to)".
+    Check that datasets differ only in "expected" ways.
 
     The key differences are due to coordinates remaining lazy in loading via ncdata, but
-    have data fetched in the "normal" load.
-    The coordinates apparently remain 'identical', but it affects the dataset indexes.
-
-    Minimum found necessary : where in 'ds_from' we find a lazy coordinate, which is a
-    real one in 'ds_to', remove the associated index from 'ds_to'.
+    having real data in a "normal" load.  This also affects which coords have indexes,
+    but we are not checking that here anyway.
     """
-    drop_indices = []
-    for varname, var in ds_from.variables.items():
-        if hasattr(var.data, "compute"):
-            var_other = ds_to.variables.get(varname, None)
-            if isinstance(var_other.data, np.ndarray):
-                # This is lazy, but the reference var is real :  replace with real data.
-                if varname in ds_to.indexes:
-                    drop_indices.append(varname)
-
-    # NB drop_indexes is *not* an inplace operation!
-    # So replace returned 'ds_to' with new dataset.
-    ds_to = ds_to.drop_indexes(drop_indices)
-    # NB: as it currently is, we do *not* ever have to modify/replace 'ds_from'.
-    return ds_from, ds_to
+
+    def check_attrs_equivalent(attrs1, attrs2):
+        # Because dict-eq does not work when values can be arrays (!)
+        okay = set(attrs1.keys()) == set(attrs2.keys())
+        if okay:
+            for attr in attrs1:
+                okay = np.all(attrs1[attr] == attrs2[attr])
+                if not okay:
+                    break
+        assert okay
+
+    def check_vars_equivalent(v1, v2):
+        check_attrs_equivalent(v1.attrs, v2.attrs)
+        assert v1.dims == v2.dims
+        assert v1.dtype == v2.dtype
+        if v1.dtype.kind not in ("iufM"):
+            # Nonnumeric cases are relatively simple
+            result = np.all(v1.data == v2.data)
+        else:
+            # Numeric cases must allow for NaNs, which don't compare
+            d1, d2 = v1.data, v2.data
+            if d1.ndim == 0:
+                # awkward special case where indexing operations otherwise fail
+                d1, d2 = [a.reshape((a.size,)) for a in (d1, d2)]
+            data_diff = d1 - d2
+            # Account for NaN -or "NaT" for time types
+            data_diff = data_diff[np.logical_not(np.isnan(data_diff))]
+            # Note: not entirely happy with exact equality, but the time types make this
+            if data_diff.dtype.kind == "f":
+                # Slight tolerance on floats
+                result = np.allclose(data_diff, 0)
+            else:
+                # Exact equality - including time types, which allclose can't handle.
+                result = np.all(data_diff == 0)
+        if hasattr(result, "compute"):
+            result = result.compute()
+        assert result
+
+    check_attrs_equivalent(ds1.attrs, ds2.attrs)
+    assert ds1.dims == ds2.dims
+    assert list(ds1.variables) == list(ds2.variables)
+    for varname in ds1.variables:
+        check_vars_equivalent(ds1.variables[varname], ds2.variables[varname])
 
 
 def test_load_direct_vs_viancdata(standard_testcase, use_xarraylock, tmp_path):
@@ -81,16 +105,7 @@ def test_load_direct_vs_viancdata(standard_testcase, use_xarraylock, tmp_path):
     # Load same, via ncdata
     xr_ncdata_ds = to_xarray(ncdata)
 
-    # Check that datasets are "equal" : but NB this only compares values
-    assert xr_ds.equals(xr_ncdata_ds)
-
-    # 'Fix' equivalence, by making lazy vars real + removing missing indices.
-    # These are the expected differences due to ncdata passing lazy arrays.
-    # This should then make "Dataset.identical" true.
-    xr_ncdata_ds, xr_ds = equivalence_fix_datasets(
-        ds_from=xr_ncdata_ds, ds_to=xr_ds
-    )
-    assert xr_ds.identical(xr_ncdata_ds)
+    check_load_equivalence(xr_ds, xr_ncdata_ds)
 
 
 def test_save_direct_vs_viancdata(standard_testcase, tmp_path):

From 3a85ebbc2ca036f193815c7376581e0a2b266f3c Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 13 Feb 2026 18:19:40 +0000
Subject: [PATCH 4/5] Simplify xr.Dataset equivalence test.

---
 .../test_xarray_load_and_save_equivalence.py  | 25 +++++--------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py
index 9290b62..aa236db 100644
--- a/tests/integration/test_xarray_load_and_save_equivalence.py
+++ b/tests/integration/test_xarray_load_and_save_equivalence.py
@@ -14,6 +14,7 @@
 from ncdata.threadlock_sharing import lockshare_context
 from ncdata.utils import dataset_differences
 from ncdata.xarray import from_xarray, to_xarray
+
 from tests.data_testcase_schemas import (
     BAD_LOADSAVE_TESTCASES,
     session_testdir,
@@ -61,25 +62,11 @@ def check_vars_equivalent(v1, v2):
         check_attrs_equivalent(v1.attrs, v2.attrs)
         assert v1.dims == v2.dims
         assert v1.dtype == v2.dtype
-        if v1.dtype.kind not in ("iufM"):
-            # Nonnumeric cases are relatively simple
-            result = np.all(v1.data == v2.data)
-        else:
-            # Numeric cases must allow for NaNs, which don't compare
-            d1, d2 = v1.data, v2.data
-            if d1.ndim == 0:
-                # awkward special case where indexing operations otherwise fail
-                d1, d2 = [a.reshape((a.size,)) for a in (d1, d2)]
-            data_diff = d1 - d2
-            # Account for NaN -or "NaT" for time types
-            data_diff = data_diff[np.logical_not(np.isnan(data_diff))]
-            # Note: not entirely happy with exact equality, but the time types make this
-            if data_diff.dtype.kind == "f":
-                # Slight tolerance on floats
-                result = np.allclose(data_diff, 0)
-            else:
-                # Exact equality - including time types, which allclose can't handle.
-                result = np.all(data_diff == 0)
+        # Numeric compare may need to allow for NaNs : floats *and datetimes*
+        equal_nan = (
+            v1.dtype.kind in "fM"
+        )  # cannot set kwarg when not applicable
+        result = np.array_equal(v1.data, v2.data, equal_nan=equal_nan)
         if hasattr(result, "compute"):
             result = result.compute()
         assert result

From bcc5ed6fa95cc6cea6e627e8d2d2785df15d0153 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 13 Feb 2026 18:25:44 +0000
Subject: [PATCH 5/5] Reconfigure docs to ref SciTools.

---
 docs/conf.py   |  6 +++---
 pyproject.toml | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 32f3f87..f354243 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -19,8 +19,8 @@
 # -- Project information -----------------------------------------------------
 
 project = "ncdata"
-copyright = "2023, pp-mo"
-author = "pp-mo"
+copyright = "2023, SciTools"
+author = "SciTools"
 
 # The complete version, including alpha/beta/rc tags
 version_parts = [str(part) for part in version_tuple]
@@ -149,7 +149,7 @@
 html_context = {
     # Possibly needed for pydata_theme?
     "github_repo": "ncdata",
-    "github_user": "pp-mo",
+    "github_user": "SciTools",
     "github_version": "main",
     "doc_path": "docs",
     # Default light/dark mode.
diff --git a/pyproject.toml b/pyproject.toml
index 8043c4e..da7d6af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ncdata"
 authors = [
-    {name = "Patrick Peglar", email = "patrick.peglar@metoffice.gov.uk"},
+    {name = "Iris Contributors", email = "scitools.pub@gmail.com"}
 ]
 description = "Abstract NetCDF data objects, providing fast data transfer between analysis packages."
 requires-python = ">=3.10"
@@ -50,10 +50,10 @@ dependencies = ["numpy", "dask", "netCDF4"]
 
 
 [project.urls]
-Code = "https://github.com/pp-mo/ncdata"
-Discussions = "https://github.com/pp-mo/ncdata/discussions"
+Code = "https://github.com/SciTools/ncdata"
+Discussions = "https://github.com/SciTools/ncdata/discussions"
 Documentation = "https://ncdata.readthedocs.io"
-Issues = "https://github.com/pp-mo/ncdata/issues"
+Issues = "https://github.com/SciTools/ncdata/issues"
 
 [tool.setuptools]
 license-files = ["LICENSE"]
@@ -88,7 +88,7 @@ package = "ncdata"
 package_dir = "lib"
 directory = "docs/changelog_fragments"
 filename = "docs/change_log.rst"
-issue_format = "`ISSUE#{issue} <https://github.com/pp-mo/ncdata/pull/{issue}>`_"
+issue_format = "`ISSUE#{issue} <https://github.com/SciTools/ncdata/pull/{issue}>`_"
 underlines = ["~", "^", "*", "+"]
 
 [[tool.towncrier.type]]