From 847e4ebc2cd10db80a10367c73092ea2debd5849 Mon Sep 17 00:00:00 2001
From: C1-BA-B1-F3 <C1-BA-B1-F3@users.noreply.github.com>
Date: Fri, 26 Jun 2026 10:00:49 +0800
Subject: [PATCH 1/4] fix: cache scipy flush_only_netcdf_file class to fix
 pickle identity (GH#11323)

The `flush_only_netcdf_file` class was defined inside `_open_scipy_netcdf()`,
so each call created a new class object. After opening two scipy-backed
datasets from file-like objects, the first dataset's class reference became
unreachable by qualname, causing pickle's class-identity check to fail with:

    PicklingError: Can't pickle
    <class 'xarray.backends.scipy_._PickleWorkaround.flush_only_netcdf_file'>:
    it's not the same object as
    xarray.backends.scipy_._PickleWorkaround.flush_only_netcdf_file

Fix: create the class once in `_get_flush_only_class()`, set its
`__qualname__` to a module-level name, and register it as a module
attribute so pickle can always resolve it.

Regression test included.
---
 xarray/backends/scipy_.py     | 83 +++++++++++++++++++----------------
 xarray/tests/test_backends.py | 15 +++++++
 2 files changed, 61 insertions(+), 37 deletions(-)
diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py
index 9d5f33e8947..39bac7432f7 100644
--- a/xarray/backends/scipy_.py
+++ b/xarray/backends/scipy_.py
@@ -122,16 +122,51 @@ def __setitem__(self, key, value):
                     raise
 
 
-# This is a dirty workaround to allow pickling of the flush_only_netcdf_file class.
-# https://stackoverflow.com/questions/72766345/attributeerror-cant-pickle-local-object-in-multiprocessing
-# TODO: Remove this after upstreaming the fixes to scipy.
-class _PickleWorkaround:
-    flush_only_netcdf_file: type[scipy.io.netcdf_file]
-
-    @classmethod
-    def add_cls(cls, new_class: type[Any]) -> None:
-        setattr(cls, new_class.__name__, new_class)
-        new_class.__qualname__ = cls.__qualname__ + "." + new_class.__name__
+# Cached class created once so its identity is stable for pickle.
+# The class must not be re-created on each call to _open_scipy_netcdf;
+# otherwise pickle sees a different class object when looking up the
+# qualname and raises PicklingError (GH#11323).
+#
+# We set __qualname__ to a module-level name so pickle can always
+# resolve the class via ``xarray.backends.scipy_.flush_only_netcdf_file``.
+_flush_only_class: type[Any] | None = None
+
+
+def _get_flush_only_class() -> type[Any]:
+    global _flush_only_class
+    if _flush_only_class is None:
+        import scipy.io
+
+        # TODO: Remove this after upstreaming these fixes.
+        class flush_only_netcdf_file(scipy.io.netcdf_file):
+            # scipy.io.netcdf_file.close() incorrectly closes file objects that
+            # were passed in as constructor arguments:
+            # https://github.com/scipy/scipy/issues/13905
+
+            # Instead of closing such files, only call flush(), which is
+            # equivalent as long as the netcdf_file object is not mmapped.
+            # This suffices to keep BytesIO objects open long enough to read
+            # their contents from to_netcdf(), but underlying files still get
+            # closed when the netcdf_file is garbage collected (via __del__),
+            # and will need to be fixed upstream in scipy.
+            def close(self):
+                if hasattr(self, "fp") and not self.fp.closed:
+                    self.flush()
+                    self.fp.seek(0)  # allow file to be read again
+
+            def __del__(self):
+                # Remove the __del__ method, which in scipy is aliased to close().
+                # These files need to be closed explicitly by xarray.
+                pass
+
+        flush_only_netcdf_file.__qualname__ = "flush_only_netcdf_file"
+        _flush_only_class = flush_only_netcdf_file
+        # Make the class accessible as a module attribute so pickle can
+        # resolve it by qualname ``xarray.backends.scipy_.flush_only_netcdf_file``.
+        import sys
+
+        sys.modules[__name__].flush_only_netcdf_file = _flush_only_class
+    return _flush_only_class
 
 
 def _open_scipy_netcdf(
@@ -143,33 +178,7 @@ def _open_scipy_netcdf(
 ) -> scipy.io.netcdf_file:
     import scipy.io
 
-    # TODO: Remove this after upstreaming these fixes.
-    class flush_only_netcdf_file(scipy.io.netcdf_file):
-        # scipy.io.netcdf_file.close() incorrectly closes file objects that
-        # were passed in as constructor arguments:
-        # https://github.com/scipy/scipy/issues/13905
-
-        # Instead of closing such files, only call flush(), which is
-        # equivalent as long as the netcdf_file object is not mmapped.
-        # This suffices to keep BytesIO objects open long enough to read
-        # their contents from to_netcdf(), but underlying files still get
-        # closed when the netcdf_file is garbage collected (via __del__),
-        # and will need to be fixed upstream in scipy.
-        def close(self):
-            if hasattr(self, "fp") and not self.fp.closed:
-                self.flush()
-                self.fp.seek(0)  # allow file to be read again
-
-        def __del__(self):
-            # Remove the __del__ method, which in scipy is aliased to close().
-            # These files need to be closed explicitly by xarray.
-            pass
-
-    _PickleWorkaround.add_cls(flush_only_netcdf_file)
-
-    netcdf_file = (
-        _PickleWorkaround.flush_only_netcdf_file if flush_only else scipy.io.netcdf_file
-    )
+    netcdf_file = _get_flush_only_class() if flush_only else scipy.io.netcdf_file
 
     # if the string ends with .gz, then gunzip and open as netcdf file
     if isinstance(filename, str) and filename.endswith(".gz"):
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 4e08b71260b..33a9e3c9deb 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -4579,6 +4579,21 @@ def roundtrip(
         with self.open(saved, **open_kwargs) as ds:
             yield ds
 
+    def test_pickle_after_multiple_opens_from_bytes(self) -> None:
+        # Regression test for GH#11323: opening two scipy-backed datasets
+        # from BytesIO objects would overwrite the cached flush_only class,
+        # making the first dataset unpicklable.
+        original = Dataset({"foo": ("x", [1, 2, 3])})
+        netcdf_bytes = bytes(original.to_netcdf(engine=self.engine))
+        ds1 = open_dataset(BytesIO(netcdf_bytes), engine=self.engine)
+        ds2 = open_dataset(BytesIO(netcdf_bytes), engine=self.engine)
+        try:
+            with pickle.loads(pickle.dumps(ds1)) as unpickled:
+                assert_identical(unpickled, original)
+        finally:
+            ds1.close()
+            ds2.close()
+
     @pytest.mark.asyncio
     @pytest.mark.skip(reason="NetCDF backends don't support async loading")
     async def test_load_async(self) -> None:

From 037a8d31b7fe9c89763a48929b99371338bee3f4 Mon Sep 17 00:00:00 2001
From: CI Bot <ci@xarray.dev>
Date: Fri, 26 Jun 2026 16:04:11 +0800
Subject: [PATCH 2/4] fix: add type ignore for dynamic module attribute
 assignment

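The line

    sys.modules[__name__].flush_only_netcdf_file = _flush_only_class

dynamically adds an attribute to the module for pickle resolution.
Mypy cannot track this pattern, so we add `# type: ignore[attr-defined]`
to that line.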

Fixes mypy error:
  xarray/backends/scipy_.py:168: error: Module has no attribute
  "flush_only_netcdf_file" [attr-defined]
---
 xarray/backends/scipy_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py
index 39bac7432f7..13dff16aee3 100644
--- a/xarray/backends/scipy_.py
+++ b/xarray/backends/scipy_.py
@@ -165,7 +165,7 @@ def __del__(self):
         # resolve it by qualname ``xarray.backends.scipy_.flush_only_netcdf_file``.
         import sys
 
-        sys.modules[__name__].flush_only_netcdf_file = _flush_only_class
+        sys.modules[__name__].flush_only_netcdf_file = _flush_only_class  # type: ignore[attr-defined]
     return _flush_only_class
 
 

From a2cb58d8c70d27438db7aa2d04a71e6b4b462918 Mon Sep 17 00:00:00 2001
From: Fix 11417 <fix-11417@users.noreply.github.com>
Date: Fri, 26 Jun 2026 20:06:53 +0800
Subject: [PATCH 3/4] fix: preserve scalar variables in reduce operations
 (GH#11417)

When calling reduce operations like sum/mean on a Dataset with scalar
(non-dimensional) data variables, the reduce_maybe_single logic would
set axis=None for 0-d variables with no matching reduce dims. This
caused numpy to attempt reduction on the scalar value itself, which
failed for non-numeric types like strings.

The fix adds a check that reduce_dims is non-empty before setting
reduce_maybe_single=None. When reduce_dims is empty (variable doesn't
have the target dimension), reduce_maybe_single stays as [] (empty
list), which triggers the invariant_0d check in duck_array_ops and
returns the scalar value unchanged.

Closes #11417
---
 xarray/core/dataset.py       |  4 +++-
 xarray/tests/test_dataset.py | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 1ce84904623..bf4fae2922a 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6966,7 +6966,9 @@ def reduce(
                 # keep single-element dims as list, to support Hashables
                 reduce_maybe_single = (
                     None
-                    if len(reduce_dims) == var.ndim and var.ndim != 1
+                    if reduce_dims
+                    and len(reduce_dims) == var.ndim
+                    and var.ndim != 1
                     else reduce_dims
                 )
                 variables[name] = var.reduce(
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 762c647f15c..c95816ca904 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6361,6 +6361,31 @@ def test_reduce_scalars(self) -> None:
         actual = ds.var("a")
         assert_identical(expected, actual)
 
+    def test_reduce_string_scalar(self) -> None:
+        # regression test for GH#11417
+        # scalar variables without the reduce dim should be preserved
+        ds = Dataset(
+            data_vars={
+                "a": (["index"], [1, 2, 3]),
+                "d": ([], "hello"),
+            }
+        )
+        expected = Dataset({"a": 6, "d": "hello"})
+        actual = ds.sum("index")
+        assert_identical(expected, actual)
+
+        expected = Dataset({"a": 2.0, "d": "hello"})
+        actual = ds.mean("index")
+        assert_identical(expected, actual)
+
+        expected = Dataset({"a": 1, "d": "hello"})
+        actual = ds.min("index")
+        assert_identical(expected, actual)
+
+        expected = Dataset({"a": 3, "d": "hello"})
+        actual = ds.max("index")
+        assert_identical(expected, actual)
+
     def test_reduce_only_one_axis(self) -> None:
         def mean_only_one_axis(x, axis):
             if not isinstance(axis, integer_types):

From 11415677e87e0c26e38d2672ba5679779d0575ce Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 26 Jun 2026 12:08:02 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/core/dataset.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index bf4fae2922a..e40b9a7b104 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6966,9 +6966,7 @@ def reduce(
                 # keep single-element dims as list, to support Hashables
                 reduce_maybe_single = (
                     None
-                    if reduce_dims
-                    and len(reduce_dims) == var.ndim
-                    and var.ndim != 1
+                    if reduce_dims and len(reduce_dims) == var.ndim and var.ndim != 1
                     else reduce_dims
                 )
                 variables[name] = var.reduce(