Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
041af2d
Initial tests.
pp-mo Oct 24, 2025
65bd9dd
Get 'create_cf_data_variable' to call 'create_generic_cf_array_var': …
pp-mo Oct 25, 2025
d75a7a7
Reinstate decode on load, now in-Iris coded.
pp-mo Oct 28, 2025
07efc06
Revert and amend.
pp-mo Dec 7, 2025
2321077
Hack to preserve the existing order of attributes on saved Coords and…
pp-mo Oct 29, 2025
0174e53
Fix for dataless; avoid FUTURE global state change from temporary tests.
pp-mo Oct 29, 2025
035e28b
Further fix to attribute ordering.
pp-mo Oct 29, 2025
80c4776
Fixes for data packing.
pp-mo Oct 29, 2025
d4d3ebd
Latest test-chararrays.
pp-mo Dec 7, 2025
3f10cc1
Fix search+replace error.
pp-mo Dec 7, 2025
ee2fe4c
Tiny fix in crucial place! (merge error?).
pp-mo Jan 14, 2026
744826d
Extra mock property prevents weird test crashes.
pp-mo Jan 14, 2026
a3e1217
Fix another mock problem.
pp-mo Jan 14, 2026
1a4f2f2
Initial dataset wrappers.
pp-mo Oct 31, 2025
0148f43
Various notes, choices + changes: Beginnings of encoded-dataset testing.
pp-mo Dec 3, 2025
20a5be2
Replace use of encoding functions with test-specific function: Test f…
pp-mo Dec 5, 2025
9b621bf
Radically simplify 'make_bytesarray', by using a known specified byte…
pp-mo Dec 5, 2025
b366fd2
Add read tests.
pp-mo Dec 5, 2025
cf048b2
Remove iris width control (not in this layer).
pp-mo Dec 5, 2025
e684d1d
more notes
pp-mo Dec 5, 2025
28b124c
Merge branch 'encoded_datasets' into chardata_plus_encoded_datasets
pp-mo Jan 19, 2026
a20cc45
Remove temporary test code.
pp-mo Jan 19, 2026
c995a8d
Use iris categorised warnings for unknown encodings.
pp-mo Jan 19, 2026
f118c18
Clarify the temporary load/save exercising tests (a bit).
pp-mo Jan 19, 2026
c8a27df
Use bytecoded_datasets in nc load+save, begin fixes.
pp-mo Jan 17, 2026
c4a31a4
Further attempt to satisfy warning category checker.
pp-mo Jan 19, 2026
10831d7
Fix overlength error tests.
pp-mo Jan 19, 2026
042028e
Get temporary iris load/save exercises working (todo: proper tests).
pp-mo Jan 19, 2026
94b2b21
Put encoding information into separate converter class, for use in pr…
pp-mo Jan 21, 2026
c4b7936
First proper testing (reads working).
pp-mo Jan 21, 2026
ac3e687
Encoded reading ~working; new ideas for switching (untested).
pp-mo Jan 23, 2026
9ec31fb
Check loads when coords do/not share a string dim with data.
pp-mo Jan 27, 2026
9bdeb5d
Fix nondecoded reference loads in test_bytecoded_datasets.
pp-mo Jan 27, 2026
54d7743
Test writing of string data: various encodings, from strings or bytes.
pp-mo Jan 27, 2026
6a37f62
Fix write proxy; tmp_path in stringdata tests; tidy stringdata tests.
pp-mo Jan 28, 2026
cf9594b
Fix for non-string data.
pp-mo Jan 28, 2026
ef11375
Pre-clear load problems.
pp-mo Jan 28, 2026
2dbdcba
Fix mock patches.
pp-mo Feb 27, 2026
a34ea09
Fix patches in test_CFReader.
pp-mo Feb 27, 2026
aa1fe03
Fix variable creation in odd cases.
pp-mo Feb 27, 2026
f5d50ee
Ignore attribute reordering in scaling-packed saves.
pp-mo Feb 27, 2026
b2c6d51
Fix test for refactored proxy constructor.
pp-mo Feb 27, 2026
dfd4d91
Fix get_cf_var_data to support vlen-string.
pp-mo Feb 27, 2026
274fae4
Add back new test results, folder removed in error.
pp-mo Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
),
)
if problem is not None:
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Skipping disallowed global attribute '{attr_name}' (see above error)"
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]


################################################################################
Expand Down Expand Up @@ -1536,14 +1536,14 @@ def build_and_add_dimension_coordinate(
)
if problem is not None:
coord_var_name = str(cf_coord_var.cf_name)
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Failed to create {coord_var_name} dimension coordinate:\n"
f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]
problem.handled = True

_ = _add_or_capture(
Expand Down Expand Up @@ -1643,9 +1643,13 @@ def _add_auxiliary_coordinate(

# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
common_dims = [
dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions
]
coord_dims = cf_coord_var.dimensions
# if cf._is_str_dtype(cf_coord_var):
# coord_dims = coord_dims[:-1]
datavar_dims = engine.cf_var.dimensions
# if cf._is_str_dtype(engine.cf_var):
# datavar_dims = datavar_dims[:-1]
common_dims = [dim for dim in coord_dims if dim in datavar_dims]
data_dims = None
if common_dims:
# Calculate the offset of each common dimension.
Expand Down
113 changes: 66 additions & 47 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

import iris.exceptions
import iris.fileformats._nc_load_rules.helpers as hh
from iris.fileformats.netcdf import _thread_safe_nc
from iris.fileformats.netcdf import _bytecoding_datasets, _thread_safe_nc
from iris.mesh.components import Connectivity
import iris.util
import iris.warnings
Expand Down Expand Up @@ -67,7 +67,9 @@

# NetCDF returns a different type for strings depending on Python version.
def _is_str_dtype(var):
return np.issubdtype(var.dtype, np.bytes_)
# N.B. use 'datatype' not 'dtype', to "look inside" variable wrappers which
# represent 'S1' type data as 'U<xx>'.
return isinstance(var.datatype, np.dtype) and np.issubdtype(var.datatype, np.bytes_)


################################################################################
Expand Down Expand Up @@ -788,50 +790,63 @@ def cf_label_data(self, cf_data_var):
% type(cf_data_var)
)

# Determine the name of the label string (or length) dimension by
# finding the dimension name that doesn't exist within the data dimensions.
str_dim_name = list(set(self.dimensions) - set(cf_data_var.dimensions))

if len(str_dim_name) != 1:
raise ValueError(
"Invalid string dimensions for CF-netCDF label variable %r"
% self.cf_name
)

str_dim_name = str_dim_name[0]
label_data = self[:]

if ma.isMaskedArray(label_data):
label_data = label_data.filled()

# Determine whether we have a string-valued scalar label
# i.e. a character variable that only has one dimension (the length of the string).
if self.ndim == 1:
label_string = b"".join(label_data).strip()
label_string = label_string.decode("utf8")
data = np.array([label_string])
else:
# Determine the index of the string dimension.
str_dim = self.dimensions.index(str_dim_name)

# Calculate new label data shape (without string dimension) and create payload array.
new_shape = tuple(
dim_len for i, dim_len in enumerate(self.shape) if i != str_dim
)
string_basetype = "|U%d"
string_dtype = string_basetype % self.shape[str_dim]
data = np.empty(new_shape, dtype=string_dtype)

for index in np.ndindex(new_shape):
# Create the slice for the label data.
if str_dim == 0:
label_index = (slice(None, None),) + index
else:
label_index = index + (slice(None, None),)

label_string = b"".join(label_data[label_index]).strip()
label_string = label_string.decode("utf8")
data[index] = label_string
# # Determine the name of the label string (or length) dimension by
# # finding the dimension name that doesn't exist within the data dimensions.
# str_dim_names = list(set(self.dimensions) - set(cf_data_var.dimensions))
# n_nondata_dims = len(str_dim_names)
#
# if n_nondata_dims == 0:
# # *All* dims are shared with the data-variable.
# # This is only ok if the data-var is *also* a string type.
# dim_ok = _is_str_dtype(cf_data_var)
# # In this case, we must just *assume* that the last dimension is "the"
# # string dimension
# str_dim_name = self.dimensions[-1]
# else:
# # If there is exactly one non-data dim, that is the one we want
# dim_ok = len(str_dim_names) == 1
# (str_dim_name,) = str_dim_names
#
# if not dim_ok:
# raise ValueError(
# "Invalid string dimensions for CF-netCDF label variable %r"
# % self.cf_name
# )

data = self[:]
# label_data = self[:]
#
# if ma.isMaskedArray(label_data):
# label_data = label_data.filled(b"\0")
#
# # Determine whether we have a string-valued scalar label
# # i.e. a character variable that only has one dimension (the length of the string).
# if self.ndim == 1:
# label_string = b"".join(label_data).strip()
# label_string = label_string.decode("utf8")
# data = np.array([label_string])
# else:
# # Determine the index of the string dimension.
# str_dim = self.dimensions.index(str_dim_name)
#
# # Calculate new label data shape (without string dimension) and create payload array.
# new_shape = tuple(
# dim_len for i, dim_len in enumerate(self.shape) if i != str_dim
# )
# string_basetype = "|U%d"
# string_dtype = string_basetype % self.shape[str_dim]
# data = np.empty(new_shape, dtype=string_dtype)
#
# for index in np.ndindex(new_shape):
# # Create the slice for the label data.
# if str_dim == 0:
# label_index = (slice(None, None),) + index
# else:
# label_index = index + (slice(None, None),)
#
# label_string = b"".join(label_data[label_index]).strip()
# label_string = label_string.decode("utf8")
# data[index] = label_string

return data

Expand Down Expand Up @@ -1361,7 +1376,11 @@ def __init__(self, file_source, warn=False, monotonic=False):
if isinstance(file_source, str):
# Create from filepath : open it + own it (=close when we die).
self._filename = os.path.expanduser(file_source)
self._dataset = _thread_safe_nc.DatasetWrapper(self._filename, mode="r")
if _bytecoding_datasets.DECODE_TO_STRINGS_ON_READ:
ds_type = _bytecoding_datasets.EncodedDataset
else:
ds_type = _thread_safe_nc.DatasetWrapper
self._dataset = ds_type(self._filename, mode="r")
self._own_file = True
else:
# We have been passed an open dataset.
Expand Down
Loading
Loading