Skip to content

Commit 804ff4f

Browse files
committed
Add tests for decoding non-ASCII bytes and handling multiple categorical columns
1 parent 962ecfb commit 804ff4f

1 file changed

Lines changed: 40 additions & 0 deletions

File tree

tests/test_info_read.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,14 @@ def test_decode_unicode_array(self):
186186
assert result[0] == "hello"
187187
assert result[1] == "world"
188188

189+
def test_decode_non_ascii_bytes_array(self):
    """Check that UTF-8 byte strings with non-ASCII characters decode cleanly."""
    expected = ["β-cell", "μglia"]
    # Build the input from the expected text so both sides of the
    # comparison originate from the same literals.
    encoded = np.array([text.encode("utf-8") for text in expected], dtype=object)

    decoded = decode_str_array(encoded)

    assert list(decoded) == expected
196+
189197

190198
class TestReadCategoricalColumn:
191199
"""Tests for read_categorical_column function."""
@@ -252,6 +260,38 @@ def test_col_chunk_not_found(self, sample_h5ad_file):
252260
with pytest.raises(RuntimeError, match="not found in group"):
253261
col_chunk_as_strings(f["obs"], "nonexistent", 0, 5, cache)
254262

263+
def test_col_chunk_multiple_categorical_columns_keep_values(self, temp_dir):
    """Test categorical cache does not leak across columns.

    Builds an ``obs`` group containing two categorical columns (``age`` and
    ``cell_type``), reads both through ``col_chunk_as_strings`` using one
    shared cache dict, and asserts that each column is resolved against its
    own categories.
    """
    file_path = temp_dir / "multi_categorical.h5ad"

    with h5py.File(file_path, "w") as f:
        obs = f.create_group("obs")
        obs.attrs["_index"] = "obs_names"
        obs.create_dataset(
            "obs_names", data=np.array(["c1", "c2", "c3"], dtype="S")
        )

        # First categorical column: categories stored as fixed-width bytes.
        age = obs.create_group("age")
        age.attrs["encoding-type"] = "categorical"
        age.create_dataset("categories", data=np.array(["5.0", "6.0"], dtype="S"))
        age.create_dataset("codes", data=np.array([0, 1, 0], dtype=np.int8))

        # Second categorical column: categories contain non-ASCII text.
        cell_type = obs.create_group("cell_type")
        cell_type.attrs["encoding-type"] = "categorical"
        # A bare object array carries no HDF5 type information, so name the
        # variable-length UTF-8 string type explicitly instead of relying on
        # dtype inference when writing the categories.
        cell_type.create_dataset(
            "categories",
            data=np.array(["Neuron", "β-cell"], dtype=object),
            dtype=h5py.string_dtype(encoding="utf-8"),
        )
        cell_type.create_dataset("codes", data=np.array([1, 0, 1], dtype=np.int8))

    with h5py.File(file_path, "r") as f:
        # One cache shared by both reads: the second column must not pick up
        # the categories cached for the first.
        cache = {}
        age_values = col_chunk_as_strings(f["obs"], "age", 0, 3, cache)
        cell_type_values = col_chunk_as_strings(f["obs"], "cell_type", 0, 3, cache)

    assert age_values == ["5.0", "6.0", "5.0"]
    assert cell_type_values == ["β-cell", "Neuron", "β-cell"]
294+
255295

256296
class TestLegacyV010Support:
257297
"""Tests for legacy v0.1.0 format support."""

0 commit comments

Comments
 (0)