@@ -186,6 +186,14 @@ def test_decode_unicode_array(self):
186186 assert result [0 ] == "hello"
187187 assert result [1 ] == "world"
188188
def test_decode_non_ascii_bytes_array(self):
    """Check that non-ASCII UTF-8 byte strings decode without crashing.

    The input is an object array built by UTF-8-encoding each expected
    label, so decoding must give back exactly those labels, in order.
    """
    expected_labels = ["β-cell", "μglia"]
    encoded = np.array(
        [label.encode("utf-8") for label in expected_labels],
        dtype=object,
    )

    decoded = decode_str_array(encoded)

    assert list(decoded) == expected_labels
196+
189197
190198class TestReadCategoricalColumn :
191199 """Tests for read_categorical_column function."""
@@ -252,6 +260,38 @@ def test_col_chunk_not_found(self, sample_h5ad_file):
252260 with pytest .raises (RuntimeError , match = "not found in group" ):
253261 col_chunk_as_strings (f ["obs" ], "nonexistent" , 0 , 5 , cache )
254262
def test_col_chunk_multiple_categorical_columns_keep_values(self, temp_dir):
    """Test categorical cache does not leak across columns.

    Writes a small file whose ``obs`` group holds two categorical columns
    (``age`` and ``cell_type``) with different categories and different
    codes, then reads both through ``col_chunk_as_strings`` using one
    shared cache dict. Each column must come back with its own values.

    Args:
        temp_dir: Directory fixture in which the temporary file is created.
    """
    file_path = temp_dir / "multi_categorical.h5ad"

    with h5py.File(file_path, "w") as f:
        obs = f.create_group("obs")
        obs.attrs["_index"] = "obs_names"
        obs.create_dataset(
            "obs_names", data=np.array(["c1", "c2", "c3"], dtype="S")
        )

        # First categorical column: ASCII categories as fixed-length bytes.
        age = obs.create_group("age")
        age.attrs["encoding-type"] = "categorical"
        age.create_dataset("categories", data=np.array(["5.0", "6.0"], dtype="S"))
        age.create_dataset("codes", data=np.array([0, 1, 0], dtype=np.int8))

        # Second categorical column: categories include a non-ASCII label.
        # A plain object array carries no HDF5 type information, so the
        # variable-length UTF-8 string dtype is passed explicitly instead
        # of relying on h5py to infer one.
        cell_type = obs.create_group("cell_type")
        cell_type.attrs["encoding-type"] = "categorical"
        cell_type.create_dataset(
            "categories",
            data=np.array(["Neuron", "β-cell"], dtype=object),
            dtype=h5py.string_dtype(encoding="utf-8"),
        )
        cell_type.create_dataset("codes", data=np.array([1, 0, 1], dtype=np.int8))

    with h5py.File(file_path, "r") as f:
        # The same cache object is reused for both columns on purpose.
        cache = {}
        age_values = col_chunk_as_strings(f["obs"], "age", 0, 3, cache)
        cell_type_values = col_chunk_as_strings(f["obs"], "cell_type", 0, 3, cache)

    assert age_values == ["5.0", "6.0", "5.0"]
    assert cell_type_values == ["β-cell", "Neuron", "β-cell"]
294+
255295
256296class TestLegacyV010Support :
257297 """Tests for legacy v0.1.0 format support."""
0 commit comments