Skip to content

Commit ba22dde

Browse files
committed
Fix HDF5 serialization for datetime and date objects, resolve FilesOpenItem serialization bug, and enhance HDF5 serialization tests
1 parent 85c501f commit ba22dde

6 files changed

Lines changed: 93 additions & 29 deletions

File tree

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,25 @@
200200

201201
🛠️ Bug fixes:
202202

203+
* Fixed HDF5 serialization and deserialization for datetime and date objects:
204+
* Previously, datetime and date objects were serialized as numerical values (timestamp for datetime, ordinal for date) but were not properly restored as the original object types upon deserialization.
205+
* This caused `datetime.datetime` objects to be restored as `float` values and `datetime.date` objects to be restored as `int` values.
206+
* The fix ensures that these temporal objects are now correctly restored as their original types, maintaining data integrity across save/load cycles.
207+
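* Updated the `HDF5Reader` to convert numerical values back to datetime/date objects using a range heuristic: floats strictly between 1e8 and 2e9 are restored as `datetime.datetime` (timestamp), and integers strictly between 50000 and 1000000 are restored as `datetime.date` (ordinal).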
208+
* This affects all DataSet instances containing `DateItem` or `DateTimeItem` objects that are saved to and loaded from HDF5 files.
209+
210+
* Fixed `FilesOpenItem` serialization bug in HDF5 files:
211+
* Previously, when serializing file paths in `FilesOpenItem`, the paths were encoded to UTF-8 bytes but not properly decoded during deserialization.
212+
* This caused file paths to be incorrectly restored as lists of individual characters instead of complete path strings.
213+
* The fix ensures that file paths are properly decoded from bytes to strings during HDF5 deserialization.
214+
* This resolves data corruption issues when saving and loading datasets containing multiple file selections.
215+
216+
* Enhanced HDF5 serialization test to prevent regressions:
217+
* The automatic unit test for HDF5 serialization (`test_loadsave_hdf5.py`) now properly validates dataset integrity after serialization/deserialization cycles.
218+
* Previously, the test could pass even when values were corrupted during the save/load process due to improper initialization.
219+
* The test now explicitly sets all items to `None` after creating the target dataset, ensuring that deserialized values truly come from the HDF5 file rather than from default initialization.
220+
* This improvement helps catch serialization bugs early and prevents future regressions in HDF5 I/O functionality.
221+
203222
* Fixed font hinting preference in `RotatedLabel` initialization for improved text rendering
204223

205224
* Fixed dataset corruption in `DataSetShowGroupBox.get()` when updating widgets with dependencies:

guidata/dataset/dataitems.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -906,17 +906,20 @@ def __init__(
906906
) -> None:
907907
if isinstance(default, str):
908908
default = [default]
909-
super().__init__(
909+
StringItem.__init__(
910+
self,
910911
label,
911-
formats=formats,
912912
default=default,
913-
basedir=basedir,
914-
all_files_first=all_files_first,
915913
regexp=regexp,
916914
help=help,
917915
check=check,
918916
allow_none=allow_none,
919917
)
918+
if isinstance(formats, str):
919+
formats = [formats] # type:ignore
920+
self.set_prop("data", formats=formats)
921+
self.set_prop("data", basedir=basedir)
922+
self.set_prop("data", all_files_first=all_files_first)
920923
self.set_prop("display", func=self.paths_basename)
921924

922925
@staticmethod
@@ -955,6 +958,8 @@ def serialize(
955958
) -> None:
956959
"""Serialize this item"""
957960
value = self.get_value(instance)
961+
if value is not None and not isinstance(value, (tuple, list)):
962+
value = [value]
958963
writer.write_sequence([fname.encode("utf-8") for fname in value])
959964

960965
def get_value_from_reader(

guidata/io/h5fmt.py

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from __future__ import annotations
1111

1212
import datetime
13+
import numbers
1314
import sys
1415
from collections.abc import Callable, Sequence
1516
from typing import Any
@@ -474,10 +475,10 @@ def write(self, val: Any, group_name: str | None = None) -> None:
474475
group = self.get_parent_group()
475476
try:
476477
group.attrs[self.option[-1]] = val
477-
except TypeError:
478+
except TypeError as exc:
478479
raise NotImplementedError(
479480
"cannot serialize %r of type %r" % (val, type(val))
480-
)
481+
) from exc
481482

482483
if group_name:
483484
self.end(group_name)
@@ -583,6 +584,32 @@ class NoDefault:
583584
pass
584585

585586

587+
def infer_datetime(
    value: Any,
    timestamp_range: tuple[float, float] = (1e8, 2e9),
    ordinal_range: tuple[int, int] = (50000, 1000000),
) -> Any:
    """Infer if a numeric value represents a datetime object.

    This is a range-based heuristic: the stored value carries no type tag, so
    a plain number that happens to fall inside one of the ranges is
    indistinguishable from a serialized date and will be converted too.

    Args:
        value: The value to check.
        timestamp_range: Exclusive ``(low, high)`` bounds, in seconds since
         the epoch, within which a ``float`` is treated as a timestamp.
        ordinal_range: Exclusive ``(low, high)`` bounds within which an
         integer is treated as a proleptic Gregorian ordinal.

    Returns:
        A naive, local-time ``datetime.datetime`` for a float inside
        ``timestamp_range``, a ``datetime.date`` for an integer inside
        ``ordinal_range``, otherwise the value itself (NumPy scalars are
        returned as their native Python equivalent).
    """
    # Convert NumPy scalars to native Python types for consistency
    if isinstance(value, np.generic):
        value = value.item()

    # bool is a numbers.Integral subclass but never represents a date
    if isinstance(value, bool) or not isinstance(value, numbers.Real):
        return value

    # datetime.datetime: seconds since epoch
    if isinstance(value, float):
        low, high = timestamp_range
        if low < value < high:
            try:
                return datetime.datetime.fromtimestamp(value)
            except (OverflowError, OSError, ValueError):
                # Outside the platform's supported range: keep the number
                pass
        return value

    # datetime.date: ordinal days (typical range)
    if isinstance(value, numbers.Integral):
        low, high = ordinal_range
        if low < value < high:
            try:
                return datetime.date.fromordinal(int(value))
            except (OverflowError, ValueError):
                pass
    return value
611+
612+
586613
class HDF5Reader(HDF5Handler):
587614
"""
588615
Reader for HDF5 files. Inherits from HDF5Handler.
@@ -641,7 +668,18 @@ def read(
641668
self.end(group_name)
642669
return val
643670

644-
def read_any(self) -> str | bytes:
671+
def read_any(
672+
self,
673+
) -> (
674+
str
675+
| bytes
676+
| int
677+
| float
678+
| datetime.date
679+
| datetime.datetime
680+
| list[Any]
681+
| np.ndarray
682+
):
645683
"""
646684
Read a value from the current group as a generic type.
647685
@@ -659,8 +697,7 @@ def read_any(self) -> str | bytes:
659697
value = self.read_sequence()
660698
if isinstance(value, bytes):
661699
return value.decode("utf-8")
662-
else:
663-
return value
700+
return infer_datetime(value)
664701

665702
def read_bool(self) -> bool | None:
666703
"""
@@ -756,7 +793,7 @@ def read_dict(self) -> dict[str, Any]:
756793
for key, value in dict_group.attrs.items():
757794
if key == DICT_NAME:
758795
continue
759-
dict_val[key] = value
796+
dict_val[key] = infer_datetime(value)
760797
for key in dict_group:
761798
with self.group(key):
762799
try:

guidata/tests/dataset/test_all_features.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
class SubDataSet(gds.DataSet):
3535
dir = gds.DirectoryItem("Directory", TEMPDIR)
3636
fname = gds.FileOpenItem("Single file (open)", ("csv", "eta"), FILE_CSV.name)
37-
fnames = gds.FilesOpenItem("Multiple files", "csv", FILE_CSV.name)
37+
fnames = gds.FilesOpenItem("Multiple files", "csv", [FILE_CSV.name])
3838
fname_s = gds.FileSaveItem("Single file (save)", "eta", FILE_ETA.name)
3939

4040

guidata/tests/dataset/test_all_items.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,15 @@ class Parameters(gds.DataSet):
8282
as well as special characters (α, β, γ, δ, ...)
8383
"""
8484

85-
dir = gds.DirectoryItem("Directory")
86-
preview = gds.TextItem("File names preview")
85+
dir = gds.DirectoryItem("Directory", default=TEMPDIR)
86+
preview = gds.TextItem("File names preview", default="-")
8787
option = gds.ChoiceItem("Option", (("1", "first choice"), ("2", "second choice")))
8888

8989
fname = gds.FileOpenItem("Open file", ("csv", "eta"), FILE_CSV.name)
90-
fnames = gds.FilesOpenItem("Open files", "csv", FILE_CSV.name)
90+
fnames = gds.FilesOpenItem("Open files", "csv", [FILE_CSV.name])
9191
fname_s = gds.FileSaveItem("Save file", "eta", FILE_ETA.name)
92-
string = gds.StringItem("String")
93-
text = gds.TextItem("Text")
92+
string = gds.StringItem("String", default="default string !?")
93+
text = gds.TextItem("Text", default="default\nmultiline\ntext")
9494
float_slider = gds.FloatItem(
9595
"Float (with slider)", default=0.5, min=0, max=1, step=0.01, slider=True
9696
)

guidata/tests/dataset/test_loadsave_hdf5.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313

1414
# guitest: show
1515

16-
import os
16+
import os.path as osp
17+
import tempfile
1718

19+
from guidata.dataset import assert_datasets_equal
1820
from guidata.env import execenv
1921
from guidata.io import HDF5Reader, HDF5Writer
2022
from guidata.qthelpers import qt_app_context
@@ -23,28 +25,29 @@
2325

2426
def test_loadsave_hdf5():
2527
"""Test HDF5 I/O"""
26-
fname = "test.h5"
27-
with qt_app_context():
28-
if os.path.exists(fname):
29-
os.unlink(fname)
28+
with tempfile.TemporaryDirectory() as temp_dir:
29+
fname = osp.join(temp_dir, "test.h5")
30+
with qt_app_context():
31+
p1 = Parameters()
32+
# p1.edit()
3033

31-
p1 = Parameters()
32-
if execenv.unattended or p1.edit():
34+
# Save to HDF5 file
3335
writer = HDF5Writer(fname)
3436
p1.serialize(writer)
3537
writer.close()
3638

3739
p2 = Parameters()
40+
# Set all items to None for testing purposes:
41+
for item in p2._items:
42+
item.__set__(p2, None)
43+
44+
# Load from HDF5 file
3845
reader = HDF5Reader(fname)
3946
p2.deserialize(reader)
4047
reader.close()
41-
p2.edit()
42-
os.unlink(fname)
4348

44-
# TODO: Uncomment this part of the test, and make it work!
45-
# if execenv.unattended:
46-
# assert_datasets_equal(p1, p2, "Parameters do not match after HDF5 I/O")
47-
execenv.print("OK")
49+
assert_datasets_equal(p1, p2, "Parameters do not match after HDF5 I/O")
50+
execenv.print("OK")
4851

4952

5053
if __name__ == "__main__":

0 commit comments

Comments
 (0)