From be22083143edb797ee8e4e6fbf2698627b425dca Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 4 Feb 2025 22:48:23 +0800
Subject: [PATCH 001/129] added objid functions

---
 pyproject.toml          |   7 +-
 src/h5json/__init__.py  |   8 +
 src/h5json/hdf5db.py    |  21 +-
 src/h5json/objid.py     | 485 ++++++++++++++++++++++++++++++++++++++++
 test/unit/objid_test.py | 199 +++++++++++++++++
 5 files changed, 707 insertions(+), 13 deletions(-)
 create mode 100644 src/h5json/objid.py
 create mode 100755 test/unit/objid_test.py

diff --git a/pyproject.toml b/pyproject.toml
index bcba8205..5ddb024f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,17 +19,18 @@ authors = [{ "name" = "The HDF Group", "email" = "help@hdfgroup.org" }]
 keywords = ["json", "hdf5", "multidimensional array", "data", "datacube"]
 requires-python = ">=3.8"
 dependencies = [
-    "h5py >=3.10",
+    "h5py >= 3.10",
     "numpy >= 2.0; python_version>='3.9'",
     "jsonschema >=4.4.0",
     "tomli; python_version<'3.11'",
     "numpy >=1.20,<2.0.0; python_version=='3.8'",
 ]
+
 dynamic = ["version"]
 
 [project.urls]
-Homepage = "https://hdf5-json.readthedocs.io"
-Documentation = "https://hdf5-json.readthedocs.io"
+Homepage = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
+Documentation = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
 Source = "https://github.com/HDFGroup/hdf5-json"
 "Bug Reports" = "https://github.com/HDFGroup/hdf5-json/issues"
 Social = "https://twitter.com/hdf5"
diff --git a/src/h5json/__init__.py b/src/h5json/__init__.py
index 704d2411..d4a7f781 100644
--- a/src/h5json/__init__.py
+++ b/src/h5json/__init__.py
@@ -21,6 +21,14 @@
 from .hdf5dtype import getTypeResponse
 from .hdf5dtype import getItemSize
 from .hdf5dtype import createDataType
+from .objid import createObjId
+from .objid import getCollectionForId
+from .objid import isObjId
+from .objid import isS3ObjKey
+from .objid import getS3Key
+from .objid import getObjId
+from .objid import isSchema2Id
+from .objid import isRootObjId
 from .hdf5db import Hdf5db
 from . import _version
 
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 27f20946..676dbef5 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -19,6 +19,7 @@
 import json
 import logging
 from .hdf5dtype import getTypeItem, createDataType, getItemSize
+from .objid import createObjId
 from .apiversion import _apiver
 
 
@@ -561,7 +562,7 @@ def initFile(self):
 
         self.log.info("initializing file")
         if not self.root_uuid:
-            self.root_uuid = str(uuid.uuid1())
+            self.root_uuid = createObjId()
         self.dbGrp.attrs["rootUUID"] = self.root_uuid
         self.dbGrp.create_group("{groups}")
         self.dbGrp.create_group("{datasets}")
@@ -593,21 +594,21 @@ def visit(self, path, obj):
             msg = "Unknown object type: " + __name__ + " found during scan of HDF5 file"
             self.log.error(msg)
             raise IOError(errno.EIO, msg)
-        uuid1 = uuid.uuid1()  # create uuid
-        id = str(uuid1)
+        obj_id = createObjId()  # create uuid
+
         addrGrp = self.dbGrp["{addr}"]
         if not self.readonly:
             # storing db in the file itself, so we can link to the object directly
-            col[id] = obj.ref  # save attribute ref to object
+            col[obj_id] = obj.ref  # save attribute ref to object
         else:
             # store path to object
-            col[id] = obj.name
+            col[obj_id] = obj.name
         addr = h5py.h5o.get_info(obj.id).addr
         # store reverse map as an attribute
-        addrGrp.attrs[str(addr)] = id
+        addrGrp.attrs[str(addr)] = obj_id
 
     #
-    # Get Datset creation properties
+    # Get Dataset creation properties
     #
     def getDatasetCreationProps(self, dset_uuid):
         prop_list = {}
@@ -1087,7 +1088,7 @@ def createCommittedType(self, datatype, obj_uuid=None):
             raise IOError(errno.EPERM, msg)
         datatypes = self.dbGrp["{datatypes}"]
         if not obj_uuid:
-            obj_uuid = str(uuid.uuid1())
+            obj_uuid = createObjId()
         dt = self.createTypeFromItem(datatype)
 
         datatypes[obj_uuid] = dt
@@ -2715,7 +2716,7 @@ def createDataset(
             raise IOError(errno.EPERM, msg)
         datasets = self.dbGrp["{datasets}"]
         if not obj_uuid:
-            obj_uuid = str(uuid.uuid1())
+            obj_uuid = createObjId()
         dt = None
         item = {}
         fillvalue = None
@@ -3490,7 +3491,7 @@ def createGroup(self, obj_uuid=None):
             raise IOError(errno.EPERM, msg)
         groups = self.dbGrp["{groups}"]
         if not obj_uuid:
-            obj_uuid = str(uuid.uuid1())
+            obj_uuid = createObjId()
         newGroup = groups.create_group(obj_uuid)
         # store reverse map as an attribute
         addr = h5py.h5o.get_info(newGroup.id).addr
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
new file mode 100644
index 00000000..7a98a5b7
--- /dev/null
+++ b/src/h5json/objid.py
@@ -0,0 +1,485 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HDF (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+#
+# objID:
+# id (uuid) related functions
+#
+
+
+import hashlib
+import uuid
+
+S3_URI = "s3://"
+FILE_URI = "file://"
+AZURE_URI = "blob.core.windows.net/"  # preceded with "https://"
+UUID_LEN = 36  # length for uuid strings
+
+
+
+def _getStorageProtocol(uri):
+    """ returns 's3://', 'file://', or 'https://...net/' prefix if present.
+    If the prefix is in the form: https://myaccount.blob.core.windows.net/mycontainer
+    (references Azure blob storage), return: https://myaccount.blob.core.windows.net/
+    otherwise None """
+
+    if not uri:
+        protocol = None
+    elif uri.startswith(S3_URI):
+        protocol = S3_URI
+    elif uri.startswith(FILE_URI):
+        protocol = FILE_URI
+    elif uri.startswith("https://") and uri.find(AZURE_URI) > 0:
+        n = uri.find(AZURE_URI) + len(AZURE_URI)
+        protocol = uri[:n]
+    elif uri.find("://") >= 0:
+        raise ValueError(f"storage uri: {uri} not supported")
+    else:
+        protocol = None
+    return protocol
+
+
+def _getBaseName(uri):
+    """ Return the part of the URI after the storage protocol (if any) """
+
+    protocol = _getStorageProtocol(uri)
+    if not protocol:
+        return uri
+    else:
+        return uri[len(protocol):]
+    
+def _getPrefixForCollection(collection):
+    """ Return prefix character for given collection type """
+    collection = collection.lower()
+
+    if collection in ("group", "groups"):
+        return 'g'
+    elif collection in ("dataset", "datasets"):
+        return 'd'
+    elif collection in ("datatype", "datatypes"):
+        return 't'
+    elif collection in ("chunk", "chunks"):
+        return 'c'
+    else:
+        raise ValueError(f"unexpected collection type: {collection}")
+
+
+def getIdHash(id):
+    """Return md5 prefix based on id value"""
+    m = hashlib.new("md5")
+    m.update(id.encode("utf8"))
+    hexdigest = m.hexdigest()
+    return hexdigest[:5]
+
+
+def isSchema2Id(id):
+    """return true if this is a v2 id"""
+    # v1 ids are in the standard UUID format: 8-4-4-4-12
+    # v2 ids are in the non-standard: 8-8-4-6-6
+    parts = id.split("-")
+    if len(parts) != 6:
+        raise ValueError(f"Unexpected id formation for uuid: {id}")
+    if len(parts[2]) == 8:
+        return True
+    else:
+        return False
+
+
+def getIdHexChars(id):
+    """get the hex chars of the given id"""
+    if id[0] == "c":
+        # don't include chunk index
+        index = id.index("_")
+        parts = id[0:index].split("-")
+    else:
+        parts = id.split("-")
+    if len(parts) != 6:
+        raise ValueError(f"Unexpected id format for uuid: {id}")
+    return "".join(parts[1:])
+
+
+def hexRot(ch):
+    """rotate hex character by 8"""
+    return format((int(ch, base=16) + 8) % 16, "x")
+
+
+def isRootObjId(id):
+    """returns true if this is a root id (only for v2 schema)"""
+    if not isSchema2Id(id):
+        raise ValueError("isRootObjId can only be used with v2 ids")
+    validateUuid(id)  # will throw ValueError exception if not a objid
+    if id[0] != "g":
+        return False  # not a group
+    token = getIdHexChars(id)
+    # root ids will have last 16 chars rotated version of the first 16
+    is_root = True
+    for i in range(16):
+        if token[i] != hexRot(token[i + 16]):
+            is_root = False
+            break
+    return is_root
+
+
+def getRootObjId(id):
+    """returns root id for this objid if this is a root id
+    (only for v2 schema)
+    """
+    if isRootObjId(id):
+        return id  # this is the root id
+    token = list(getIdHexChars(id))
+    # root ids will have last 16 chars rotated version of the first 16
+    for i in range(16):
+        token[i + 16] = hexRot(token[i])
+    token = "".join(token)
+    root_id = "g-" + token[0:8] + "-" + token[8:16] + "-" + token[16:20]
+    root_id += "-" + token[20:26] + "-" + token[26:32]
+
+    return root_id
+
+
+def createObjId(obj_type=None, root_id=None):
+    """ create a new objid 
+    
+        if obj_type is None, return just a bare uuid.
+        Otherwise a hsds v2 schema obj_id will be created.
+        In this case obj_type should be one of "groups",
+        "datasets", "datatypes", "chunks".  If rootid is
+        None, a root group obj_id will be created.  Otherwise the 
+        obj_id will be a an id that has root_id as it's root.  """
+
+    
+    prefix = None
+    if obj_type is None:
+        # just return a regular uuid
+        objid = str(uuid.uuid4())
+    else:
+
+        prefix = _getPrefixForCollection(obj_type)
+        # schema v2
+        salt = uuid.uuid4().hex
+        # take a hash to randomize the uuid
+        token = list(hashlib.sha256(salt.encode()).hexdigest())
+
+        if root_id:
+            # replace first 16 chars of token with first 16 chars of root id
+            root_hex = getIdHexChars(root_id)
+            token[0:16] = root_hex[0:16]
+        else:
+            if obj_type != "groups":
+                raise ValueError("expected 'groups' obj_type for root group id")
+            # use only 16 chars, but make it look a 32 char id
+            for i in range(16):
+                token[16 + i] = hexRot(token[i])
+        # format as a string
+        token = "".join(token)
+        objid = prefix + "-" + token[0:8] + "-" + token[8:16] + "-"
+        objid += token[16:20] + "-" + token[20:26] + "-" + token[26:32]
+
+    return objid
+
+
+def getS3Key(id):
+    """Return s3 key for given id.
+
+    For schema v1:
+        A md5 prefix is added to the front of the returned key to better
+        distribute S3 objects.
+    For schema v2:
+        The id is converted to the pattern: "db/{rootid[0:16]}" for rootids and
+        "db/id[0:16]/{prefix}/id[16-32]" for other ids
+        Chunk ids have the chunk index added after the slash:
+        "db/id[0:16]/d/id[16:32]/x_y_z
+
+    For domain id's:
+        Return a key with the .domain suffix and no preceding slash.
+        For non-default buckets, use the format: <bucket_name>/s3_key
+        If the id has a storage specifier ("s3://", "file://", etc.)
+        include that along with the bucket name. e.g.: "s3://mybucket/a_folder/a_file.h5"
+    """
+
+    base_id = _getBaseName(id)  # strip any s3://, etc.
+    if base_id.find("/") > 0:
+        # a domain id
+        domain_suffix = ".domain.json"
+        index = base_id.find("/") + 1
+        key = base_id[index:]
+        if not key.endswith(domain_suffix):
+            if key[-1] != "/":
+                key += "/"
+            key += domain_suffix
+    else:
+        if isSchema2Id(id):
+            # schema v2 id
+            hexid = getIdHexChars(id)
+            prefix = id[0]  # one of g, d, t, c
+            if prefix not in ("g", "d", "t", "c"):
+                raise ValueError(f"Unexpected id: {id}")
+
+            if isRootObjId(id):
+                key = f"db/{hexid[0:8]}-{hexid[8:16]}"
+            else:
+                partition = ""
+                if prefix == "c":
+                    # use 'g' so that chunks will show up under their dataset
+                    s3col = "d"
+                    n = id.find("-")
+                    if n > 1:
+                        # extract the partition index if present
+                        partition = "p" + id[1:n]
+                else:
+                    s3col = prefix
+                key = f"db/{hexid[0:8]}-{hexid[8:16]}/{s3col}/{hexid[16:20]}"
+                key += f"-{hexid[20:26]}-{hexid[26:32]}"
+            if prefix == "c":
+                if partition:
+                    key += "/"
+                    key += partition
+                # add the chunk coordinate
+                index = id.index("_")  # will raise ValueError if not found
+                n = index + 1
+                coord = id[n:]
+                key += "/"
+                key += coord
+            elif prefix == "g":
+                # add key suffix for group
+                key += "/.group.json"
+            elif prefix == "d":
+                # add key suffix for dataset
+                key += "/.dataset.json"
+            else:
+                # add key suffix for datatype
+                key += "/.datatype.json"
+        else:
+            # v1 id
+            # schema v1 id
+            idhash = getIdHash(id)
+            key = f"{idhash}-{id}"
+
+    return key
+
+
+def getObjId(s3key):
+    """Return object id given valid s3key"""
+    if all(
+        (
+            len(s3key) >= 44 and s3key[0:5].isalnum(),
+            len(s3key) >= 44 and s3key[5] == "-",
+            len(s3key) >= 44 and s3key[6] in ("g", "d", "c", "t"),
+        )
+    ):
+        # v1 obj keys
+        objid = s3key[6:]
+    elif s3key.endswith("/.domain.json"):
+        objid = "/" + s3key[: -(len("/.domain.json"))]
+    elif s3key.startswith("db/"):
+        # schema v2 object key
+        parts = s3key.split("/")
+        chunk_coord = ""  # used only for chunk ids
+        partition = ""  # likewise
+        token = []
+        for ch in parts[1]:
+            if ch != "-":
+                token.append(ch)
+
+        if len(parts) == 3:
+            # root id
+            # last part should be ".group.json"
+            if parts[2] != ".group.json":
+                raise ValueError(f"unexpected S3Key: {s3key}")
+            # add 16 more chars using rotated version of first 16
+            for i in range(16):
+                token.append(hexRot(token[i]))
+            prefix = "g"
+        elif len(parts) == 5:
+            # group, dataset, or datatype or chunk
+            for ch in parts[3]:
+                if ch != "-":
+                    token.append(ch)
+
+            if parts[2] == "g" and parts[4] == ".group.json":
+                prefix = "g"  # group json
+            elif parts[2] == "t" and parts[4] == ".datatype.json":
+                prefix = "t"  # datatype json
+            elif parts[2] == "d":
+                if parts[4] == ".dataset.json":
+                    prefix = "d"  # dataset json
+                else:
+                    # chunk object
+                    prefix = "c"
+                    chunk_coord = "_" + parts[4]
+            else:
+                raise ValueError(f"unexpected S3Key: {s3key}")
+        elif len(parts) == 6:
+            # chunk key with partitioning
+            for ch in parts[3]:
+                if ch != "-":
+                    token.append(ch)
+            if parts[2][0] != "d":
+                raise ValueError(f"unexpected S3Key: {s3key}")
+            prefix = "c"
+            partition = parts[4]
+            if partition[0] != "p":
+                raise ValueError(f"unexpected S3Key: {s3key}")
+            partition = partition[1:]  # strip off the p
+            chunk_coord = "_" + parts[5]
+        else:
+            raise ValueError(f"unexpected S3Key: {s3key}")
+
+        token = "".join(token)
+        objid = prefix + partition + "-" + token[0:8] + "-" + token[8:16]
+        objid += "-" + token[16:20] + "-" + token[20:26] + "-"
+        objid += token[26:32] + chunk_coord
+    else:
+        msg = f"unexpected S3Key: {s3key}"
+        raise ValueError(msg)
+    return objid
+
+
+def isS3ObjKey(s3key):
+    """ return True if this is a storage key """
+    valid = False
+    try:
+        objid = getObjId(s3key)
+        if objid:
+            valid = True
+    except KeyError:
+        pass  # ignore
+    except ValueError:
+        pass  # ignore
+    return valid
+
+
+def getCollectionForId(obj_id):
+    """return groups/datasets/datatypes based on id"""
+    if not isinstance(obj_id, str):
+        raise ValueError("invalid object id")
+    collection = None
+    if obj_id.startswith("g-"):
+        collection = "groups"
+    elif obj_id.startswith("d-"):
+        collection = "datasets"
+    elif obj_id.startswith("t-"):
+        collection = "datatypes"
+    else:
+        raise ValueError("not a collection id")
+    return collection
+
+
+def validateUuid(id, obj_class=None):
+    """ verify the UUID is well-formed 
+        schema can be:
+           None: expecting ordinary UUID
+           "v1": expecting HSDS v1 format
+           "v2": expecting HSDS v2 format
+        if set obj_class can be one of "groups", "datasets", "datatypes"
+    """
+    if not isinstance(id, str):
+        raise ValueError("Expected string type")
+    if len(id) < UUID_LEN:
+        raise ValueError("id is too short to be an object identifier")
+    if len(id) == UUID_LEN:
+        if obj_class:
+            # expected a prefix
+            raise ValueError(f"obj_id: {id} not valid for collection: {obj_class}") 
+    else:
+        # does this have a v1 schema hash tag?
+        # e.g.: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e",
+        if id[:5].isalnum() and id[5] == '-':
+            id = id[6:]  # trim off the hash tag
+        # validate prefix
+        if id[0] not in ("g", "d", "t", "c"):
+            raise ValueError("Unexpected prefix")
+        if id[0] != "c" and id[1] != "-":
+            # chunk ids may have a partition index following the c
+            raise ValueError("Unexpected prefix")
+        if obj_class is not None:
+            obj_class = obj_class.lower()
+            if id[0] != _getPrefixForCollection(obj_class):
+                raise ValueError(f"unexpected object id {id} for collection: {obj_class}")
+        if id[0] == "c":
+            # trim the type char and any partition id
+            n = id.find("-")
+            if n == -1:
+                raise ValueError("Invalid chunk id")
+
+            # trim the chunk index for chunk ids
+            m = id.find("_")
+            if m == -1:
+                raise ValueError("Invalid chunk id")
+            n += 1
+            id = "c-" + id[n:m]
+        id = id[2:]
+    if len(id) != UUID_LEN:
+        # id should be 36 now
+        raise ValueError("Unexpected id length")
+
+    for ch in id:
+        if ch.isalnum():
+            continue
+        if ch == "-":
+            continue
+        raise ValueError(f"Unexpected character in uuid: {ch}")
+
+
+def isValidUuid(id, obj_class=None):
+    try:
+        validateUuid(id, obj_class)
+        return True
+    except ValueError:
+        return False
+
+
+def isValidChunkId(id):
+    if not isValidUuid(id):
+        return False
+    if id[0] != "c":
+        return False
+    return True
+
+
+def getClassForObjId(id):
+    """return domains/chunks/groups/datasets/datatypes based on id"""
+    if not isinstance(id, str):
+        raise ValueError("Expected string type")
+    if len(id) == 0:
+        raise ValueError("Empty string")
+    if id[0] == "/":
+        return "domains"
+    if isValidChunkId(id):
+        return "chunks"
+    else:
+        return getCollectionForId(id)
+
+
+def isObjId(id):
+    """return true if uuid or domain"""
+    if not isinstance(id, str) or len(id) == 0:
+        return False
+    if id.find("/") > 0:
+        # domain id is any string in the form <bucket_name>/<domain_path>
+        return True
+    return isValidUuid(id)
+
+
+def getUuidFromId(id):
+    """strip off the type prefix ('g-' or 'd-', or 't-')
+    and return the uuid part"""
+    if len(id) == UUID_LEN:
+        # just a uuid
+        return id
+    elif len(id) == UUID_LEN + 2:
+        # 'g-', 'd-', or 't-' prefix
+        return id[2:]
+    else:
+        raise ValueError(f"Unexpected obj_id: {id}")
+    
+ 
+  
\ No newline at end of file
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
new file mode 100755
index 00000000..7c02482f
--- /dev/null
+++ b/test/unit/objid_test.py
@@ -0,0 +1,199 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import sys
+
+from h5json.objid import isRootObjId, isValidUuid, validateUuid
+from h5json.objid import createObjId, getCollectionForId
+from h5json.objid import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id
+
+
+class IdUtilTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(IdUtilTest, self).__init__(*args, **kwargs)
+        # main
+
+    def testCreateObjId(self):
+        id_len = 38  # 36 for uuid plus two for prefix ("g-", "d-")
+        ids = set()  # we'll use this to verify we always get a unique id
+        # create just a plain uuid...
+        id = createObjId()
+        self.assertEqual(len(id) + 2, id_len)
+        # create a v2 root_id
+        root_id = createObjId(obj_type="groups")
+        self.assertEqual(len(root_id), id_len)
+        for obj_type in ("groups", "datasets", "datatypes", "chunks"):
+            for i in range(100):
+                id = createObjId(obj_type=obj_type, root_id=root_id)
+                self.assertEqual(len(id), id_len)
+                self.assertTrue(id[0] in ("g", "d", "t", "c"))
+                self.assertEqual(id[1], "-")
+                ids.add(id)
+
+        self.assertEqual(len(ids), 400)
+        try:
+            createObjId(obj_type="bad_class")
+            self.assertTrue(False)  # should throw exception
+        except ValueError:
+            pass  # expected
+
+    def testIsValidUuid(self):
+        group1_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"      # orig schema
+        group2_id = "g-314d61b8-995411e6-a733-3c15c2-da029e"
+        root_id = "g-f9aaa28e-d42e10e5-7122-2a065c-a6986d"
+        dataset1_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"    # orig schema
+        dataset2_id = "d-4c48f3ae-995411e6-a3cd-3c15c2-da029e"
+        ctype1_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"      # orig schema
+        ctype2_id = "t-8c785f1c-995311e6-9bc2-0242ac-110005"
+        chunk1_id = "c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2"  # orig schema
+        chunk2_id = "c-8c785f1c-995311e6-9bc2-0242ac-110005_7_2"
+        domain_id = "mybucket/bob/mydata.h5"
+        s3_domain_id = "s3://mybucket/bob/mydata.h5"
+        file_domain_id = "file://mybucket/bob/mydata.h5"
+        azure_domain_id = "https://myaccount.blob.core.windows.net/mybucket/bob/mydata.h5"
+        valid_id_map = {
+            group1_id: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e",
+            group2_id: "db/314d61b8-995411e6/g/a733-3c15c2-da029e/.group.json",
+            dataset1_id: "26928-d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e",
+            dataset2_id: "db/4c48f3ae-995411e6/d/a3cd-3c15c2-da029e/.dataset.json",
+            ctype1_id: "5a9cf-t-8c785f1c-9953-11e6-9bc2-0242ac110005",
+            ctype2_id: "db/8c785f1c-995311e6/t/9bc2-0242ac-110005/.datatype.json",
+            chunk1_id: "dc4ce-c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2",
+            chunk2_id: "db/8c785f1c-995311e6/d/9bc2-0242ac-110005/7_2",
+            domain_id: "bob/mydata.h5/.domain.json",
+            s3_domain_id: "bob/mydata.h5/.domain.json",
+            file_domain_id: "bob/mydata.h5/.domain.json",
+            azure_domain_id: "bob/mydata.h5/.domain.json", }
+
+        bad_ids = ("g-1e76d862", "/bob/mydata.h5")
+
+        self.assertTrue(isValidUuid(group1_id))
+        self.assertFalse(isSchema2Id(group1_id))
+        self.assertTrue(isValidUuid(group1_id, obj_class="Group"))
+        self.assertTrue(isValidUuid(group1_id, obj_class="group"))
+        self.assertTrue(isValidUuid(group1_id, obj_class="groups"))
+        self.assertTrue(isSchema2Id(root_id))
+        self.assertTrue(isValidUuid(root_id, obj_class="Group"))
+        self.assertTrue(isValidUuid(root_id, obj_class="group"))
+        self.assertTrue(isValidUuid(root_id, obj_class="groups"))
+        self.assertTrue(isRootObjId(root_id))
+        self.assertTrue(isValidUuid(dataset1_id, obj_class="datasets"))
+        self.assertFalse(isSchema2Id(dataset1_id))
+        self.assertTrue(isValidUuid(ctype1_id, obj_class="datatypes"))
+        self.assertFalse(isSchema2Id(ctype1_id))
+        self.assertTrue(isValidUuid(chunk1_id, obj_class="chunks"))
+        self.assertFalse(isSchema2Id(chunk1_id))
+        self.assertTrue(isValidUuid(group2_id))
+        self.assertTrue(isSchema2Id(group2_id))
+        self.assertTrue(isValidUuid(group2_id, obj_class="Group"))
+        self.assertTrue(isValidUuid(group2_id, obj_class="group"))
+        self.assertTrue(isValidUuid(group2_id, obj_class="groups"))
+        self.assertFalse(isRootObjId(group2_id))
+        self.assertTrue(isValidUuid(dataset2_id, obj_class="datasets"))
+        self.assertTrue(isSchema2Id(dataset2_id))
+        self.assertTrue(isValidUuid(ctype2_id, obj_class="datatypes"))
+        self.assertTrue(isSchema2Id(ctype2_id))
+        self.assertTrue(isValidUuid(chunk2_id, obj_class="chunks"))
+        self.assertTrue(isSchema2Id(chunk2_id))
+        validateUuid(group1_id)
+        try:
+            isRootObjId(group1_id)
+            self.assertTrue(False)
+        except ValueError:
+            # only works for v2 schema
+            pass  # expected
+
+        for item in valid_id_map:
+            self.assertTrue(isObjId(item))
+            s3key = getS3Key(item)
+            self.assertTrue(s3key[0] != "/")
+            self.assertTrue(isS3ObjKey(s3key))
+            expected = valid_id_map[item]
+            self.assertEqual(s3key, expected)
+            if item.find("/") > 0:
+                continue  # bucket name gets lost when domain ids get converted to s3keys
+            objid = getObjId(s3key)
+            self.assertEqual(objid, item)
+        for item in bad_ids:
+            self.assertFalse(isValidUuid(item))
+            self.assertFalse(isObjId(item))
+
+    def testGetCollection(self):
+        group_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"
+        dataset_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"
+        ctype_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"
+        bad_id = "x-59647858-9954-11e6-95d2-3c15c2da029e"
+        self.assertEqual(getCollectionForId(group_id), "groups")
+        self.assertEqual(getCollectionForId(dataset_id), "datasets")
+        self.assertEqual(getCollectionForId(ctype_id), "datatypes")
+        try:
+            getCollectionForId(bad_id)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+        try:
+            getCollectionForId(None)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+    def testSchema2Id(self):
+        root_id = createObjId("groups")
+        group_id = createObjId("groups", root_id=root_id)
+        dataset_id = createObjId("datasets", root_id=root_id)
+        ctype_id = createObjId("datatypes", root_id=root_id)
+
+        self.assertEqual(getCollectionForId(root_id), "groups")
+        self.assertEqual(getCollectionForId(group_id), "groups")
+        self.assertEqual(getCollectionForId(dataset_id), "datasets")
+        self.assertEqual(getCollectionForId(ctype_id), "datatypes")
+        chunk_id = "c" + dataset_id[1:] + "_1_2"
+        chunk_partition_id = "c42-" + dataset_id[2:] + "_1_2"
+
+        for id in (chunk_id, chunk_partition_id):
+            try:
+                getCollectionForId(id)
+                self.assertTrue(False)
+            except ValueError:
+                pass  # expected
+        valid_ids = (
+            group_id,
+            dataset_id,
+            ctype_id,
+            chunk_id,
+            chunk_partition_id,
+            root_id,
+        )
+        s3prefix = getS3Key(root_id)
+        self.assertTrue(s3prefix.endswith("/.group.json"))
+        s3prefix = s3prefix[: -(len(".group.json"))]
+        for oid in valid_ids:
+            self.assertTrue(len(oid) >= 38)
+            parts = oid.split("-")
+            self.assertEqual(len(parts), 6)
+            self.assertTrue(oid[0] in ("g", "d", "t", "c"))
+            self.assertTrue(isSchema2Id(oid))
+            if oid == root_id:
+                self.assertTrue(isRootObjId(oid))
+            else:
+                self.assertFalse(isRootObjId(oid))
+
+            s3key = getS3Key(oid)
+            self.assertTrue(s3key.startswith(s3prefix))
+            self.assertEqual(getObjId(s3key), oid)
+            self.assertTrue(isS3ObjKey(s3key))
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()

From 28dcfc6e744b376e51b8bbf4521150eced7d4bc6 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 4 Feb 2025 23:02:09 +0800
Subject: [PATCH 002/129] fix flake8 errors

---
 src/h5json/hdf5db.py    |  3 +--
 src/h5json/objid.py     | 18 +++++++-----------
 test/unit/objid_test.py |  1 -
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 676dbef5..f23dc3a4 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -13,7 +13,6 @@
 import time
 import h5py
 import numpy as np
-import uuid
 import os.path as op
 import os
 import json
@@ -1902,7 +1901,7 @@ def listToRef(self, data):
             # object reference should be in the form: <collection_name>/<uuid>
             for prefix in ("datasets", "groups", "datatypes"):
                 if data.startswith(prefix):
-                    uuid_ref = data[len(prefix) :]
+                    uuid_ref = data[len(prefix):]
                     if len(uuid_ref) == (UUID_LEN + 1) and uuid_ref.startswith("/"):
                         obj = self.getObjectByUuid(prefix, uuid_ref[1:])
                         if obj:
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index 7a98a5b7..598790e0 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -24,7 +24,6 @@
 UUID_LEN = 36  # length for uuid strings
 
 
-
 def _getStorageProtocol(uri):
     """ returns 's3://', 'file://', or 'https://...net/' prefix if present.
     If the prefix is in the form: https://myaccount.blob.core.windows.net/mycontainer
@@ -55,7 +54,8 @@ def _getBaseName(uri):
         return uri
     else:
         return uri[len(protocol):]
-    
+
+
 def _getPrefixForCollection(collection):
     """ Return prefix character for given collection type """
     collection = collection.lower()
@@ -146,16 +146,15 @@ def getRootObjId(id):
 
 
 def createObjId(obj_type=None, root_id=None):
-    """ create a new objid 
-    
+    """ create a new objid
+
         if obj_type is None, return just a bare uuid.
         Otherwise a hsds v2 schema obj_id will be created.
         In this case obj_type should be one of "groups",
         "datasets", "datatypes", "chunks".  If rootid is
-        None, a root group obj_id will be created.  Otherwise the 
+        None, a root group obj_id will be created.  Otherwise the
         obj_id will be a an id that has root_id as it's root.  """
 
-    
     prefix = None
     if obj_type is None:
         # just return a regular uuid
@@ -374,7 +373,7 @@ def getCollectionForId(obj_id):
 
 
 def validateUuid(id, obj_class=None):
-    """ verify the UUID is well-formed 
+    """ verify the UUID is well-formed
         schema can be:
            None: expecting ordinary UUID
            "v1": expecting HSDS v1 format
@@ -388,7 +387,7 @@ def validateUuid(id, obj_class=None):
     if len(id) == UUID_LEN:
         if obj_class:
             # expected a prefix
-            raise ValueError(f"obj_id: {id} not valid for collection: {obj_class}") 
+            raise ValueError(f"obj_id: {id} not valid for collection: {obj_class}")
     else:
         # does this have a v1 schema hash tag?
         # e.g.: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e",
@@ -480,6 +479,3 @@ def getUuidFromId(id):
         return id[2:]
     else:
         raise ValueError(f"Unexpected obj_id: {id}")
-    
- 
-  
\ No newline at end of file
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
index 7c02482f..af4ac21e 100755
--- a/test/unit/objid_test.py
+++ b/test/unit/objid_test.py
@@ -10,7 +10,6 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import unittest
-import sys
 
 from h5json.objid import isRootObjId, isValidUuid, validateUuid
 from h5json.objid import createObjId, getCollectionForId

From 54c83d574b703981c3b60fda72ce09c92895a71c Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sat, 8 Feb 2025 18:38:50 +0800
Subject: [PATCH 003/129] merge hsds hdf5dtype changes

---
 data/json/bool_attr.json    |   2 +-
 data/json/bool_dset.json    |   2 +-
 data/json/enum_attr.json    |   2 +-
 data/json/enum_dset.json    |   2 +-
 src/h5json/hdf5db.py        |  44 ++-
 src/h5json/hdf5dtype.py     | 743 ++++++++++++++++++++++++++----------
 test/unit/hdf5db_test.py    |   2 -
 test/unit/hdf5dtype_test.py | 259 ++++++++++---
 8 files changed, 801 insertions(+), 255 deletions(-)
 mode change 100755 => 100644 src/h5json/hdf5dtype.py

diff --git a/data/json/bool_attr.json b/data/json/bool_attr.json
index ff092b9a..6d4d24da 100644
--- a/data/json/bool_attr.json
+++ b/data/json/bool_attr.json
@@ -20,7 +20,7 @@
                             "class": "H5T_INTEGER"
                         },
                         "class": "H5T_ENUM",
-                        "members": [
+                        "mapping": [
                             {
                                 "name": "FALSE",
                                 "value": 0
diff --git a/data/json/bool_dset.json b/data/json/bool_dset.json
index 29e46d80..11f19e01 100644
--- a/data/json/bool_dset.json
+++ b/data/json/bool_dset.json
@@ -24,7 +24,7 @@
                     "class": "H5T_INTEGER"
                 },
                 "class": "H5T_ENUM",
-                "members": [
+                "mapping": [
                     {
                         "name": "FALSE",
                         "value": 0
diff --git a/data/json/enum_attr.json b/data/json/enum_attr.json
index 9e9d94a9..e39425ef 100644
--- a/data/json/enum_attr.json
+++ b/data/json/enum_attr.json
@@ -21,7 +21,7 @@
                             "class": "H5T_INTEGER"
                         },
                         "class": "H5T_ENUM",
-                        "members": [
+                        "mapping": [
                             {
                                 "name": "GAS",
                                 "value": 2
diff --git a/data/json/enum_dset.json b/data/json/enum_dset.json
index d2afcd4a..08291696 100644
--- a/data/json/enum_dset.json
+++ b/data/json/enum_dset.json
@@ -25,7 +25,7 @@
                     "class": "H5T_INTEGER"
                 },
                 "class": "H5T_ENUM",
-                "members": [
+                "mapping": [
                     {
                         "name": "GAS",
                         "value": 2
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index f23dc3a4..112fb867 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -17,7 +17,7 @@
 import os
 import json
 import logging
-from .hdf5dtype import getTypeItem, createDataType, getItemSize
+from .hdf5dtype import getTypeItem, createDataType, getItemSize, Reference, RegionReference
 from .objid import createObjId
 from .apiversion import _apiver
 
@@ -73,6 +73,43 @@
 _H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
 
 
+def convert_dtype(srcdt):
+    """Return a dtype based on input dtype, converting any Reference types from
+    h5json style to h5py.
+    """
+
+    if len(srcdt) > 0:
+        fields = []
+        for name in srcdt.fields:
+            item = srcdt.fields[name]
+            # item is a tuple of dtype and integer offset
+            field_dt = convert_dtype(item[0])
+            fields.append((name, field_dt))
+        tgt_dt = np.dtype(fields)
+    else:
+        # check if this a "special dtype"
+        if srcdt.metadata and "ref" in srcdt.metadata:
+            if srcdt.metadata['ref'] is Reference:
+                tgt_dt = h5py.special_dtype(ref=h5py.Reference)
+            elif srcdt.metadata['ref'] is RegionReference:
+                tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
+            else:
+                raise TypeError(f"Unexpected ref type: {srcdt}")
+        elif srcdt.metadata and "vlen" in srcdt.metadata:
+            src_vlen = srcdt.metadata["vlen"]
+            if isinstance(src_vlen, np.dtype):
+                tgt_base = convert_dtype(src_vlen)
+            else:
+                tgt_base = src_vlen
+            tgt_dt = h5py.special_dtype(vlen=tgt_base)
+        elif srcdt.kind == "U":
+            # use vlen for unicode strings
+            tgt_dt = h5py.special_dtype(vlen=str)
+        else:
+            tgt_dt = srcdt  # no conversion needed
+    return tgt_dt
+
+
 def visitObj(path, obj):
     hdf5db = _db[obj.file.filename]
     hdf5db.visit(path, obj)
@@ -1476,6 +1513,7 @@ def makeAttribute(self, obj, attr_name, shape, attr_type, value):
                     self.makeNullTermStringAttribute(obj, attr_name, strLength, value)
                 else:
                     typeItem = getTypeItem(dt)
+                    dt = convert_dtype(dt)
                     value = self.toRef(rank, typeItem, value)
 
                     # create numpy array
@@ -1725,6 +1763,7 @@ def toNumPyValue(self, typeItem, src, des):
             baseType = typeItem["base"]
 
             dt = self.createTypeFromItem(baseType)
+            dt = convert_dtype(dt)
             des = np.array(src, dtype=dt)
 
         elif typeClass == "H5T_REFERENCE":
@@ -2193,7 +2232,8 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"):
             raise IOError(errno.EIO, msg)
 
         if isinstance(slices, (list, tuple)) and len(slices) != rank:
-            msg = "Unexpected error: getDatasetValuesByUuid: number of dims in selection not same as rank"
+            msg = "Unexpected error: getDatasetValuesByUuid: "
+            msg += "number of dims in selection not same as rank"
             self.log.error(msg)
             raise IOError(errno.EIO, msg)
 
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
old mode 100755
new mode 100644
index 9f867f27..fecf38f0
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -2,37 +2,199 @@
 # Copyright by The HDF Group.                                                #
 # All rights reserved.                                                       #
 #                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
 # terms governing use, modification, and redistribution, is contained in     #
 # the file COPYING, which can be found at the root of the source code        #
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 
-"""
-This class is used to map between HDF5 type representations and numpy types
-
-"""
+import weakref
 import numpy as np
-from h5py.h5t import special_dtype
-from h5py.h5t import check_dtype
-from h5py.h5r import Reference
-from h5py.h5r import RegionReference
+
+
+class Reference:
+    """
+    Represents an HDF5 object reference
+    """
+
+    @property
+    def id(self):
+        """Low-level identifier appropriate for this object"""
+        return self._id
+
+    @property
+    def objref(self):
+        """Weak reference to object"""
+        return self._objref  # return weak ref to ref'd object
+
+    def __init__(self, bind):
+        """Create a new reference by binding to
+        a group/dataset/committed type
+        """
+        self._id = bind._id
+        self._objref = weakref.ref(bind)
+
+    def __repr__(self):
+        # TBD: this is not consistent with hsds or h5py...
+        if not isinstance(self._id.id, str):
+            raise TypeError("Expected string id")
+        item = None
+
+        collection_type = self._id.collection_type
+        item = f"{collection_type}/{self._id.id}"
+        return item
+
+    def tolist(self):
+        if type(self._id.id) is not str:
+            raise TypeError("Expected string id")
+        if self._id.objtype_code == "d":
+            return [
+                ("datasets/" + self._id.id),
+            ]
+        elif self._id.objtype_code == "g":
+            return [
+                ("groups/" + self._id.id),
+            ]
+        elif self._id.objtype_code == "t":
+            return [
+                ("datatypes/" + self._id.id),
+            ]
+        else:
+            raise TypeError("Unexpected id type")
+
+
+class RegionReference:
+    """
+    Represents an HDF5 region reference
+    """
+
+    @property
+    def id(self):
+        """Low-level identifier appropriate for this object"""
+        return self._id
+
+    @property
+    def objref(self):
+        """Weak reference to object"""
+        return self._objref  # return weak ref to ref'd object
+
+    def __init__(self, bind):
+        """Create a new reference by binding to
+        a group/dataset/committed type
+        """
+        self._id = bind._id
+        self._objref = weakref.ref(bind)
+
+    def __repr__(self):
+        return "<HDF5 region reference>"
+
+
+def special_dtype(**kwds):
+    """Create a new h5py "special" type.  Only one keyword may be given.
+
+    Legal keywords are:
+
+    vlen = basetype
+        Base type for HDF5 variable-length datatype. This can be Python
+        str type or instance of np.dtype.
+        Example: special_dtype( vlen=str )
+
+    enum = (basetype, values_dict)
+        Create a NumPy representation of an HDF5 enumerated type.  Provide
+        a 2-tuple containing an (integer) base dtype and a dict mapping
+        string names to integer values.
+
+    ref = Reference | RegionReference
+        Create a NumPy representation of an HDF5 object or region reference
+        type."""
+
+    if len(kwds) != 1:
+        raise TypeError("Exactly one keyword may be provided")
+
+    name, val = kwds.popitem()
+
+    if name == "vlen":
+
+        return np.dtype("O", metadata={"vlen": val})
+
+    if name == "enum":
+
+        try:
+            dt, enum_vals = val
+        except TypeError:
+            msg = "Enums must be created from a 2-tuple "
+            msg += "(basetype, values_dict)"
+            raise TypeError(msg)
+
+        dt = np.dtype(dt)
+        if dt.kind not in "iu":
+            raise TypeError("Only integer types can be used as enums")
+
+        return np.dtype(dt, metadata={"enum": enum_vals})
+
+    if name == "ref":
+        dt = None
+        if val is Reference:
+            dt = np.dtype("S48", metadata={"ref": Reference})
+        elif val is RegionReference:
+            dt = np.dtype("S48", metadata={"ref": RegionReference})
+        else:
+            raise ValueError("Ref class must be Reference or RegionReference")
+
+        return dt
+
+    raise TypeError(f'Unknown special type "{name}"')
+
+
+def check_dtype(**kwds):
+    """Check a dtype for h5py special type "hint" information.  Only one
+    keyword may be given.
+
+    vlen = dtype
+        If the dtype represents an HDF5 vlen, returns the Python base class.
+        Currently only builting string vlens (str) are supported.  Returns
+        None if the dtype does not represent an HDF5 vlen.
+
+    enum = dtype
+        If the dtype represents an HDF5 enumerated type, returns the dictionary
+        mapping string names to integer values.  Returns None if the dtype does
+        not represent an HDF5 enumerated type.
+
+    ref = dtype
+        If the dtype represents an HDF5 reference type, returns the reference
+        class (either Reference or RegionReference).  Returns None if the dtype
+        does not represent an HDF5 reference type.
+    """
+
+    if len(kwds) != 1:
+        raise TypeError("Exactly one keyword may be provided")
+
+    name, dt = kwds.popitem()
+
+    if name not in ("vlen", "enum", "ref"):
+        raise TypeError('Unknown special type "%s"' % name)
+
+    try:
+        return dt.metadata[name]
+    except TypeError:
+        return None
+    except KeyError:
+        return None
 
 
 def getTypeResponse(typeItem):
     """
     Convert the given type item  to a predefined type string for
-    predefined integer and floating point types ("H5T_STD_I64LE", et. al).
-    For compound types, recursively iterate through the typeItem and do same
-    conversion for fields of the compound type.
-    """
+        predefined integer and floating point types ("H5T_STD_I64LE", et. al).
+        For compound types, recursively iterate through the typeItem and do
+        same conversion for fields of the compound type."""
     response = None
     if "uuid" in typeItem:
         # committed type, just return uuid
         response = "datatypes/" + typeItem["uuid"]
-    elif typeItem["class"] == "H5T_INTEGER" or typeItem["class"] == "H5T_FLOAT":
+    elif typeItem["class"] in ("H5T_INTEGER", "H5T_FLOAT"):
         # just return the class and base for pre-defined types
         response = {}
         response["class"] = typeItem["class"]
@@ -52,7 +214,7 @@ def getTypeResponse(typeItem):
         for field in typeItem["fields"]:
             fieldItem = {}
             fieldItem["name"] = field["name"]
-            fieldItem["type"] = getTypeResponse(field["type"])  # recursive call
+            fieldItem["type"] = getTypeResponse(field["type"])  # recurse call
             fieldList.append(fieldItem)
         response["fields"] = fieldList
     else:
@@ -60,7 +222,7 @@ def getTypeResponse(typeItem):
         for k in typeItem.keys():
             if k == "base":
                 if isinstance(typeItem[k], dict):
-                    response[k] = getTypeResponse(typeItem[k])  # recursive call
+                    response[k] = getTypeResponse(typeItem[k])  # recurse call
                 else:
                     response[k] = typeItem[k]  # predefined type
             elif k not in ("size", "base_size"):
@@ -68,112 +230,12 @@ def getTypeResponse(typeItem):
     return response
 
 
-def getItemSize(typeItem):
-    """
-    Get size of an item in bytes.
-    For variable length types (e.g. variable length strings),
-    return the string "H5T_VARIABLE"
+def getTypeItem(dt, metadata=None):
     """
-    # handle the case where we are passed a primitive type first
-    if isinstance(typeItem, bytes):
-        typeItem = typeItem.decode("ascii")
-    if isinstance(typeItem, str):
-        for type_prefix in ("H5T_STD_I", "H5T_STD_U", "H5T_IEEE_F"):
-            if typeItem.startswith(type_prefix):
-                num_bits = typeItem[len(type_prefix) :]
-                if num_bits[-2:] in ("LE", "BE"):
-                    num_bits = num_bits[:-2]
-                try:
-                    return int(num_bits) // 8
-                except ValueError:
-                    raise TypeError("Invalid Type")
-        # none of the expect primative types mathched
-        raise TypeError("Invalid Type")
-    if not isinstance(typeItem, dict):
-        raise TypeError("invalid type")
-
-    item_size = 0
-    if "class" not in typeItem:
-        raise KeyError("'class' not provided")
-    typeClass = typeItem["class"]
-
-    if typeClass == "H5T_INTEGER":
-        if "base" not in typeItem:
-            raise KeyError("'base' not provided")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_FLOAT":
-        if "base" not in typeItem:
-            raise KeyError("'base' not provided")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_STRING":
-        if "length" not in typeItem:
-            raise KeyError("'length' not provided")
-        item_size = typeItem["length"]
-
-    elif typeClass == "H5T_VLEN":
-        item_size = "H5T_VARIABLE"
-    elif typeClass == "H5T_OPAQUE":
-        if "size" not in typeItem:
-            raise KeyError("'size' not provided")
-        item_size = int(typeItem["size"])
-
-    elif typeClass == "H5T_ARRAY":
-        if "dims" not in typeItem:
-            raise KeyError("'dims' must be provided for array types")
-        if "base" not in typeItem:
-            raise KeyError("'base' not provided")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_ENUM":
-        if "base" not in typeItem:
-            raise KeyError("'base' must be provided for enum types")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_REFERENCE":
-        item_size = "H5T_VARIABLE"
-    elif typeClass == "H5T_COMPOUND":
-        if "fields" not in typeItem:
-            raise KeyError("'fields' not provided for compound type")
-        fields = typeItem["fields"]
-        if type(fields) is not list:
-            raise TypeError("Type Error: expected list type for 'fields'")
-        if not fields:
-            raise KeyError("no 'field' elements provided")
-        # add up the size of each sub-field
-        for field in fields:
-            if not isinstance(field, dict):
-                raise TypeError("Expected dictionary type for field")
-            if "type" not in field:
-                raise KeyError("'type' missing from field")
-            subtype_size = getItemSize(field["type"])  # recursive call
-            if subtype_size == "H5T_VARIABLE":
-                item_size = "H5T_VARIABLE"
-                break  # don't need to look at the rest
-
-            item_size += subtype_size
-    else:
-        raise TypeError("Invalid type class")
-
-    # calculate array type
-    if "dims" in typeItem and type(item_size) is int:
-        dims = typeItem["dims"]
-        for dim in dims:
-            item_size *= dim
-
-    return item_size
-
-
-"""
     Return type info.
           For primitive types, return string with typename
           For compound types return array of dictionary items
-"""
-
-
-def getTypeItem(dt):
-
+    """
     predefined_int_types = {
         "int8": "H5T_STD_I8",
         "uint8": "H5T_STD_U8",
@@ -184,10 +246,16 @@ def getTypeItem(dt):
         "int64": "H5T_STD_I64",
         "uint64": "H5T_STD_U64",
     }
-    predefined_float_types = {"float32": "H5T_IEEE_F32", "float64": "H5T_IEEE_F64"}
+    predefined_float_types = {
+        "float16": "H5T_IEEE_F16",
+        "float32": "H5T_IEEE_F32",
+        "float64": "H5T_IEEE_F64",
+    }
+    if not metadata and dt.metadata:
+        metadata = dt.metadata
 
     type_info = {}
-    if len(dt) > 1 or dt.names:
+    if len(dt) > 1:
         # compound type
         names = dt.names
         type_info["class"] = "H5T_COMPOUND"
@@ -204,15 +272,22 @@ def getTypeItem(dt):
         # array type
         type_info["dims"] = dt.shape
         type_info["class"] = "H5T_ARRAY"
-        type_info["base"] = getTypeItem(dt.base)
+        type_info["base"] = getTypeItem(dt.base, metadata=metadata)
     elif dt.kind == "O":
         # vlen string or data
         #
         # check for h5py variable length extension
-        vlen_check = check_dtype(vlen=dt.base)
-        if vlen_check is not None and not isinstance(vlen_check, np.dtype):
-            vlen_check = np.dtype(vlen_check)
-        ref_check = check_dtype(ref=dt.base)
+        vlen_check = None
+        if metadata and "vlen" in metadata:
+            vlen_check = metadata["vlen"]
+            if vlen_check is not None and not isinstance(vlen_check, np.dtype):
+                vlen_check = np.dtype(vlen_check)
+
+        if metadata and "ref" in metadata:
+            ref_check = metadata["ref"]
+        else:
+            ref_check = check_dtype(ref=dt.base)
+
         if vlen_check == bytes:
             type_info["class"] = "H5T_STRING"
             type_info["length"] = "H5T_VARIABLE"
@@ -229,15 +304,15 @@ def getTypeItem(dt):
             type_info["size"] = "H5T_VARIABLE"
             type_info["base"] = getTypeItem(vlen_check)
         elif vlen_check is not None:
-            # unknown vlen type
+            #  unknown vlen type
             raise TypeError("Unknown h5py vlen type: " + str(vlen_check))
         elif ref_check is not None:
             # a reference type
             type_info["class"] = "H5T_REFERENCE"
 
-            if ref_check is Reference:
+            if ref_check.__name__ == "Reference":
                 type_info["base"] = "H5T_STD_REF_OBJ"  # objref
-            elif ref_check is RegionReference:
+            elif ref_check.__name__ == "RegionReference":
                 type_info["base"] = "H5T_STD_REF_DSETREG"  # region ref
             else:
                 raise TypeError("unexpected reference type")
@@ -249,14 +324,40 @@ def getTypeItem(dt):
         type_info["size"] = dt.itemsize
         type_info["tag"] = ""  # todo - determine tag
     elif dt.base.kind == "S":
-        # Fixed length string type
-        type_info["class"] = "H5T_STRING"
-        type_info["charSet"] = "H5T_CSET_ASCII"
+        # check for object reference
+        ref_check = check_dtype(ref=dt.base)
+        if ref_check is not None:
+            # a reference type
+            type_info["class"] = "H5T_REFERENCE"
+
+            if ref_check is Reference:
+                type_info["base"] = "H5T_STD_REF_OBJ"  # objref
+            elif ref_check is RegionReference:
+                type_info["base"] = "H5T_STD_REF_DSETREG"  # region ref
+            else:
+                raise TypeError("unexpected reference type")
+        else:
+            # Fixed length string type
+            type_info["class"] = "H5T_STRING"
         type_info["length"] = dt.itemsize
+        type_info["charSet"] = "H5T_CSET_ASCII"
         type_info["strPad"] = "H5T_STR_NULLPAD"
     elif dt.base.kind == "U":
         # Fixed length unicode type
-        raise TypeError("Fixed length unicode type is not supported")
+        ref_check = check_dtype(ref=dt.base)
+        if ref_check is not None:
+            raise TypeError("unexpected reference type")
+
+        # Fixed length string type with unicode support
+        type_info["class"] = "H5T_STRING"
+
+        # this can be problematic if the encoding of the string is not valid,
+        # or reqires too many bytes.  Use variable length strings to handle all
+        # UTF8 strings correctly
+        type_info["charSet"] = "H5T_CSET_UTF8"
+        # convert from UTF32 length to a fixed length
+        type_info["length"] = dt.itemsize
+        type_info["strPad"] = "H5T_STR_NULLPAD"
 
     elif dt.kind == "b":
         # boolean type - h5py stores as enum
@@ -265,13 +366,12 @@ def getTypeItem(dt):
         if dt.base.byteorder == ">":
             byteorder = "BE"
         # this mapping is an h5py convention for boolean support
-        members = [{"name": "FALSE", "value": 0}, {"name": "TRUE", "value": 1}]
+        mapping = {"FALSE": 0, "TRUE": 1}
         type_info["class"] = "H5T_ENUM"
-        type_info["members"] = members
+        type_info["mapping"] = mapping
         base_info = {"class": "H5T_INTEGER"}
         base_info["base"] = "H5T_STD_I8" + byteorder
         type_info["base"] = base_info
-
     elif dt.kind == "f":
         # floating point type
         type_info["class"] = "H5T_FLOAT"
@@ -280,7 +380,8 @@ def getTypeItem(dt):
             byteorder = "BE"
         if dt.name in predefined_float_types:
             # maps to one of the HDF5 predefined types
-            type_info["base"] = predefined_float_types[dt.base.name] + byteorder
+            float_type = predefined_float_types[dt.base.name]
+            type_info["base"] = float_type + byteorder
         else:
             raise TypeError("Unexpected floating point type: " + dt.name)
     elif dt.kind == "i" or dt.kind == "u":
@@ -291,14 +392,13 @@ def getTypeItem(dt):
         if dt.base.byteorder == ">":
             byteorder = "BE"
 
-        # numpy integer type - but check to see if this is the h5py
+        # numpy integer type - but check to see if this is the hypy
         # enum extension
-        mapping = check_dtype(enum=dt)
-
-        if mapping:
+        if metadata and "enum" in metadata:
             # yes, this is an enum!
+            mapping = metadata["enum"]
             type_info["class"] = "H5T_ENUM"
-            type_info["members"] = [{"name": n, "value": v} for n, v in mapping.items()]
+            type_info["mapping"] = mapping
             if dt.name not in predefined_int_types:
                 raise TypeError("Unexpected integer type: " + dt.name)
             # maps to one of the HDF5 predefined types
@@ -316,11 +416,146 @@ def getTypeItem(dt):
 
     else:
         # unexpected kind
-        raise TypeError("unexpected dtype kind: " + dt.kind)
+        raise TypeError(f"unexpected dtype kind: {dt.kind}")
 
     return type_info
 
 
+def getItemSize(typeItem):
+    """
+    Get size of an item in bytes.
+        For variable length types (e.g. variable length strings),
+        return the string "H5T_VARIABLE"
+    """
+    # handle the case where we are passed a primitive type first
+    if isinstance(typeItem, str) or isinstance(typeItem, bytes):
+        for type_prefix in ("H5T_STD_I", "H5T_STD_U", "H5T_IEEE_F"):
+            if typeItem.startswith(type_prefix):
+                nlen = len(type_prefix)
+                num_bits = typeItem[nlen:]
+                if num_bits[-2:] in ("LE", "BE"):
+                    num_bits = num_bits[:-2]
+                try:
+                    return int(num_bits) // 8
+                except ValueError:
+                    raise TypeError("Invalid Type")
+        # none of the expect primative types mathched
+        raise TypeError("Invalid Type")
+    if not isinstance(typeItem, dict):
+        raise TypeError("invalid type")
+
+    item_size = 0
+    if "class" not in typeItem:
+        raise KeyError("'class' not provided")
+    typeClass = typeItem["class"]
+
+    if typeClass == "H5T_INTEGER":
+        if "base" not in typeItem:
+            raise KeyError("'base' not provided")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_FLOAT":
+        if "base" not in typeItem:
+            raise KeyError("'base' not provided")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_STRING":
+        if "length" not in typeItem:
+            raise KeyError("'length' not provided")
+        item_size = typeItem["length"]
+
+    elif typeClass == "H5T_VLEN":
+        item_size = "H5T_VARIABLE"
+    elif typeClass == "H5T_OPAQUE":
+        if "size" not in typeItem:
+            raise KeyError("'size' not provided")
+        item_size = int(typeItem["size"])
+
+    elif typeClass == "H5T_ARRAY":
+        if "dims" not in typeItem:
+            raise KeyError("'dims' must be provided for array types")
+        if "base" not in typeItem:
+            raise KeyError("'base' not provided")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_ENUM":
+        if "base" not in typeItem:
+            raise KeyError("'base' must be provided for enum types")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_REFERENCE":
+        if "length" in typeItem:
+            item_size = typeItem["length"]
+        elif "base" in typeItem and typeItem["base"] == "H5T_STD_REF_OBJ":
+            # obj ref values are in the form: "groups/<id>" or
+            # "datasets/<id>" or "datatypes/<id>"
+            item_size = 48
+        else:
+            item_size = 80  # tb: just take a guess at this for now
+    elif typeClass == "H5T_COMPOUND":
+        if "fields" not in typeItem:
+            raise KeyError("'fields' not provided for compound type")
+        fields = typeItem["fields"]
+        if not isinstance(fields, list):
+            raise TypeError("Type Error: expected list type for 'fields'")
+        if not fields:
+            raise KeyError("no 'field' elements provided")
+        # add up the size of each sub-field
+        for field in fields:
+            if not isinstance(field, dict):
+                raise TypeError("Expected dictionary type for field")
+            if "type" not in field:
+                raise KeyError("'type' missing from field")
+            subtype_size = getItemSize(field["type"])  # recursive call
+            if subtype_size == "H5T_VARIABLE":
+                item_size = "H5T_VARIABLE"
+                break  # don't need to look at the rest
+
+            item_size += subtype_size
+    else:
+        raise TypeError("Invalid type class")
+
+    # calculate array type
+    if "dims" in typeItem and isinstance(item_size, int):
+        dims = typeItem["dims"]
+        for dim in dims:
+            item_size *= dim
+
+    return item_size
+
+
+def getDtypeItemSize(dtype):
+    """ Return size of dtype in bytes
+        For variable length types (e.g. variable length strings),
+        return the string "H5T_VARIABLE
+    """
+    item_size = 0
+    if len(dtype) > 0:
+        # compound dtype
+        for i in range(len(dtype)):
+            sub_dt = dtype[i]
+            sub_dt_size = getDtypeItemSize(sub_dt)
+            if sub_dt_size == "H5T_VARIABLE":
+                item_size = "H5T_VARIABLE"  # return variable if any component is variable
+                break
+            item_size += sub_dt_size
+    else:
+        # primitive type
+        if dtype.shape:
+            base_size = getDtypeItemSize(dtype.base)
+            if base_size == "H5T_VARIABLE":
+                item_size = "H5T_VARIABLE"
+            else:
+                nelements = np.prod(dtype.shape)
+                item_size = base_size * nelements
+        else:
+            if dtype.metadata and "vlen" in dtype.metadata:
+                item_size = "H5T_VARIABLE"
+            else:
+                item_size = dtype.itemsize
+    return item_size
+
+
 def getNumpyTypename(hdf5TypeName, typeClass=None):
     predefined_int_types = {
         "H5T_STD_I8": "i1",
@@ -332,7 +567,11 @@ def getNumpyTypename(hdf5TypeName, typeClass=None):
         "H5T_STD_I64": "i8",
         "H5T_STD_U64": "u8",
     }
-    predefined_float_types = {"H5T_IEEE_F32": "f4", "H5T_IEEE_F64": "f8"}
+    predefined_float_types = {
+        "H5T_IEEE_F16": "f2",
+        "H5T_IEEE_F32": "f4",
+        "H5T_IEEE_F64": "f8",
+    }
 
     if len(hdf5TypeName) < 3:
         raise Exception("Type Error: invalid typename: ")
@@ -356,7 +595,6 @@ def getNumpyTypename(hdf5TypeName, typeClass=None):
 
 
 def createBaseDataType(typeItem):
-
     dtRet = None
     if isinstance(typeItem, str):
         # should be one of the predefined types
@@ -371,20 +609,32 @@ def createBaseDataType(typeItem):
         raise KeyError("'class' not provided")
     typeClass = typeItem["class"]
 
+    dims = ""
+    if "dims" in typeItem:
+        if typeClass != "H5T_ARRAY":
+            raise TypeError("'dims' only supported for integer types")
+
+        dims = None
+        if isinstance(typeItem["dims"], int):
+            dims = typeItem["dims"]  # make into a tuple
+        elif not isinstance(typeItem["dims"], list) and not isinstance(
+            typeItem["dims"], tuple
+        ):
+            raise TypeError("expected list or integer for dims")
+        else:
+            dims = typeItem["dims"]
+        dims = str(tuple(dims))
+
     if typeClass == "H5T_INTEGER":
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for integer types")
         baseType = getNumpyTypename(typeItem["base"], typeClass="H5T_INTEGER")
-        dtRet = np.dtype(baseType)
+        dtRet = np.dtype(dims + baseType)
     elif typeClass == "H5T_FLOAT":
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for floating point types")
         baseType = getNumpyTypename(typeItem["base"], typeClass="H5T_FLOAT")
-        dtRet = np.dtype(baseType)
+        dtRet = np.dtype(dims + baseType)
     elif typeClass == "H5T_STRING":
         if "length" not in typeItem:
             raise KeyError("'length' not provided")
@@ -392,8 +642,9 @@ def createBaseDataType(typeItem):
             raise KeyError("'charSet' not provided")
 
         if typeItem["length"] == "H5T_VARIABLE":
-            if "dims" in typeItem:
-                raise TypeError("'dims' not supported for variable types")
+            if dims:
+                msg = "ArrayType is not supported for variable len types"
+                raise TypeError(msg)
             if typeItem["charSet"] == "H5T_CSET_ASCII":
                 dtRet = special_dtype(vlen=bytes)
             elif typeItem["charSet"] == "H5T_CSET_UTF8":
@@ -408,20 +659,25 @@ def createBaseDataType(typeItem):
             if typeItem["charSet"] == "H5T_CSET_ASCII":
                 type_code = "S"
             elif typeItem["charSet"] == "H5T_CSET_UTF8":
-                raise TypeError("fixed-width unicode strings are not supported")
+                # use the same type_code as ascii strings
+                # (othewise, numpy will reserve bytes for UTF32 representation)
+                type_code = "S"
             else:
                 raise TypeError("unexpected 'charSet' value")
-            dtRet = np.dtype(type_code + str(nStrSize))  # fixed size string
+            # a fixed size string
+            dtRet = np.dtype(dims + type_code + str(nStrSize))
     elif typeClass == "H5T_VLEN":
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for vlen types")
+        if dims:
+            msg = "ArrayType is not supported for variable len types"
+            raise TypeError(msg)
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
         baseType = createBaseDataType(typeItem["base"])
         dtRet = special_dtype(vlen=np.dtype(baseType))
     elif typeClass == "H5T_OPAQUE":
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for opaque types")
+        if dims:
+            msg = "Opaque Type is not supported for variable len types"
+            raise TypeError(msg)
         if "size" not in typeItem:
             raise KeyError("'size' not provided")
         nSize = int(typeItem["size"])
@@ -429,26 +685,19 @@ def createBaseDataType(typeItem):
             raise TypeError("'size' must be non-negative")
         dtRet = np.dtype("V" + str(nSize))
     elif typeClass == "H5T_ARRAY":
-        if "dims" not in typeItem:
+        if not dims:
             raise KeyError("'dims' must be provided for array types")
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
         arrayBaseType = typeItem["base"]
-        if type(arrayBaseType) is dict:
+        if isinstance(arrayBaseType, dict):
             if "class" not in arrayBaseType:
                 raise KeyError("'class' not provided for array base type")
-            if arrayBaseType["class"] not in (
-                "H5T_INTEGER",
-                "H5T_FLOAT",
-                "H5T_STRING",
-                "H5T_COMPOUND",
-            ):
-                raise TypeError(
-                    f"{arrayBaseType['class']}: H5T_ARRAY base type not supported."
-                )
-
-        dt_base = createDataType(arrayBaseType)
-
+            type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_COMPOUND", "H5T_ARRAY")
+            if arrayBaseType["class"] not in type_classes:
+                msg = "Array Type base type must be integer, float, string, compound or array"
+                raise TypeError(msg)
+        baseType = createDataType(arrayBaseType)
         if isinstance(typeItem["dims"], int):
             dims = typeItem["dims"]  # make into a tuple
         elif type(typeItem["dims"]) not in (list, tuple):
@@ -457,11 +706,17 @@ def createBaseDataType(typeItem):
             dims = typeItem["dims"]
         # create an array type of the base type
 
-        dtRet = np.dtype((dt_base, dims))
-
+        dtRet = np.dtype((baseType, dims))
+        """
+        metadata = None
+        if baseType.metadata:
+            metadata = dict(baseType.metadata)
+            dtRet = np.dtype(dims + baseType.str, metadata=metadata)
+        else:
+            dtRet = np.dtype(dims + baseType.str)
+        return dtRet  # return predefined type
+        """
     elif typeClass == "H5T_REFERENCE":
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for reference types")
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
         if typeItem["base"] == "H5T_STD_REF_OBJ":
@@ -470,6 +725,7 @@ def createBaseDataType(typeItem):
             dtRet = special_dtype(ref=RegionReference)
         else:
             raise TypeError("Invalid base type for reference type")
+
     elif typeClass == "H5T_ENUM":
         if "base" not in typeItem:
             raise KeyError("Expected 'base' to be provided for enum type")
@@ -477,21 +733,32 @@ def createBaseDataType(typeItem):
         if "class" not in base_json:
             raise KeyError("Expected class field in base type")
         if base_json["class"] != "H5T_INTEGER":
-            raise TypeError("Only integer base types can be used with enum type")
-        if "members" not in typeItem:
-            raise KeyError("'members' not provided for enum type")
-        members = typeItem["members"]
-        if len(members) == 0:
-            raise KeyError("empty enum members")
+            msg = "Only integer base types can be used with enum type"
+            raise TypeError(msg)
+        if "mapping" not in typeItem:
+            raise KeyError("'mapping' not provided for enum type")
+        mapping = typeItem["mapping"]
+        if len(mapping) == 0:
+            raise KeyError("empty enum map")
 
         dt = createBaseDataType(base_json)
-        values_dict = dict((m["name"], m["value"]) for m in members)
-        if (
-            dt.kind == "i"
-            and dt.name == "int8"
-            and len(members) == 2
-            and "TRUE" in values_dict
-            and "FALSE" in values_dict
+        if isinstance(mapping, list):
+            # convert to a dictionary
+            values_dict = dict((m["name"], m["value"]) for m in mapping)
+        elif isinstance(mapping, dict):
+            # just use as is
+            values_dict = mapping
+        else:
+            raise TypeError("Expected dict or list mapping for enum type")
+
+        if all(
+            (
+                dt.kind == "i",
+                dt.name == "int8",
+                len(mapping) == 2,
+                "TRUE" in values_dict,
+                "FALSE" in values_dict,
+            )
         ):
             # convert to numpy boolean type
             dtRet = np.dtype("bool")
@@ -505,14 +772,12 @@ def createBaseDataType(typeItem):
     return dtRet
 
 
-"""
-Create a numpy datatype given a json type
-"""
-
-
 def createDataType(typeItem):
+    """
+    Create a numpy datatype given a json type
+    """
     dtRet = None
-    if isinstance(typeItem, (str, bytes)):
+    if type(typeItem) in (str, bytes):
         # should be one of the predefined types
         dtName = getNumpyTypename(typeItem)
         dtRet = np.dtype(dtName)
@@ -543,20 +808,90 @@ def createDataType(typeItem):
             if "type" not in field:
                 raise KeyError("'type' missing from field")
             field_name = field["name"]
-            if isinstance(field_name, str):
-                # verify the field name is ascii
-                try:
-                    field_name.encode("ascii")
-                except UnicodeDecodeError:
-                    raise TypeError("non-ascii field name not allowed")
+            if not isinstance(field_name, str):
+                raise TypeError("field names must be strings")
+            # verify the field name is ascii
+            try:
+                field_name.encode("ascii")
+            except UnicodeEncodeError:
+                raise TypeError("non-ascii field name not allowed")
 
             dt = createDataType(field["type"])  # recursive call
             if dt is None:
                 raise Exception("unexpected error")
-            subtypes.append((field_name, dt))  # append tuple
+            subtypes.append((field["name"], dt))  # append tuple
 
         dtRet = np.dtype(subtypes)
-
     else:
         dtRet = createBaseDataType(typeItem)  # create non-compound dt
     return dtRet
+
+
+def validateTypeItem(typeItem):
+    """
+    Validate a json type - call createDataType and if no exception,
+       it's valid
+    """
+    createDataType(typeItem)
+    # throws KeyError, TypeError, or ValueError
+
+
+def getBaseTypeJson(type_name):
+    """
+    Return JSON representation of a predefined type string
+    """
+    predefined_int_types = (
+        "H5T_STD_I8",
+        "H5T_STD_U8",
+        "H5T_STD_I16",
+        "H5T_STD_U16",
+        "H5T_STD_I32",
+        "H5T_STD_U32",
+        "H5T_STD_I64",
+        "H5T_STD_U64",
+    )
+    predefined_float_types = ("H5T_IEEE_F16", "H5T_IEEE_F32", "H5T_IEEE_F64")
+    type_json = {}
+    # predefined typenames start with 'H5T' and end with "LE" or "BE"
+    if all(
+        (
+            type_name.startswith("H5T_"),
+            type_name[-1] == "E",
+            type_name[-2] in ("L", "B"),
+        )
+    ):
+        # trime of the "BE/"LE"
+        type_prefix = type_name[:-2]
+        if type_prefix in predefined_int_types:
+            type_json["class"] = "H5T_INTEGER"
+            type_json["base"] = type_name
+        elif type_prefix in predefined_float_types:
+            type_json["class"] = "H5T_FLOAT"
+            type_json["base"] = type_name
+        else:
+            raise TypeError("Invalid type name")
+    else:
+        raise TypeError("Invalid type name")
+    return type_json
+
+
+def getSubType(dt_parent, fields):
+    """ Return a dtype that is a compound type composed of
+        the fields given in the field_names list
+    """
+    if len(dt_parent) == 0:
+        raise TypeError("getSubType - parent must be compound type")
+    if not fields:
+        raise TypeError("null field specification")
+    if isinstance(fields, str):
+        fields = [fields,]  # convert to a list
+
+    field_names = set(dt_parent.names)
+    dt_items = []
+    for field in fields:
+        if field not in field_names:
+            raise TypeError(f"field: {field} is not defined in parent type")
+        dt_items.append((field, dt_parent[field]))
+    dt = np.dtype(dt_items)
+
+    return dt
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 6a310c60..9ac6578d 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -841,7 +841,6 @@ def testCreateReferenceAttribute(self):
             ]
             db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
             item = db.getAttributeItem("groups", root_uuid, "A1")
-
             attr_type = item["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
@@ -1275,7 +1274,6 @@ def testGetEvalStr(self):
             for query in queries.keys():
                 eval_str = db._getEvalStr(query, fields)
                 self.assertEqual(eval_str, queries[query])
-                # print(query, "->", eval_str)
 
     def testBadQuery(self):
         queries = (
diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index 0f67d7bf..7101286a 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -2,8 +2,8 @@
 # Copyright by The HDF Group.                                                #
 # All rights reserved.                                                       #
 #                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
 # terms governing use, modification, and redistribution, is contained in     #
 # the file COPYING, which can be found at the root of the source code        #
 # distribution tree.  If you do not have access to this file, you may        #
@@ -12,11 +12,12 @@
 import unittest
 import logging
 import numpy as np
-from h5py import special_dtype
-from h5py import check_dtype
-from h5py import Reference
-from h5py import RegionReference
+
 from h5json import hdf5dtype
+from h5json.hdf5dtype import special_dtype
+from h5json.hdf5dtype import check_dtype
+from h5json.hdf5dtype import Reference
+from h5json.hdf5dtype import RegionReference
 
 
 class Hdf5dtypeTest(unittest.TestCase):
@@ -26,6 +27,31 @@ def __init__(self, *args, **kwargs):
         self.logger = logging.getLogger()
         self.logger.setLevel(logging.INFO)
 
+    def testGetBaseTypeJson(self):
+        type_json = hdf5dtype.getBaseTypeJson("H5T_IEEE_F64LE")
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_FLOAT")
+        self.assertTrue("base" in type_json)
+        self.assertEqual(type_json["base"], "H5T_IEEE_F64LE")
+
+        type_json = hdf5dtype.getBaseTypeJson("H5T_IEEE_F16LE")
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_FLOAT")
+        self.assertTrue("base" in type_json)
+        self.assertEqual(type_json["base"], "H5T_IEEE_F16LE")
+
+        type_json = hdf5dtype.getBaseTypeJson("H5T_STD_I32LE")
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_INTEGER")
+        self.assertTrue("base" in type_json)
+        self.assertEqual(type_json["base"], "H5T_STD_I32LE")
+
+        try:
+            hdf5dtype.getBaseTypeJson("foobar")
+            self.assertTrue(False)
+        except TypeError:
+            pass  # expected
+
     def testBaseIntegerTypeItem(self):
         dt = np.dtype("<i1")
         typeItem = hdf5dtype.getTypeItem(dt)
@@ -44,6 +70,15 @@ def testBaseFloatTypeItem(self):
         self.assertEqual(typeItem["class"], "H5T_FLOAT")
         self.assertEqual(typeItem["base"], "H5T_IEEE_F64LE")
 
+    def testBaseFloat16TypeItem(self):
+        dt = np.dtype("<f2")
+        typeItem = hdf5dtype.getTypeItem(dt)
+        self.assertEqual(typeItem["class"], "H5T_FLOAT")
+        self.assertEqual(typeItem["base"], "H5T_IEEE_F16LE")
+        typeItem = hdf5dtype.getTypeResponse(typeItem)  # non-verbose format
+        self.assertEqual(typeItem["class"], "H5T_FLOAT")
+        self.assertEqual(typeItem["base"], "H5T_IEEE_F16LE")
+
     def testBaseStringTypeItem(self):
         dt = np.dtype("S3")
         typeItem = hdf5dtype.getTypeItem(dt)
@@ -54,12 +89,12 @@ def testBaseStringTypeItem(self):
 
     def testBaseStringUTFTypeItem(self):
         dt = np.dtype("U3")
-        try:
-            # typeItem = hdf5dtype.getTypeItem(dt)
-            hdf5dtype.getTypeItem(dt)
-            self.assertTrue(False)  # expected exception
-        except TypeError:
-            pass  # expected
+        typeItem = hdf5dtype.getTypeItem(dt)
+        self.assertEqual(typeItem["class"], "H5T_STRING")
+        # type item length in bytes (may no actual be enough space for some UTF strings)
+        self.assertEqual(typeItem["length"], 12)
+        self.assertEqual(typeItem["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(typeItem["charSet"], "H5T_CSET_UTF8")
 
     def testBaseVLenAsciiTypeItem(self):
         dt = special_dtype(vlen=bytes)
@@ -90,9 +125,8 @@ def testBaseEnumTypeItem(self):
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I8LE")
-        self.assertTrue("members" in typeItem)
-        mapp_out = dict((m["name"], m["value"]) for m in typeItem["members"])
-        self.assertEqual(mapp_out["GREEN"], 1)
+        self.assertTrue("mapping" in typeItem)
+        self.assertEqual(typeItem["mapping"]["GREEN"], 1)
         self.assertEqual(typeSize, 1)
 
     def testBaseBoolTypeItem(self):
@@ -102,10 +136,9 @@ def testBaseBoolTypeItem(self):
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I8LE")
-        self.assertTrue("members" in typeItem)
-        members = typeItem["members"]
-        self.assertEqual(len(members), 2)
-        mapping = dict((m["name"], m["value"]) for m in members)
+        self.assertTrue("mapping" in typeItem)
+        mapping = typeItem["mapping"]
+        self.assertEqual(len(mapping), 2)
         self.assertEqual(mapping["FALSE"], 0)
         self.assertEqual(mapping["TRUE"], 1)
         self.assertEqual(typeSize, 1)
@@ -121,21 +154,23 @@ def testBaseArrayTypeItem(self):
         self.assertEqual(typeSize, 16)
 
     def testObjReferenceTypeItem(self):
-        # dt = np.dtype('S48', metadata={'ref': val.__class__})
         dt = special_dtype(ref=Reference)
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeItem["class"], "H5T_REFERENCE")
         self.assertEqual(typeItem["base"], "H5T_STD_REF_OBJ")
-        self.assertEqual(typeSize, "H5T_VARIABLE")
+        # length of obj id, e.g.:
+        # g-b2c9a750-a557-11e7-ab09-0242ac110009
+        self.assertEqual(typeSize, 48)
 
     def testRegionReferenceTypeItem(self):
         dt = special_dtype(ref=RegionReference)
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 48)
         self.assertEqual(typeItem["class"], "H5T_REFERENCE")
-        self.assertEqual(typeItem["base"], "H5T_STD_REF_DSETREG")
-        self.assertEqual(typeSize, "H5T_VARIABLE")
+        # self.assertEqual(typeItem['base'], 'H5T_STD_REF_DSETREG')
+        # self.assertEqual(typeSize, 'H5T_VARIABLE')
 
     def testCompoundArrayTypeItem(self):
         dt = np.dtype([("a", "<i1"), ("b", "S1", (10,))])
@@ -157,6 +192,28 @@ def testCompoundArrayTypeItem(self):
         self.assertEqual(field_b_basetype["class"], "H5T_STRING")
         self.assertEqual(typeSize, 11)
 
+    def testEnumArrayTypeItem(self):
+        mapping = {"RED": 0, "GREEN": 1, "BLUE": 2}
+        dt_enum = special_dtype(enum=(np.int8, mapping))
+        typeItem = hdf5dtype.getTypeItem(dt_enum)
+        dt_array = np.dtype("(2,3)" + dt_enum.str, metadata=dict(dt_enum.metadata))
+
+        typeItem = hdf5dtype.getTypeItem(dt_array)
+
+        self.assertEqual(typeItem["class"], "H5T_ARRAY")
+        self.assertTrue("dims" in typeItem)
+        self.assertEqual(typeItem["dims"], (2, 3))
+        baseItem = typeItem["base"]
+        self.assertEqual(baseItem["class"], "H5T_ENUM")
+        self.assertTrue("mapping" in baseItem)
+        self.assertEqual(baseItem["mapping"]["GREEN"], 1)
+        self.assertTrue("base" in baseItem)
+        basePrim = baseItem["base"]
+        self.assertEqual(basePrim["class"], "H5T_INTEGER")
+        self.assertEqual(basePrim["base"], "H5T_STD_I8LE")
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 6)  # one-byte for base enum type * shape of (2,3)
+
     def testCompoundArrayVlenIntTypeItem(self):
         dt_vlen = special_dtype(vlen=np.int32)
         dt_arr = np.dtype((dt_vlen, (4,)))
@@ -165,7 +222,8 @@ def testCompoundArrayVlenIntTypeItem(self):
         )
         typeItem = hdf5dtype.getTypeItem(dt_compound)
 
-        # typeSize = hdf5dtype.getItemSize(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
         self.assertEqual(typeItem["class"], "H5T_COMPOUND")
         fields = typeItem["fields"]
         field_a = fields[0]
@@ -197,7 +255,8 @@ def testCompoundArrayVlenStringTypeItem(self):
         )
         typeItem = hdf5dtype.getTypeItem(dt_compound)
 
-        # typeSize = hdf5dtype.getItemSize(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
         self.assertEqual(typeItem["class"], "H5T_COMPOUND")
         fields = typeItem["fields"]
         field_a = fields[0]
@@ -338,13 +397,14 @@ def testCreateBaseStringType(self):
         self.assertEqual(typeSize, 6)
 
     def testCreateBaseUnicodeType(self):
-        typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_UTF8", "length": 32}
-        try:
-            # dt = hdf5dtype.createDataType(typeItem)
-            hdf5dtype.createDataType(typeItem)
-            self.assertTrue(False)  # expected exception
-        except TypeError:
-            pass
+        typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_UTF8", "length": 6}
+
+        dt = hdf5dtype.createDataType(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertTrue(dt is not None)
+        self.assertEqual(dt.name, "bytes48")
+        self.assertEqual(dt.kind, "S")  # uses byte
+        self.assertEqual(typeSize, 6)
 
     def testCreateNullTermStringType(self):
         typeItem = {
@@ -355,6 +415,7 @@ def testCreateNullTermStringType(self):
         }
         typeSize = hdf5dtype.getItemSize(typeItem)
         dt = hdf5dtype.createDataType(typeItem)
+
         self.assertEqual(dt.name, "bytes48")
         self.assertEqual(dt.kind, "S")
         self.assertEqual(typeSize, 6)
@@ -388,10 +449,10 @@ def testCreateVLenUTF8Type(self):
     def testCreateVLenDataType(self):
         typeItem = {"class": "H5T_VLEN", "base": "H5T_STD_I32BE"}
         typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "object")
         self.assertEqual(dt.kind, "O")
-        self.assertEqual(typeSize, "H5T_VARIABLE")
 
     def testCreateOpaqueType(self):
         typeItem = {"class": "H5T_OPAQUE", "size": 200}
@@ -405,12 +466,7 @@ def testCreateEnumType(self):
         typeItem = {
             "class": "H5T_ENUM",
             "base": {"base": "H5T_STD_I16LE", "class": "H5T_INTEGER"},
-            "members": [
-                {"name": "GAS", "value": 2},
-                {"name": "LIQUID", "value": 1},
-                {"name": "PLASMA", "value": 3},
-                {"name": "SOLID", "value": 0},
-            ],
+            "mapping": {"GAS": 2, "LIQUID": 1, "PLASMA": 3, "SOLID": 0},
         }
 
         typeSize = hdf5dtype.getItemSize(typeItem)
@@ -429,7 +485,7 @@ def testCreateBoolType(self):
         typeItem = {
             "class": "H5T_ENUM",
             "base": {"base": "H5T_STD_I8LE", "class": "H5T_INTEGER"},
-            "members": [{"name": "TRUE", "value": 1}, {"name": "FALSE", "value": 0}],
+            "mapping": {"TRUE": 1, "FALSE": 0},
         }
 
         typeSize = hdf5dtype.getItemSize(typeItem)
@@ -437,6 +493,35 @@ def testCreateBoolType(self):
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "bool")
         self.assertEqual(dt.kind, "b")
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+
+    def testCreateReferenceType(self):
+        typeItem = {
+            "class": "H5T_REFERENCE",
+            "base": "H5T_STD_REF_OBJ",
+            "length": 48,
+            "charSet": "H5T_CSET_ASCII",
+            "strPad": "H5T_STR_NULLPAD"
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 48)
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.kind, "S")
+        self.assertTrue(dt.metadata['ref'] is Reference)
+        self.assertEqual(check_dtype(ref=dt), Reference)
+
+    def testCreateVlenReferenceType(self):
+        typeItem = {
+            'class': 'H5T_VLEN',
+            'base': {'class': 'H5T_REFERENCE', 'base': 'H5T_STD_REF_OBJ'}
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 'H5T_VARIABLE')
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.kind, "O")
+        base = dt.metadata['vlen']
+        self.assertTrue(base.metadata['ref'] is Reference)
+        self.assertEqual(check_dtype(ref=base), Reference)
 
     def testCreateCompoundType(self):
         typeItem = {
@@ -461,11 +546,34 @@ def testCreateCompoundType(self):
         self.assertEqual(dt.name, "void144")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 4)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+
         dtLocation = dt[2]
         self.assertEqual(dtLocation.name, "object")
         self.assertEqual(dtLocation.kind, "O")
         self.assertEqual(check_dtype(vlen=dtLocation), bytes)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dtLocation))
+
+    def testCreateCompoundInvalidFieldName(self):
+        typeItem = {
+            "class": "H5T_COMPOUND",
+            "fields": [
+                {
+                    "name": "\u03b1",
+                    "type": {"base": "H5T_STD_I32LE", "class": "H5T_INTEGER"},
+                },
+                {
+                    "name": "\u03c9",
+                    "type": {"base": "H5T_STD_I32LE", "class": "H5T_INTEGER"},
+                },
+            ],
+        }
+        try:
+            hdf5dtype.createDataType(typeItem)
+            self.assertTrue(False)
+        except TypeError:
+            pass  # expected
 
     def testCreateCompoundOfCompoundType(self):
         typeItem = {
@@ -552,6 +660,7 @@ def testCreateCompoundTypeUnicodeFields(self):
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 3)
         self.assertEqual(typeSize, 10)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
 
     def testCreateArrayType(self):
         typeItem = {"class": "H5T_ARRAY", "base": "H5T_STD_I64LE", "dims": (3, 5)}
@@ -560,15 +669,42 @@ def testCreateArrayType(self):
         self.assertEqual(dt.name, "void960")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(typeSize, 120)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+
+    def testCreateCompoundArrayVlenType(self):
+        typeItem = {
+            "fields": [
+                {"type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"}, "name": "VALUE"},
+                {"type": {"class": "H5T_FLOAT", "base": "H5T_IEEE_F64BE"}, "name": "VALUE2"},
+                {"type": {"class": "H5T_ARRAY", "dims": [8],
+                          "base": {
+                              "class": "H5T_STRING",
+                              "charSet": "H5T_CSET_ASCII",
+                              "strPad": "H5T_STR_NULLTERM",
+                              "length": "H5T_VARIABLE"
+                            }
+                          },
+                 "name": "VALUE3"}
+                ],
+            "class": "H5T_COMPOUND"
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.name, "void640")
+        self.assertEqual(dt.kind, "V")
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        dt_arr = dt["VALUE3"]
+        self.assertEqual(dt_arr.kind, "V")
+        self.assertEqual(dt_arr.shape, (8,))
+        self.assertEqual(dt_arr.metadata, None)
 
     def testCreateArrayIntegerType(self):
         typeItem = {"class": "H5T_INTEGER", "base": "H5T_STD_I64LE", "dims": (3, 5)}
 
         try:
             hdf5dtype.createDataType(typeItem)
-            self.assertTrue(
-                False
-            )  # expected exception - dims used with none array type
+            self.assertTrue(False)  # expected exception - dims used with non-array type
         except TypeError:
             pass  # should get exception
 
@@ -611,6 +747,43 @@ def testCreateCompoundArrayType(self):
         self.assertTrue("a" in dt.fields.keys())
         self.assertTrue("b" in dt.fields.keys())
         self.assertEqual(typeSize, 11)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+
+    def testCompoundArrayType(self):
+        typeItem = {
+            "class": "H5T_COMPOUND",
+            "fields": [
+                {
+                    "type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"},
+                    "name": "VALUE1",
+                },
+                {
+                    "type": {"class": "H5T_FLOAT", "base": "H5T_IEEE_F64BE"},
+                    "name": "VALUE2",
+                },
+                {
+                    "type": {
+                        "class": "H5T_ARRAY",
+                        "dims": [2],
+                        "base": {
+                            "class": "H5T_STRING",
+                            "charSet": "H5T_CSET_ASCII",
+                            "strPad": "H5T_STR_NULLTERM",
+                            "length": "H5T_VARIABLE",
+                        },
+                    },
+                    "name": "VALUE3",
+                },
+            ],
+        }
+        dt = hdf5dtype.createDataType(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(len(dt), 3)
+        self.assertTrue("VALUE1" in dt.fields.keys())
+        self.assertTrue("VALUE2" in dt.fields.keys())
+        self.assertTrue("VALUE3" in dt.fields.keys())
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
 
 
 if __name__ == "__main__":

From 3a2b0847ec2636df16247ffcfe4bc60cecf8f044 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sat, 8 Feb 2025 18:46:29 +0800
Subject: [PATCH 004/129] patch flake8 error

---
 test/unit/hdf5dtype_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index 7101286a..a645dc07 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -682,10 +682,10 @@ def testCreateCompoundArrayVlenType(self):
                               "charSet": "H5T_CSET_ASCII",
                               "strPad": "H5T_STR_NULLTERM",
                               "length": "H5T_VARIABLE"
-                            }
+                            } # noqa: E126
                           },
                  "name": "VALUE3"}
-                ],
+                ], # noqa: E123
             "class": "H5T_COMPOUND"
         }
         typeSize = hdf5dtype.getItemSize(typeItem)

From 133e962c1f25a02f63b170c39747d263ca740187 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sat, 8 Feb 2025 18:47:35 +0800
Subject: [PATCH 005/129] patch flake8 error

---
 test/unit/hdf5dtype_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index a645dc07..dbc806bb 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -682,10 +682,10 @@ def testCreateCompoundArrayVlenType(self):
                               "charSet": "H5T_CSET_ASCII",
                               "strPad": "H5T_STR_NULLTERM",
                               "length": "H5T_VARIABLE"
-                            } # noqa: E126
+                            }  # noqa: E126
                           },
                  "name": "VALUE3"}
-                ], # noqa: E123
+                ],  # noqa: E123
             "class": "H5T_COMPOUND"
         }
         typeSize = hdf5dtype.getItemSize(typeItem)

From 856ee6502641d71c0dae799b44275f3a9df38c24 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sun, 9 Feb 2025 09:38:19 +0800
Subject: [PATCH 006/129] keep backward compatibility for enum members key

---
 data/json/bool_attr.json | 2 +-
 data/json/bool_dset.json | 2 +-
 data/json/enum_attr.json | 2 +-
 data/json/enum_dset.json | 2 +-
 src/h5json/hdf5db.py     | 2 +-
 src/h5json/hdf5dtype.py  | 8 ++++++--
 6 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/data/json/bool_attr.json b/data/json/bool_attr.json
index 6d4d24da..ff092b9a 100644
--- a/data/json/bool_attr.json
+++ b/data/json/bool_attr.json
@@ -20,7 +20,7 @@
                             "class": "H5T_INTEGER"
                         },
                         "class": "H5T_ENUM",
-                        "mapping": [
+                        "members": [
                             {
                                 "name": "FALSE",
                                 "value": 0
diff --git a/data/json/bool_dset.json b/data/json/bool_dset.json
index 11f19e01..29e46d80 100644
--- a/data/json/bool_dset.json
+++ b/data/json/bool_dset.json
@@ -24,7 +24,7 @@
                     "class": "H5T_INTEGER"
                 },
                 "class": "H5T_ENUM",
-                "mapping": [
+                "members": [
                     {
                         "name": "FALSE",
                         "value": 0
diff --git a/data/json/enum_attr.json b/data/json/enum_attr.json
index e39425ef..9e9d94a9 100644
--- a/data/json/enum_attr.json
+++ b/data/json/enum_attr.json
@@ -21,7 +21,7 @@
                             "class": "H5T_INTEGER"
                         },
                         "class": "H5T_ENUM",
-                        "mapping": [
+                        "members": [
                             {
                                 "name": "GAS",
                                 "value": 2
diff --git a/data/json/enum_dset.json b/data/json/enum_dset.json
index 08291696..d2afcd4a 100644
--- a/data/json/enum_dset.json
+++ b/data/json/enum_dset.json
@@ -25,7 +25,7 @@
                     "class": "H5T_INTEGER"
                 },
                 "class": "H5T_ENUM",
-                "mapping": [
+                "members": [
                     {
                         "name": "GAS",
                         "value": 2
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 112fb867..db48eda3 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -797,7 +797,7 @@ def getObjByPath(self, path):
     def getObjectByUuid(self, col_type, obj_uuid):
         # col_type should be either "datasets", "groups", or "datatypes"
         if col_type not in ("datasets", "groups", "datatypes"):
-            msg = "Unexpectd error, invalid col_type: [" + col_type + "]"
+            msg = "Unexpected error, invalid col_type: [" + col_type + "]"
             self.log.error(msg)
             raise IOError(errno.EIO, msg)
         if col_type == "groups" and obj_uuid == self.dbGrp.attrs["rootUUID"]:
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index fecf38f0..9c565ce0 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -735,9 +735,13 @@ def createBaseDataType(typeItem):
         if base_json["class"] != "H5T_INTEGER":
             msg = "Only integer base types can be used with enum type"
             raise TypeError(msg)
-        if "mapping" not in typeItem:
+        if "mapping" in typeItem:
+            mapping = typeItem["mapping"]
+        elif "members" in typeItem:
+            mapping = typeItem["members"]  # backward-compatibility for hdf5-json
+        else:
             raise KeyError("'mapping' not provided for enum type")
-        mapping = typeItem["mapping"]
+
         if len(mapping) == 0:
             raise KeyError("empty enum map")
 

From eec4efce6706e89ffbad4e6cc9d500f71cfa8216 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 12 Feb 2025 22:39:11 +0800
Subject: [PATCH 007/129] first pass at abstrct db class

---
 src/h5json/array_util.py     |  730 +++++++
 src/h5json/dset_util.py      |  114 +
 src/h5json/hdf5db.py         | 3942 ++++------------------------------
 src/h5json/hdf5dtype.py      |   77 +-
 src/h5json/objid.py          |    2 +
 test/unit/array_util_test.py | 1021 +++++++++
 test/unit/hdf5db_test.py     | 1449 +++----------
 7 files changed, 2642 insertions(+), 4693 deletions(-)
 create mode 100644 src/h5json/array_util.py
 create mode 100644 src/h5json/dset_util.py
 create mode 100644 test/unit/array_util_test.py

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
new file mode 100644
index 00000000..bef4587e
--- /dev/null
+++ b/src/h5json/array_util.py
@@ -0,0 +1,730 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import math
+import base64
+import binascii
+import numpy as np
+
+MAX_VLEN_ELEMENT = 1_000_000  # restrict largest vlen element to one million
+
+
+def bytesArrayToList(data):
+    """
+    Convert list that may contain bytes type elements to list of string elements
+
+    TBD: Need to deal with non-string byte data (hexencode?)
+    """
+    if type(data) in (bytes, str):
+        is_list = False
+    elif isinstance(data, (np.ndarray, np.generic)):
+        if len(data.shape) == 0:
+            is_list = False
+            data = data.tolist()  # tolist will return a scalar in this case
+            if type(data) in (list, tuple):
+                is_list = True
+            else:
+                is_list = False
+        else:
+            is_list = True
+    elif type(data) in (list, tuple):
+        is_list = True
+    else:
+        is_list = False
+
+    if is_list:
+        out = []
+        for item in data:
+            try:
+                rec_item = bytesArrayToList(item)  # recursive call
+                out.append(rec_item)
+            except ValueError as err:
+                raise err
+    elif type(data) is bytes:
+        try:
+            out = data.decode("utf-8")
+        except UnicodeDecodeError as err:
+            raise ValueError(err)
+    else:
+        out = data
+
+    return out
+
+
+def toTuple(rank, data):
+    """
+    Convert a list to a tuple, recursively.
+    Example. [[1,2],[3,4]] -> ((1,2),(3,4))
+    """
+    if type(data) in (list, tuple):
+        if rank > 0:
+            return list(toTuple(rank - 1, x) for x in data)
+        else:
+            return tuple(toTuple(rank - 1, x) for x in data)
+    else:
+        if isinstance(data, str):
+            data = data.encode("utf8")
+        return data
+
+
+def getArraySize(arr):
+    """
+    Get size in bytes of a numpy array.
+    """
+    nbytes = arr.dtype.itemsize
+    for n in arr.shape:
+        nbytes *= n
+    return nbytes
+
+
+def getNumElements(dims):
+    """
+    Get num elements defined by a shape
+    """
+    num_elements = 0
+    if isinstance(dims, int):
+        num_elements = dims
+    elif isinstance(dims, (list, tuple)):
+        num_elements = 1
+        for dim in dims:
+            num_elements *= dim
+    else:
+        raise ValueError("Unexpected argument")
+    return num_elements
+
+
+def isVlen(dt):
+    """
+    Return True if the type contains variable length elements
+    """
+    is_vlen = False
+    if len(dt) > 1:
+        names = dt.names
+        for name in names:
+            if isVlen(dt[name]):
+                is_vlen = True
+                break
+    else:
+        if dt.metadata and "vlen" in dt.metadata:
+            is_vlen = True
+    return is_vlen
+
+
+def jsonToArray(data_shape, data_dtype, data_json):
+    """
+    Return numpy array from the given json array.
+    """
+    def fillVlenArray(rank, data, arr, index):
+        for i in range(len(data)):
+            if rank > 1:
+                index = fillVlenArray(rank - 1, data[i], arr, index)
+            else:
+                arr[index] = data[i]
+                index += 1
+        return index
+
+    if data_json is None:
+        return np.array([]).astype(data_dtype)
+
+    if isinstance(data_json, (list, tuple)):
+        if None in data_json:
+            return np.array([]).astype(data_dtype)
+
+    # need some special conversion for compound types --
+    # each element must be a tuple, but the JSON decoder
+    # gives us a list instead.
+    if len(data_dtype) > 1 and not isinstance(data_json, (list, tuple)):
+        raise TypeError("expected list data for compound data type")
+    npoints = getNumElements(data_shape)
+    np_shape_rank = len(data_shape)
+
+    if type(data_json) in (list, tuple):
+        converted_data = []
+        if npoints == 1 and len(data_json) == len(data_dtype):
+            converted_data.append(toTuple(0, data_json))
+        else:
+            converted_data = toTuple(np_shape_rank, data_json)
+        data_json = converted_data
+    else:
+        if isinstance(data_json, str):
+            data_json = data_json.encode("utf8")
+        data_json = [data_json,]  # listify
+
+    if isVlen(data_dtype):
+        arr = np.zeros((npoints,), dtype=data_dtype)
+        fillVlenArray(np_shape_rank, data_json, arr, 0)
+    else:
+        try:
+            arr = np.array(data_json, dtype=data_dtype)
+        except UnicodeEncodeError as ude:
+            msg = "Unable to encode data"
+            raise ValueError(msg) from ude
+    # raise an exception of the array shape doesn't match the selection shape
+    # allow if the array is a scalar and the selection shape is one element,
+    # numpy is ok with this
+    if arr.size != npoints:
+        msg = "Input data doesn't match selection number of elements"
+        msg += f" Expected {npoints}, but received: {arr.size}"
+        raise ValueError(msg)
+    if arr.shape != data_shape:
+        arr = arr.reshape(data_shape)  # reshape to match selection
+
+    return arr
+
+
+def getElementSize(e, dt):
+    """
+    Get number of byte needed to given element as a bytestream
+    """
+    # print(f"getElementSize - e: {e}  dt: {dt} metadata: {dt.metadata}")
+    if len(dt) > 1:
+        count = 0
+        for name in dt.names:
+            field_dt = dt[name]
+            field_val = e[name]
+            count += getElementSize(field_val, field_dt)
+    elif not dt.metadata or "vlen" not in dt.metadata:
+        count = dt.itemsize  # fixed size element
+    else:
+        # variable length element
+        vlen = dt.metadata["vlen"]
+        if isinstance(e, int):
+            if e == 0:
+                count = 4  # non-initialized element
+            else:
+                raise ValueError("Unexpected value: {}".format(e))
+        elif isinstance(e, bytes):
+            count = len(e) + 4
+        elif isinstance(e, str):
+            count = len(e.encode("utf-8")) + 4
+        elif isinstance(e, np.ndarray):
+            nElements = math.prod(e.shape)
+            if e.dtype.kind != "O":
+                count = e.dtype.itemsize * nElements
+            else:
+                arr1d = e.reshape((nElements,))
+                count = 0
+                for item in arr1d:
+                    count += getElementSize(item, dt)
+            count += 4  # byte count
+        elif isinstance(e, list) or isinstance(e, tuple):
+            if not e:
+                # empty list, just add byte count
+                count = 4
+            else:
+                # not sure how to deal with this
+                count = len(e) * vlen.itemsize + 4  # +4 for byte count
+        else:
+            raise TypeError("unexpected type: {}".format(type(e)))
+    return count
+
+
+def getByteArraySize(arr):
+    """
+    Get number of bytes needed to store given numpy array as a bytestream
+    """
+    if not isVlen(arr.dtype):
+        return arr.itemsize * math.prod(arr.shape)
+    nElements = math.prod(arr.shape)
+    # reshape to 1d for easier iteration
+    arr1d = arr.reshape((nElements,))
+    dt = arr1d.dtype
+    count = 0
+    for e in arr1d:
+        count += getElementSize(e, dt)
+    return count
+
+
+def copyBuffer(src, des, offset):
+    """
+    Copy to buffer at given offset
+    """
+    # print(f"copyBuffer - src: {src} offset: {offset}")
+    # TBD: just do: des[offset:] = src[:]  ?
+    for i in range(len(src)):
+        des[i + offset] = src[i]
+
+    # print("returning:", offset + len(src))
+    return offset + len(src)
+
+
+def copyElement(e, dt, buffer, offset):
+    """
+    Copy element to bytearray
+    """
+    # print(f"copyElement - dt: {dt}  offset: {offset}")
+    if len(dt) > 1:
+        for name in dt.names:
+            field_dt = dt[name]
+            field_val = e[name]
+            offset = copyElement(field_val, field_dt, buffer, offset)
+    elif not dt.metadata or "vlen" not in dt.metadata:
+        # print(f"e vlen: {e} type: {type(e)} itemsize: {dt.itemsize}")
+        e_buf = e.tobytes()
+        # print("tobytes:", e_buf)
+        if len(e_buf) < dt.itemsize:
+            # extend the buffer for fixed size strings
+            # print("extending buffer")
+            e_buf_ex = bytearray(dt.itemsize)
+            for i in range(len(e_buf)):
+                e_buf_ex[i] = e_buf[i]
+            e_buf = bytes(e_buf_ex)
+
+        # print("length:", len(e_buf))
+        offset = copyBuffer(e_buf, buffer, offset)
+    else:
+        # variable length element
+        vlen = dt.metadata["vlen"]
+        # print("copyBuffer vlen:", vlen)
+        if isinstance(e, int):
+            # print("copyBuffer int")
+            if e == 0:
+                # write 4-byte integer 0 to buffer
+                offset = copyBuffer(b"\x00\x00\x00\x00", buffer, offset)
+            else:
+                raise ValueError("Unexpected value: {}".format(e))
+        elif isinstance(e, bytes):
+            # print("copyBuffer bytes")
+            count = np.int32(len(e))
+            if count > MAX_VLEN_ELEMENT:
+                raise ValueError("vlen element too large")
+            offset = copyBuffer(count.tobytes(), buffer, offset)
+            offset = copyBuffer(e, buffer, offset)
+        elif isinstance(e, str):
+            # print("copyBuffer, str")
+            text = e.encode("utf-8")
+            count = np.int32(len(text))
+            if count > MAX_VLEN_ELEMENT:
+                raise ValueError("vlen element too large")
+            offset = copyBuffer(count.tobytes(), buffer, offset)
+            offset = copyBuffer(text, buffer, offset)
+
+        elif isinstance(e, np.ndarray):
+            nElements = math.prod(e.shape)
+            # print("copyBuffer ndarray, nElements:", nElements)
+
+            if e.dtype.kind != "O":
+                count = np.int32(e.dtype.itemsize * nElements)
+                # print("copyBuffeer got vlen count:", count)
+                # print("copyBuffer e:", e)
+                if count > MAX_VLEN_ELEMENT:
+                    raise ValueError("vlen element too large")
+                offset = copyBuffer(count.tobytes(), buffer, offset)
+                # print("copyBuffer write new count, offset:", offset)
+                offset = copyBuffer(e.tobytes(), buffer, offset)
+                # print("copyBuffer write data, offset:", offset)
+            else:
+                arr1d = e.reshape((nElements,))
+                for item in arr1d:
+                    offset = copyElement(item, dt, buffer, offset)
+
+        elif isinstance(e, list) or isinstance(e, tuple):
+            # print("cooyBuffer list/tuple  vlen:", vlen, "e:", e)
+            count = np.int32(len(e) * vlen.itemsize)
+            offset = copyBuffer(count.tobytes(), buffer, offset)
+            if isinstance(e, np.ndarray):
+                arr = e
+            else:
+                arr = np.asarray(e, dtype=vlen)
+            offset = copyBuffer(arr.tobytes(), buffer, offset)
+
+        else:
+            raise TypeError("unexpected type: {}".format(type(e)))
+        # print("buffer: {}".format(buffer))
+    return offset
+
+
+def getElementCount(buffer, offset=0):
+    """
+    Get the count value from persisted vlen array
+    """
+
+    n = offset
+    m = offset + 4
+    count_bytes = bytes(buffer[n:m])
+
+    try:
+        count = int(np.frombuffer(count_bytes, dtype="<i4")[0])
+    except TypeError as e:
+        msg = f"Unexpected error reading count value for varlen element: {e}"
+        raise TypeError(msg)
+    if count < 0:
+        # shouldn't be negative
+        raise ValueError(f"Unexpected count value for varlen element: {count}")
+    if count > MAX_VLEN_ELEMENT:
+        # expect variable length element to be between 0 and 1mb
+        raise ValueError("varlen element size expected to be less than 1MB")
+    return count
+
+
+def readElement(buffer, offset, arr, index, dt):
+    """
+    Read a single element from buffer into array.
+
+    Parameters:
+        buffer (bytearray): Byte array to read an element from.
+        offset (int): Starting offset in the buffer.
+        arr (numpy.ndarray): Array to store the element.
+        index (int): Index in 'arr' at which to store the element.
+        dt (numpy.dtype): Numpy datatype of the element.
+
+    Note: If the provided datatype is a variable-length sequence,
+    this function will read the byte count from the first 4 bytes
+    of the buffer, and then read the entire sequence.
+
+    Returns:
+        int: The updated offset value after reading the element.
+    """
+    if len(dt) > 1:
+        e = arr[index]
+        for name in dt.names:
+            field_dt = dt[name]
+            offset = readElement(buffer, offset, e, name, field_dt)
+    elif not dt.metadata or "vlen" not in dt.metadata:
+        count = dt.itemsize
+        n = offset
+        m = offset + count
+        e_buffer = buffer[n:m]
+        offset += count
+        try:
+            e = np.frombuffer(bytes(e_buffer), dtype=dt)
+            arr[index] = e[0]
+        except ValueError:
+            print(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}")
+            raise
+    else:
+        # variable length element
+        vlenBaseType = dt.metadata["vlen"]
+        e = arr[index]
+
+        if isinstance(e, np.ndarray):
+            nelements = math.prod(dt.shape)
+            e.reshape((nelements,))
+            for i in range(nelements):
+                offset = readElement(buffer, offset, e, i, dt)
+            e.reshape(dt.shape)
+        else:
+            # total number of bytes in the vlen sequence/variable-length string
+            count = getElementCount(buffer, offset=offset)
+            offset += 4
+            n = offset
+            m = offset + count
+            if count > 0:
+                e_buffer = buffer[n:m]
+                offset += count
+
+                if vlenBaseType is bytes:
+                    arr[index] = bytes(e_buffer)
+                elif vlenBaseType is str:
+                    s = e_buffer.decode("utf-8")
+                    arr[index] = s
+                else:
+                    try:
+                        e = np.frombuffer(bytes(e_buffer), dtype=vlenBaseType)
+                    except ValueError:
+                        msg = f"Failed to parse vlen data: {e_buffer} with dtype: {vlenBaseType}"
+                        raise ValueError(msg)
+                    arr[index] = e
+    return offset
+
+
+def encodeData(data, encoding="base64"):
+    """ Encode given data """
+    if encoding != "base64":
+        raise ValueError("only base64 encoding is supported")
+    try:
+        if isinstance(data, str):
+            data = data.encode("utf8")
+    except UnicodeEncodeError:
+        raise ValueError("can not encode string value")
+    if not isinstance(data, bytes):
+        msg = "Expected str or bytes type to encodeData, "
+        msg += f"but got: {type(data)}"
+        raise TypeError(msg)
+    try:
+        encoded_data = base64.b64encode(data)
+    except Exception as e:
+        # TBD: what exceptions can be raised?
+        raise ValueError(f"Unable to encode: {e}")
+    return encoded_data
+
+
+def decodeData(data, encoding="base64"):
+    if encoding != "base64":
+        raise ValueError("only base64 decoding is supported")
+    try:
+        decoded_data = base64.b64decode(data)
+    except Exception as e:
+        # TBD: catch actual exception
+        raise ValueError(f"Unable to decode: {e}")
+    return decoded_data
+
+
+def arrayToBytes(arr, encoding=None):
+    """
+    Return byte representation of numpy array
+    """
+    if isVlen(arr.dtype):
+        nSize = getByteArraySize(arr)
+        buffer = bytearray(nSize)
+        offset = 0
+        nElements = math.prod(arr.shape)
+        arr1d = arr.reshape((nElements,))
+        for e in arr1d:
+            # print("arrayToBytes:", e)
+            offset = copyElement(e, arr1d.dtype, buffer, offset)
+        data = bytes(buffer)
+    else:
+        # fixed length type
+        data = arr.tobytes()
+
+    if encoding:
+        data = encodeData(data)
+    return data
+
+def bytesToArray(data, dt, shape, encoding=None):
+    """
+    Create numpy array based on byte representation
+    """
+    if encoding:
+        # decode the data
+        # will raise ValueError if non-decodeable
+        data = decodeData(data)
+    if not isVlen(dt):
+        # regular numpy from string
+        arr = np.frombuffer(data, dtype=dt)
+    else:
+        nelements = getNumElements(shape)
+
+        arr = np.zeros((nelements,), dtype=dt)
+        offset = 0
+        for index in range(nelements):
+            offset = readElement(data, offset, arr, index, dt)
+    if shape is not None:
+        arr = arr.reshape(shape)
+    # check that we can update the array if needed
+    # Note: this seems to have been required starting with numpuy v 1.17
+    # Setting the flag directly is not recommended.
+    # cf: https://github.com/numpy/numpy/issues/9440
+
+    if not arr.flags["WRITEABLE"]:
+        arr_copy = arr.copy()
+        arr = arr_copy
+
+    return arr
+
+  
+def getNumpyValue(value, dt=None, encoding=None):
+    """
+    Return value as numpy type for given dtype and encoding
+    Encoding is expected to be one of None or "base64"
+    """
+    # create a scalar numpy array
+    arr = np.zeros((), dtype=dt)
+
+    if encoding and not isinstance(value, str):
+        msg = "Expected value to be string to use encoding"
+        raise ValueError(msg)
+
+    if encoding == "base64":
+        try:
+            data = base64.decodebytes(value.encode("utf-8"))
+        except binascii.Error:
+            msg = "Unable to decode base64 string: {value}"
+            # log.warn(msg)
+            raise ValueError(msg)
+        arr = bytesToArray(data, dt, dt.shape)
+    else:
+        if isinstance(value, list):
+            # convert to tuple
+            value = tuple(value)
+        elif dt.kind == "f" and isinstance(value, str) and value == "nan":
+            value = np.nan
+        else:
+            # use as is
+            pass
+        arr = np.asarray(value, dtype=dt.base)
+    return arr[()]
+
+
+def squeezeArray(data):
+    """
+    Reduce dimensions by removing any 1-extent dimensions.
+    Just return input if no 1-extent dimensions
+
+    Note: only works with ndarrays (for now at least)
+    """
+    if not isinstance(data, np.ndarray):
+        raise TypeError("expected ndarray")
+    if len(data.shape) <= 1:
+        return data
+    can_reduce = True
+    for extent in data.shape:
+        if extent == 1:
+            can_reduce = True
+        break
+    if can_reduce:
+        data = data.squeeze()
+    return data
+
+
+class IndexIterator(object):
+    """
+    Class to iterate through list of chunks of a given dataset
+    """
+
+    def __init__(self, shape, sel=None):
+        self._shape = shape
+        self._rank = len(self._shape)
+        self._stop = False
+
+        if self._rank < 1:
+            raise ValueError("IndexIterator can not be used on arrays of zero rank")
+
+        if sel is None:
+            # select over entire dataset
+            slices = []
+            for dim in range(self._rank):
+                slices.append(slice(0, self._shape[dim]))
+            self._sel = tuple(slices)
+        else:
+            if isinstance(sel, slice):
+                self._sel = (sel,)
+            else:
+                self._sel = sel
+        if len(self._sel) != self._rank:
+            raise ValueError("Invalid selection - selection region must have same rank as shape")
+        self._index = []
+        for dim in range(self._rank):
+            s = self._sel[dim]
+            if s.start < 0 or s.stop > self._shape[dim] or s.stop <= s.start:
+                raise ValueError(
+                    "Invalid selection - selection region must be within dataset space"
+                )
+            self._index.append(s.start)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self._stop:
+            raise StopIteration()
+        # bump up the last index and carry forward if we run outside the selection
+        dim = self._rank - 1
+        ret_index = self._index.copy()
+        while True:
+            s = self._sel[dim]
+            if s.step:
+                step = s.step
+            else:
+                step = 1
+            self._index[dim] += step
+
+            if self._index[dim] < s.stop:
+                # we still have room to extend along this dimensions
+                break
+
+            # reset to the start and continue iterating with higher dimension
+            self._index[dim] = s.start
+            dim -= 1
+            if dim < 0:
+                # ran past last index, stop iteration on next run
+                self._stop = True
+
+        return tuple(ret_index)
+
+
+def ndarray_compare(arr1, arr2):
+    # compare two numpy arrays.
+    # return true if the same (exclusive of null vs. empty array)
+    # false otherwise
+    # TBD: this is slow for multi-megabyte vlen arrays, needs to be optimized
+    if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
+        if not isinstance(arr1, np.void) and not isinstance(arr2, np.void):
+            return arr1 == arr2
+        if isinstance(arr1, np.void) and not isinstance(arr2, np.void):
+            if arr1.size == 0 and not arr2:
+                return True
+            else:
+                return False
+        if not isinstance(arr1, np.void) and isinstance(arr2, np.void):
+            if not arr1 and arr2.size == 0:
+                return True
+            else:
+                return False
+        # both np.voids
+        if arr1.size != arr2.size:
+            return False
+
+        if len(arr1) != len(arr2):
+            return False
+
+        for i in range(len(arr1)):
+            if not ndarray_compare(arr1[i], arr2[i]):
+                return False
+        return True
+
+    if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
+        # same only if arr1 is empty and arr2 is 0
+        if arr1.size == 0 and not arr2:
+            return True
+        else:
+            return False
+    if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray):
+        # same only if arr1 is empty and arr2 size is 0
+        if not arr1 and arr2.size == 0:
+            return True
+        else:
+            return False
+
+    # two ndarrays...
+    if arr1.shape != arr2.shape:
+        return False
+    if arr2.dtype != arr2.dtype:
+        return False
+
+    if isVlen(arr1.dtype):
+        # need to compare element by element
+
+        nElements = np.prod(arr1.shape)
+        arr1 = arr1.reshape((nElements,))
+        arr2 = arr2.reshape((nElements,))
+        for i in range(nElements):
+            if not ndarray_compare(arr1[i], arr2[i]):
+                return False
+        return True
+    else:
+        # can just us np array_compare
+        return np.array_equal(arr1, arr2)
+
+
+def getBroadcastShape(mshape, element_count):
+    # if element_count is less than the number of elements
+    # defined by mshape, return a numpy compatible broadcast
+    # shape that contains element_count elements.
+    # If non exists return None
+
+    if np.prod(mshape) == element_count:
+        return None
+
+    if element_count == 1:
+        # this always works
+        return [1,]
+
+    bcshape = []
+    rank = len(mshape)
+    for n in range(rank - 1):
+        bcshape.insert(0, mshape[rank - n - 1])
+        if element_count == np.prod(bcshape):
+            return bcshape  # have a match
+
+    return None  # no broadcast found
diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
new file mode 100644
index 00000000..75854212
--- /dev/null
+++ b/src/h5json/dset_util.py
@@ -0,0 +1,114 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import time
+from .hdf5dtype import getTypeItem
+
+"""
+# standard compress filters
+_HDF_FILTERS = {
+    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
+    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
+    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
+    4: {
+        "class": "H5Z_FILTER_SZIP",
+        "alias": "szip",
+        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
+    },
+    5: {"class": "H5Z_FILTER_NBIT"},
+    6: {
+        "class": "H5Z_FILTER_SCALEOFFSET",
+        "alias": "scaleoffset",
+        "options": ["scaleType", "scaleOffset"],
+    },
+    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
+}
+
+_HDF_FILTER_OPTION_ENUMS = {
+    "coding": {
+        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
+        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
+    },
+    "scaleType": {
+        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
+        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
+        h5py.h5z.SO_INT: "H5Z_SO_INT",
+    },
+}
+
+# h5py supported filters
+_H5PY_FILTERS = {
+    "gzip": 1,
+    "shuffle": 2,
+    "fletcher32": 3,
+    "szip": 4,
+    "scaleoffset": 6,
+    "lzf": 32000,
+}
+
+_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
+"""
+
+def make_new_dset(
+        shape=None,
+        dtype=None,
+        chunks=None,
+        compression=None,
+        shuffle=None,
+        maxshape=None,
+        compression_opts=None,
+        fillvalue=None,
+        cpl=None
+    ):
+
+    type_json = getTypeItem(dtype)
+    if shape == "H5S_NULL":
+        shape_json = {"class": "H5S_NULL"}
+    else:
+        shape_json = {"class": "H5S_SIMPLE"}
+        shape_json["dims"] = list(shape)
+
+    if maxshape:
+        shape_json["maxshape"] = maxshape
+    if cpl is None:
+        cpl = {}
+    if chunks:
+        cpl["chunks"] = chunks
+    if compression:
+        cpl["compression"] = compression
+    if shuffle:
+        cpl["shuffle"] = shuffle
+    if compression_opts: 
+        cpl["compression_opts"] = compression_opts
+    if fillvalue:
+        cpl["fillvalue"] = fillvalue
+    
+
+    # TBD - other properties
+    dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl}
+    dset_json["created"] = time.time()
+    dset_json["modified"] = None
+
+    return dset_json
+
+def resize_dataset(dset_json, shape):
+    shape_json = dset_json["shape"]
+    shape_class = shape_json["class"]
+    if shape_class != "H5S_SIMPLE":
+        raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
+    if len(shape_class["dims"]) != len(shape):
+        raise ValueError("Resize shape parameter doesn't match dataset's rank")
+    # TBD: validate shape
+    shape_json["dims"] = list(shape)
+    dset_json["modified"] = time.time()
+        
+         
\ No newline at end of file
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index db48eda3..e7ea8d9c 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -9,3563 +9,501 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import errno
 import time
-import h5py
 import numpy as np
-import os.path as op
-import os
-import json
 import logging
-from .hdf5dtype import getTypeItem, createDataType, getItemSize, Reference, RegionReference
-from .objid import createObjId
+from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
+from .array_util import jsonToArray, bytesArrayToList
+from .dset_util import make_new_dset, resize_dataset
+from .objid import createObjId, getCollectionForId
 from .apiversion import _apiver
 
 
-# global dictionary to direct back to the Hdf5db instance by filename
-# (needed for visititems callback)
-# Will break in multi-threaded context
-_db = {}
-
-UUID_LEN = 36  # length for uuid strings
-
-# standard compress filters
-_HDF_FILTERS = {
-    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
-    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
-    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
-    4: {
-        "class": "H5Z_FILTER_SZIP",
-        "alias": "szip",
-        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
-    },
-    5: {"class": "H5Z_FILTER_NBIT"},
-    6: {
-        "class": "H5Z_FILTER_SCALEOFFSET",
-        "alias": "scaleoffset",
-        "options": ["scaleType", "scaleOffset"],
-    },
-    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
-}
-
-_HDF_FILTER_OPTION_ENUMS = {
-    "coding": {
-        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
-        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
-    },
-    "scaleType": {
-        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
-        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
-        h5py.h5z.SO_INT: "H5Z_SO_INT",
-    },
-}
-
-# h5py supported filters
-_H5PY_FILTERS = {
-    "gzip": 1,
-    "shuffle": 2,
-    "fletcher32": 3,
-    "szip": 4,
-    "scaleoffset": 6,
-    "lzf": 32000,
-}
-
-_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
-
-
-def convert_dtype(srcdt):
-    """Return a dtype based on input dtype, converting any Reference types from
-    h5json style to h5py.
-    """
-
-    if len(srcdt) > 0:
-        fields = []
-        for name in srcdt.fields:
-            item = srcdt.fields[name]
-            # item is a tuple of dtype and integer offset
-            field_dt = convert_dtype(item[0])
-            fields.append((name, field_dt))
-        tgt_dt = np.dtype(fields)
-    else:
-        # check if this a "special dtype"
-        if srcdt.metadata and "ref" in srcdt.metadata:
-            if srcdt.metadata['ref'] is Reference:
-                tgt_dt = h5py.special_dtype(ref=h5py.Reference)
-            elif srcdt.metadata['ref'] is RegionReference:
-                tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
-            else:
-                raise TypeError(f"Unexpected ref type: {srcdt}")
-        elif srcdt.metadata and "vlen" in srcdt.metadata:
-            src_vlen = srcdt.metadata["vlen"]
-            if isinstance(src_vlen, np.dtype):
-                tgt_base = convert_dtype(src_vlen)
-            else:
-                tgt_base = src_vlen
-            tgt_dt = h5py.special_dtype(vlen=tgt_base)
-        elif srcdt.kind == "U":
-            # use vlen for unicode strings
-            tgt_dt = h5py.special_dtype(vlen=str)
-        else:
-            tgt_dt = srcdt  # no conversion needed
-    return tgt_dt
-
-
-def visitObj(path, obj):
-    hdf5db = _db[obj.file.filename]
-    hdf5db.visit(path, obj)
-
-
 class Hdf5db:
     """
     This class is used to manage UUID lookup tables for primary HDF objects (Groups, Datasets,
-    and Datatypes).  For HDF5 files that are read/write, this information is managed within
-    the file itself in the "__db__" group.  For read-only files, the data is managed in
-    an external file (domain filename with ".db" extension).
-
-    "___db__"  ("root" for read-only case)
-        description: Group object (member of root group). Only objects below this group are used
-                for UUID data
-        members: "{groups}", "{datasets}", "{datatypes}", "{objects}", "{paths}"
-        attrs: 'rootUUID': UUID of the root group
-
-    "{groups}"
-        description: contains map of UUID->group objects
-        members: hard link to each anonymous group (i.e. groups which are not
-            linked to by anywhere else).  Link name is the UUID
-        attrs: group reference (or path for read-only files) to the group (for non-
-            anonymous groups).
-
-    "{datasets}"
-        description: contains map of UUID->dataset objects
-        members: hard link to each anonymous dataset (i.e. datasets which are not
-            linked to by anywhere else).  Link name is the UUID
-        attrs: dataset reference (or path for read-only files) to the dataset (for non-
-            anonymous datasets).
-
-    "{dataset_props}:
-        description contains dataset creation properties"
-        members: sub-group with link name as UUID.  Sub-group attributes are the creation props
-
-    "{datatypes}"
-        description: contains map of UUID->datatyped objects
-        members: hard link to each anonymous datatype (i.e. datatypes which are not
-            linked to by anywhere else).  Link name is the UUID
-        attrs: datatype reference (or path for read-only files) to the datatype (for non-
-            anonymous datatypes).
-
-    "{addr}"
-        description: contains map of file offset to UUID.
-        members: none
-        attrs: map of file offset to UUID
+    and Datatypes).  By default all data is held in-memory.  Initialize with h5_reader to read from
+    an HDF5 compatible storage pool, and or, h5_writer to write to an HDF5 compatible storage pool.
     """
-
-    @staticmethod
-    def createHDF5File(filePath):
-        # create an "empty" hdf5 file
-        # if op.isfile(filePath):
-        #     raise IOError(errno.EEXIST, "Resource already exists")
-
-        f = h5py.File(filePath, "w")
-        f.close()
-
+     
     @staticmethod
     def getVersionInfo():
         versionInfo = {}
         versionInfo["hdf5-json-version"] = _apiver
-        versionInfo["h5py_version"] = h5py.version.version
-        versionInfo["hdf5_version"] = h5py.version.hdf5_version
         return versionInfo
 
     def __init__(
         self,
-        filePath,
-        dbFilePath=None,
-        readonly=False,
-        app_logger=None,
-        root_uuid=None,
-        update_timestamps=True,
-        userid=None,
+        h5_reader = None,
+        h5_writer = None,
+        app_logger = None,
     ):
         if app_logger:
             self.log = app_logger
         else:
             self.log = logging.getLogger()
-        if len(filePath) == 0 or not op.isfile(filePath):
-            raise IOError(errno.ENXIO, "file not found")
-        if not h5py.is_hdf5(filePath):
-            raise IOError(errno.EINVAL, "not an HDF5 file")
-
-        mode = "r"
-        if readonly:
-            self.readonly = True
-        else:
-            if not os.stat(filePath).st_mode & 0o200:
-                # file is read-only
-                self.readonly = True
-            else:
-                mode = "r+"
-                self.readonly = False
-
-        self.log.info("init -- filePath: " + filePath + " mode: " + mode)
-
-        self.update_timestamps = update_timestamps
-
-        self.f = h5py.File(filePath, mode, libver="latest")
-
-        self.root_uuid = root_uuid
-
-        if self.readonly:
-            # for read-only files, add a dot in front of the name to be used as
-            # the db file.  This won't collide with actual data files, since
-            # "." is not allowed as the first character in a domain name.
-            if not dbFilePath:
-                dirname = op.dirname(self.f.filename)
-                basename = op.basename(self.f.filename)
-                if len(dirname) > 0:
-                    dbFilePath = dirname + "/." + basename
-                else:
-                    dbFilePath = "." + basename
-            dbMode = "r+"
-            if not op.isfile(dbFilePath):
-                dbMode = "w"
-            self.log.info("dbFilePath: " + dbFilePath + " mode: " + dbMode)
-            self.dbf = h5py.File(dbFilePath, dbMode)
-        else:
-            self.dbf = None  # for read only
-        # create a global reference to this class
-        # so visitObj can call back
-        _db[filePath] = self
 
+        self._db = {}
+
+        self._reader = h5_reader
+        self._writer = h5_writer
+    
+        if self._reader:
+            root_id = self._reader.get_objid("/")
+            kwargs = {"include_attrs": True, "include_links": True}
+            group_json = self._reader.get_obj(root_id, **kwargs)
+        else:
+            # create a root group
+            group_json = {"links": {}, "attributes": {}, "cpl": {}}
+            group_json["created"] = time.time()
+            root_id = createObjId(obj_type="groups")
+            self._db[root_id] = group_json
+        
+        self._root_id = root_id
+           
     def __enter__(self):
+        """ called on package init """
         self.log.info("Hdf5db __enter")
         return self
 
     def __exit__(self, type, value, traceback):
+        """ called on package exit """
         self.log.info("Hdf5db __exit")
-        filename = self.f.filename
-        self.f.flush()
-        self.f.close()
-        if self.dbf:
-            self.dbf.flush()
-            self.dbf.close()
-        del _db[filename]
-
-    def getTimeStampName(self, uuid, objType="object", name=None):
-        ts_name = uuid
-        if objType != "object":
-            if len(name) == 0:
-                self.log.error("empty name passed to setCreateTime")
-                raise Exception("bad setCreateTimeParameter")
-            if objType == "attribute":
-                ts_name += "_attr:["
-                ts_name += name
-                ts_name += "]"
-            elif objType == "link":
-                ts_name += "_link:["
-                ts_name += name
-                ts_name += "]"
-            else:
-                msg = "Bad objType passed to setCreateTime"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-        return ts_name
-
-    """
-      setCreateTime - sets the create time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        timestamp - time (otherwise current time will be used)
-
-       returns - nothing
-
-       Note - should only be called once per object
-    """
-
-    def setCreateTime(self, uuid, objType="object", name=None, timestamp=None):
-        if not self.update_timestamps:
-            return
-        ctime_grp = self.dbGrp["{ctime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        if timestamp is None:
-            timestamp = time.time()
-        if ts_name in ctime_grp.attrs:
-            self.log.warning("modifying create time for object: " + ts_name)
-        ctime_grp.attrs.create(ts_name, timestamp, dtype="int64")
-
-    """
-      getCreateTime - gets the create time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        useRoot - if true, use the time value for root object as default
-
-       returns - create time for object, or create time for root if not set
-    """
-
-    def getCreateTime(self, uuid, objType="object", name=None, useRoot=True):
-        ctime_grp = self.dbGrp["{ctime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        timestamp = None
-        if ts_name in ctime_grp.attrs:
-            timestamp = ctime_grp.attrs[ts_name]
-        elif useRoot:
-            # return root timestamp
-            root_uuid = self.dbGrp.attrs["rootUUID"]
-            if root_uuid in ctime_grp.attrs:
-                timestamp = ctime_grp.attrs[root_uuid]
-        return timestamp
-
-    """
-      setModifiedTime - sets the modified time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        timestamp - time (otherwise current time will be used)
-
-       returns - nothing
-
-    """
-
-    def setModifiedTime(self, uuid, objType="object", name=None, timestamp=None):
-        if not self.update_timestamps:
-            return
-        mtime_grp = self.dbGrp["{mtime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        if timestamp is None:
-            timestamp = time.time()
-        mtime_grp.attrs.create(ts_name, timestamp, dtype="int64")
-
-    """
-      getModifiedTime - gets the modified time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        useRoot - if true, use the time value for root object as default
-
-       returns - create time for object, or create time for root if not set
-    """
-
-    def getModifiedTime(self, uuid, objType="object", name=None, useRoot=True):
-        mtime_grp = self.dbGrp["{mtime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        timestamp = None
-        if ts_name in mtime_grp.attrs:
-            timestamp = mtime_grp.attrs[ts_name]
-        else:
-            # return create time if no modified time has been set
-            ctime_grp = self.dbGrp["{ctime}"]
-            if ts_name in ctime_grp.attrs:
-                timestamp = ctime_grp.attrs[ts_name]
-            elif useRoot:
-                # return root timestamp
-                root_uuid = self.dbGrp.attrs["rootUUID"]
-                timestamp = mtime_grp.attrs[root_uuid]
-        return timestamp
-
-    """
-      getAclGroup - return the db group "{acl}" if present,
-        otherwise return None
-    """
-
-    def getAclGroup(self, create=False):
-        if not self.dbGrp:
-            return None  # file not initialized
-        if "{acl}" in self.dbGrp:
-            return self.dbGrp["{acl}"]
-        if not create:
-            return None
-        self.dbGrp.create_group("{acl}")
-        return self.dbGrp["{acl}"]
-
-    """
-      getAclDtype - return detype for ACL
-    """
-
-    def getAclDtype(self):
-        fields = []
-        fields.append(("userid", np.int32))
-        fields.append(("create", np.int8))
-        fields.append(("read", np.int8))
-        fields.append(("update", np.int8))
-        fields.append(("delete", np.int8))
-        fields.append(("readACL", np.int8))
-        fields.append(("updateACL", np.int8))
-        dt = np.dtype(fields)
-        return dt
-
-    """
-      getAclDataset - return ACL datset for given uuid
-    """
-
-    def getAclDataset(self, obj_uuid, create=False):
-        acl_group = self.getAclGroup(create=create)
-
-        if acl_group is None:
-            return None
-
-        if obj_uuid in acl_group:
-            return acl_group[obj_uuid]
-
-        if not create:
-            return None
-
-        # create dataset
-        dt = self.getAclDtype()
-        acl_group.create_dataset(obj_uuid, (0,), dtype=dt, maxshape=(None,))
-        return acl_group[obj_uuid]
-
-    """
-      getNumAcls - return number of acls associatted with given uuid
-    """
-
-    def getNumAcls(self, obj_uuid):
-        acl_group = self.getAclGroup()
-        if acl_group is None:
-            return 0
-        if obj_uuid not in acl_group:
-            return 0
-        acls = acl_group[obj_uuid]
-        return acls.shape[0]
-
-    """
-      convertAclNdArrayToDict - helper function - return acl item to dict
-    """
-
-    def convertAclNdArrayToDict(self, acl_ndarray):
-        fields = acl_ndarray.dtype.fields.keys()
-        acl = {}
-        for field in fields:
-            value = int(acl_ndarray[field])
-            acl[field] = value
-        return acl
-
-    def getDefaultAcl(self):
-        """Get default acl - returns dict obj"""
-
-        dt = self.getAclDtype()
-        acl = {}
-        for field in dt.fields.keys():
-            if field == "userid":
-                acl[field] = 0
+        if self._writer:
+            self._writer.flush()
+            self._writer.close()
+         
+
+    def getObjectById(self, obj_id):
+        """ return objecct with given id """
+        if obj_id not in self._db:
+            if self._reader:
+                # load the obj from the reader
+                kwargs = {"include_attrs": True, "include_links": True}
+                obj_json = self._reader.get_obj(obj_id, **kwargs)
+                self._db[obj_id] = obj_json
             else:
-                acl[field] = 1  # default is allowed
-        return acl
-
-    def getAcl(self, obj_uuid, userid):
-        """
-        getAcl - return ACL for given uuid and userid
-            returns ACL associated with the given uuid, or if none exists,
-            the ACL associatted with the root group.
-
-            If an ACL is not present for a userid/obj and ACL will be returned
-            via the following precedence:
-
-            1) obj_uuid, user_id
-            2) root_uuid, user_id
-            3) obj_uuid, 0
-            4) root_uuid, 0
-            5) 'all perm' ACL
-        """
-        acl_grp = self.getAclGroup()
-
-        if acl_grp is not None:
-            acl = self.getAclByObjAndUser(obj_uuid, userid)
-            if acl is not None:
-                return acl
-
-            if obj_uuid != self.root_uuid and userid != 0:
-                # get the root acl for this user
-                acl = self.getAclByObjAndUser(self.root_uuid, userid)
-                if acl is not None:
-                    return acl
-
-            if userid != 0:
-                # get acl for default user
-                acl = self.getAclByObjAndUser(obj_uuid, 0)
-                if acl is not None:
-                    return acl
-
-            if obj_uuid != self.root_uuid:
-                # get root acl for default user
-                acl = self.getAclByObjAndUser(self.root_uuid, 0)
-                if acl is not None:
-                    return acl
-
-        # create an ACL with default permissions
-        acl = self.getDefaultAcl()
-
-        return acl
-
-    def getAclByObjAndUser(self, obj_uuid, userid):
-        """
-        get ACL for specific uuid and user
-            return None if not found
-        """
-        acl = None
-        acl_dset = self.getAclDataset(obj_uuid)
-
-        if acl_dset:
-            # iterate through elements, looking for user_id
-            acls = acl_dset[...]
-            num_acls = acl_dset.shape[0]
-            acl = None
-            for i in range(num_acls):
-                item = acls[i]
-                if item["userid"] == userid:
-                    acl = item
-                    break
-
-        if acl is not None:
-            acl = self.convertAclNdArrayToDict(acl)
-        return acl
-
-    def getAcls(self, obj_uuid):
-        """
-        getAcls - get all acls for given uuid
-        """
-        acls = []
-        acl_dset = self.getAclDataset(obj_uuid)
-
-        if acl_dset:
-            # iterate through elements, looking for user_id
-            num_acls = acl_dset.shape[0]
-
-            for i in range(num_acls):
-                item = acl_dset[i]
-                acl = self.convertAclNdArrayToDict(item)
-                acls.append(acl)
-
-        return acls
-
-    def setAcl(self, obj_uuid, acl):
-        """
-        setAcl -  set the acl for given uuid.
-        """
-        acl_dset = self.getAclDataset(obj_uuid, create=True)
-
-        if acl_dset is None:
-            msg = "Unexpected error acl not created for uuid:[" + obj_uuid + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        userid = acl["userid"]
-
-        # iterate through elements, looking for user_id
-        acls = acl_dset[...]
-        num_acls = acl_dset.shape[0]
-
-        user_index = None
-
-        for i in range(num_acls):
-            item = acls[i]
-            if item["userid"] == userid:
-                # update this element
-                user_index = i
-                break
-
-        if user_index is None:
-            # userid not found - add row
-            acl_dset.resize(((num_acls + 1),))
-            user_index = num_acls
-
-        # update the acl dataset
-        item = acl_dset[user_index]
-        for field in acl.keys():
-            item[field] = acl[field]
-        acl_dset[user_index] = item  # save back to the file
-
-    def initFile(self):
-        # self.log.info("initFile")
-        if self.readonly:
-            self.dbGrp = self.dbf
-            if "{groups}" in self.dbf:
-                # file already initialized
-                self.root_uuid = self.dbGrp.attrs["rootUUID"]
-                return
-
-        else:
-            if "__db__" in self.f:
-                # file already initialized
-                self.dbGrp = self.f["__db__"]
-                self.root_uuid = self.dbGrp.attrs["rootUUID"]
-                return  # already initialized
-            self.dbGrp = self.f.create_group("__db__")
-
-        self.log.info("initializing file")
-        if not self.root_uuid:
-            self.root_uuid = createObjId()
-        self.dbGrp.attrs["rootUUID"] = self.root_uuid
-        self.dbGrp.create_group("{groups}")
-        self.dbGrp.create_group("{datasets}")
-        self.dbGrp.create_group("{datatypes}")
-        self.dbGrp.create_group("{addr}")  # store object address
-        self.dbGrp.create_group("{ctime}")  # stores create timestamps
-        self.dbGrp.create_group("{mtime}")  # store modified timestamps
-
-        mtime = op.getmtime(self.f.filename)
-        ctime = mtime
-        self.setCreateTime(self.root_uuid, timestamp=ctime)
-        self.setModifiedTime(self.root_uuid, timestamp=mtime)
-
-        self.f.visititems(visitObj)
-
-    def visit(self, path, obj):
-        name = obj.__class__.__name__
-        if len(path) >= 6 and path[:6] == "__db__":
-            return  # don't include the db objects
-        self.log.info("visit: " + path + " name: " + name)
-        col = None
-        if name == "Group":
-            col = self.dbGrp["{groups}"].attrs
-        elif name == "Dataset":
-            col = self.dbGrp["{datasets}"].attrs
-        elif name == "Datatype":
-            col = self.dbGrp["{datatypes}"].attrs
-        else:
-            msg = "Unknown object type: " + __name__ + " found during scan of HDF5 file"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        obj_id = createObjId()  # create uuid
-
-        addrGrp = self.dbGrp["{addr}"]
-        if not self.readonly:
-            # storing db in the file itself, so we can link to the object directly
-            col[obj_id] = obj.ref  # save attribute ref to object
-        else:
-            # store path to object
-            col[obj_id] = obj.name
-        addr = h5py.h5o.get_info(obj.id).addr
-        # store reverse map as an attribute
-        addrGrp.attrs[str(addr)] = obj_id
-
-    #
-    # Get Dataset creation properties
-    #
-    def getDatasetCreationProps(self, dset_uuid):
-        prop_list = {}
-        if "{dataset_props}" not in self.dbGrp:
-            # no, group, so no properties
-            return prop_list  # return empty dict
-        dbPropsGrp = self.dbGrp["{dataset_props}"]
-
-        if dset_uuid not in dbPropsGrp.attrs:
-            return prop_list  # return empty dict
-        prop_str = dbPropsGrp.attrs[dset_uuid]
-        # expand json string
-        try:
-            prop_list = json.loads(prop_str)
-        except ValueError as ve:
-            msg = (
-                "Unable to load creation properties for dataset:["
-                + dset_uuid
-                + "]: "
-                + ve.message
-            )
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        # fill in Filter class values
-        if "filters" in prop_list:
-            prop_filters = prop_list["filters"]
-            for prop_filter in prop_filters:
-                if "class" not in prop_filter:
-                    filter_id = prop_filter["id"]
-                    if filter_id in _HDF_FILTERS:
-                        hdf_filter = _HDF_FILTERS[filter_id]
-                        prop_filter["class"] = hdf_filter["class"]
-                    else:
-                        prop_filter["class"] = "H5Z_FILTER_USER"
-
-        return prop_list
-
-    #
-    # Set dataset creation property
-    #
-    def setDatasetCreationProps(self, dset_uuid, prop_dict):
-        self.log.info("setDataProp([" + dset_uuid + "]")
-        if not prop_dict:
-            # just ignore if empty dictionary
-            return
-        if "{dataset_props}" not in self.dbGrp:
-            self.dbGrp.create_group("{dataset_props}")
-        dbPropsGrp = self.dbGrp["{dataset_props}"]
-        if dset_uuid in dbPropsGrp.attrs:
-            # this should be write once
-            msg = (
-                "Unexpected error setting dataset creation properties for dataset:["
-                + dset_uuid
-                + "]"
-            )
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        prop_str = json.dumps(prop_dict)
-        dbPropsGrp.attrs[dset_uuid] = prop_str
-
-    def getUUIDByAddress(self, addr):
-        if "{addr}" not in self.dbGrp:
-            self.log.error("expected to find {addr} group")
-            return None
-        addrGrp = self.dbGrp["{addr}"]
-        obj_uuid = None
-        if str(addr) in addrGrp.attrs:
-            obj_uuid = addrGrp.attrs[str(addr)]
-        if obj_uuid and type(obj_uuid) is not str:
-            # convert bytes to unicode
-            obj_uuid = obj_uuid.decode("utf-8")
-        return obj_uuid
-
-    def getNumLinksToObjectInGroup(self, grp, obj):
-        """
-        Get the number of links in a group to an object
-        """
-        objAddr = h5py.h5o.get_info(obj.id).addr
-        numLinks = 0
-        for name in grp:
-            try:
-                child = grp[name]
-            except KeyError:
-                # UDLink? Ignore for now
-                self.log.info("ignoring link (UDLink?): " + name)
+                raise KeyError(f"obj_id: {obj_id} not found")
+        obj_json = self._db[obj_id]
+
+        return obj_json
+
+    def getObjectIdByPath(self, h5path, parent_id=None):
+        """ Return id for the given link path starting from parent_id if set,
+        otherwise the root_id """
+
+        if h5path == "/":
+            return self._root_id  # just return root id
+
+        if parent_id is None:
+            parent_id = self._root_id
+        self.log.debug(f"getObjectIdDByPath(h5path: {h5path} parent_id: {parent_id}")
+        
+        obj_json = self.getObjectById(parent_id)
+        if obj_json is None:
+            self.log.warning("getObjectIdDByPath - parent_id not found")
+            raise KeyError("parent_id: {parent_id} not found")
+
+        obj_id = parent_id
+        searched_ids = set(obj_id)
+
+        link_names = h5path.split('/')
+        self.log.debug(f"link_names: {link_names}")
+        for link_name in link_names:
+            if not link_name:
                 continue
-
-            addr = h5py.h5o.get_info(child.id).addr
-            if addr == objAddr:
-                numLinks = numLinks + 1
-
-        return numLinks
-
-    def getNumLinksToObject(self, obj):
-        """
-        Get the number of links to the given object
-        """
-        self.initFile()
-        groups = self.dbGrp["{groups}"]
-        numLinks = 0
-        # iterate through each group in the file and unlink tgt if it is linked
-        # by the group
-        for uuidName in groups:
-            # iterate through anonymous groups
-            grp = groups[uuidName]
-            nLinks = self.getNumLinksToObjectInGroup(grp, obj)
-            if nLinks > 0:
-                numLinks += nLinks
-        for uuidName in groups.attrs:
-            # now non anonymous groups
-            grpRef = groups.attrs[uuidName]
-            grp = self.f[grpRef]  # dereference
-            nLinks = self.getNumLinksToObjectInGroup(grp, obj)
-            if nLinks > 0:
-                numLinks += nLinks
-        # finally, check the root group
-        root = self.getObjByPath("/")
-        nLinks = self.getNumLinksToObjectInGroup(root, obj)
-        numLinks += nLinks
-
-        return numLinks
-
-    def getUUIDByPath(self, path):
-        self.initFile()
-        self.log.info("getUUIDByPath: [" + path + "]")
-        if len(path) >= 6 and path[:6] == "__db__":
-            msg = "getUUIDByPath called with invalid path: [" + path + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        if path == "/":
-            # just return the root UUID
-            root_uuid = self.dbGrp.attrs["rootUUID"]
-            if root_uuid and type(root_uuid) is not str:
-                # convert bytes to unicode
-                root_uuid = root_uuid.decode("utf-8")
-            return root_uuid
-
-        obj = self.f[path]  # will throw KeyError if object doesn't exist
-        addr = h5py.h5o.get_info(obj.id).addr
-        obj_uuid = self.getUUIDByAddress(addr)
-        return obj_uuid
-
-    def getObjByPath(self, path):
-        if len(path) >= 6 and path[:6] == "__db__":
-            return None  # don't include the db objects
-        obj = self.f[path]  # will throw KeyError if object doesn't exist
-        return obj
-
-    def getObjectByUuid(self, col_type, obj_uuid):
-        # col_type should be either "datasets", "groups", or "datatypes"
-        if col_type not in ("datasets", "groups", "datatypes"):
-            msg = "Unexpected error, invalid col_type: [" + col_type + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        if col_type == "groups" and obj_uuid == self.dbGrp.attrs["rootUUID"]:
-            return self.f["/"]  # returns root group
-
-        obj = None  # Group, Dataset, or Datatype
-        col_name = "{" + col_type + "}"
-        # get the collection group for this collection type
-        col = self.dbGrp[col_name]
-        if obj_uuid in col.attrs:
-            ref = col.attrs[obj_uuid]
-            obj = self.f[ref]  # this works for read-only as well
-        elif obj_uuid in col:
-            # anonymous object
-            obj = col[obj_uuid]
-
-        return obj
-
-    def getDatasetObjByUuid(self, obj_uuid):
-        self.initFile()
-        self.log.info("getDatasetObjByUuid(" + obj_uuid + ")")
-
-        obj = self.getObjectByUuid("datasets", obj_uuid)
-
-        return obj
-
-    def getGroupObjByUuid(self, obj_uuid):
-        self.initFile()
-        self.log.info("getGroupObjByUuid(" + obj_uuid + ")")
-
-        obj = self.getObjectByUuid("groups", obj_uuid)
-
-        return obj
-
-    def getDatasetTypeItemByUuid(self, obj_uuid):
-        dset = self.getDatasetObjByUuid(obj_uuid)  # throws exception if not found
-        item = {"id": obj_uuid}
-        item["type"] = getTypeItem(dset.dtype)
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        return item
-
-    def getNullReference(self):
-        """
-        getNullReference - return a null object reference
-        """
-        tmpGrp = None
-        if "{tmp}" not in self.dbGrp:
-            tmpGrp = self.dbGrp.create_group("{tmp}")
-        else:
-            tmpGrp = self.dbGrp["{tmp}"]
-        if "nullref" not in tmpGrp:
-            dt = h5py.special_dtype(ref=h5py.Reference)
-            tmpGrp.create_dataset("nullref", (1,), dtype=dt)
-        nullref_dset = tmpGrp["nullref"]
-        return nullref_dset[0]
-
-    def getNullRegionReference(self):
-        """
-        getNullRegionReference - return a null region reference
-        """
-        tmpGrp = None
-        if "{tmp}" not in self.dbGrp:
-            tmpGrp = self.dbGrp.create_group("{tmp}")
-        else:
-            tmpGrp = self.dbGrp["{tmp}"]
-            if "nullregref" not in tmpGrp:
-                dt = h5py.special_dtype(ref=h5py.RegionReference)
-                tmpGrp.create_dataset("nullregref", (1,), dtype=dt)
-                nullregref_dset = tmpGrp["nullregref"]
-                return nullregref_dset[0]
-
-    def getShapeItemByDsetObj(self, obj):
-        item = {}
-        if obj.shape is None:
-            # new with h5py 2.6, null space datasets will return None for shape
-            item["class"] = "H5S_NULL"
-        elif len(obj.shape) == 0:
-            # check to see if this is a null space vs a scalar dataset we'll do
-            # this by seeing if an exception is raised when reading the dataset
-            # h5py issue https://github.com/h5py/h5py/issues/279 will provide a
-            # better way to determine null spaces
-            # Update 3/10/17: Above issue is closed, but waiting on 2.7 final release
-            try:
-                val = obj[...]
-                if val is None:
-                    self.log.warning("no value returned for scalar dataset")
-                item["class"] = "H5S_SCALAR"
-            except IOError:
-                item["class"] = "H5S_NULL"
-        else:
-            item["class"] = "H5S_SIMPLE"
-            item["dims"] = obj.shape
-            maxshape = []
-            include_maxdims = False
-            for i in range(len(obj.shape)):
-                extent = 0
-                if len(obj.maxshape) > i:
-                    extent = obj.maxshape[i]
-                    if extent is None:
-                        extent = 0
-                    if extent > obj.shape[i] or extent == 0:
-                        include_maxdims = True
-                maxshape.append(extent)
-            if include_maxdims:
-                item["maxdims"] = maxshape
-        return item
-
-    def getShapeItemByAttrObj(self, obj):
-        item = {}
-        if obj.shape is None or obj.get_storage_size() == 0:
-            # If storage size is 0, assume this is a null space obj
-            # See: h5py issue https://github.com/h5py/h5py/issues/279
-            item["class"] = "H5S_NULL"
-        else:
-            if obj.shape:
-                item["class"] = "H5S_SIMPLE"
-                item["dims"] = obj.shape
-            else:
-                item["class"] = "H5S_SCALAR"
-        return item
-
-    #
-    # Get dataset creation properties maintained by HDF5 library
-    #
-    def getHDF5DatasetCreationProperties(self, obj_uuid, type_class):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        #
-        # Fill in creation properties
-        #
-        creationProps = {}
-        plist = h5py.h5d.DatasetID.get_create_plist(dset.id)
-
-        # alloc time
-        nAllocTime = plist.get_alloc_time()
-        if nAllocTime == h5py.h5d.ALLOC_TIME_DEFAULT:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_DEFAULT"
-        elif nAllocTime == h5py.h5d.ALLOC_TIME_LATE:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_LATE"
-        elif nAllocTime == h5py.h5d.ALLOC_TIME_EARLY:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_EARLY"
-        elif nAllocTime == h5py.h5d.ALLOC_TIME_INCR:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_INCR"
-        else:
-            self.log.warning("Unknown alloc time value: " + str(nAllocTime))
-
-        # fill time
-        nFillTime = plist.get_fill_time()
-        if nFillTime == h5py.h5d.FILL_TIME_ALLOC:
-            creationProps["fillTime"] = "H5D_FILL_TIME_ALLOC"
-        elif nFillTime == h5py.h5d.FILL_TIME_NEVER:
-            creationProps["fillTime"] = "H5D_FILL_TIME_NEVER"
-        elif nFillTime == h5py.h5d.FILL_TIME_IFSET:
-            creationProps["fillTime"] = "H5D_FILL_TIME_IFSET"
-        else:
-            self.log.warning("unknown fill time value: " + str(nFillTime))
-
-        if type_class not in ("H5T_VLEN", "H5T_OPAQUE"):
-            if plist.fill_value_defined() == h5py.h5d.FILL_VALUE_USER_DEFINED:
-                creationProps["fillValue"] = self.bytesArrayToList(dset.fillvalue)
-
-        # layout
-        nLayout = plist.get_layout()
-        if nLayout == h5py.h5d.COMPACT:
-            creationProps["layout"] = {"class": "H5D_COMPACT"}
-        elif nLayout == h5py.h5d.CONTIGUOUS:
-            creationProps["layout"] = {"class": "H5D_CONTIGUOUS"}
-        elif nLayout == h5py.h5d.CHUNKED:
-            creationProps["layout"] = {"class": "H5D_CHUNKED", "dims": dset.chunks}
-        else:
-            self.log.warning("Unknown layout value:" + str(nLayout))
-
-        num_filters = plist.get_nfilters()
-        filter_props = []
-        if num_filters:
-            for n in range(num_filters):
-                filter_info = plist.get_filter(n)
-                opt_values = filter_info[2]
-                filter_prop = {}
-                filter_id = filter_info[0]
-                filter_prop["id"] = filter_id
-                if filter_info[3]:
-                    filter_prop["name"] = self.bytesArrayToList(filter_info[3])
-                if filter_id in _HDF_FILTERS:
-                    hdf_filter = _HDF_FILTERS[filter_id]
-                    filter_prop["class"] = hdf_filter["class"]
-                    if "options" in hdf_filter:
-                        filter_opts = hdf_filter["options"]
-                        for i in range(len(filter_opts)):
-                            if len(opt_values) <= i:
-                                break  # end of option values
-                            opt_value = opt_values[i]
-                            opt_value_enum = None
-                            option_name = filter_opts[i]
-                            if option_name in _HDF_FILTER_OPTION_ENUMS:
-                                option_enums = _HDF_FILTER_OPTION_ENUMS[option_name]
-                                if opt_value in option_enums:
-                                    opt_value_enum = option_enums[opt_value]
-                            if opt_value_enum:
-                                filter_prop[option_name] = opt_value_enum
-                            else:
-                                filter_prop[option_name] = opt_value
-                else:
-                    # custom filter
-                    filter_prop["class"] = "H5Z_FILTER_USER"
-                    if opt_values:
-                        filter_prop["parameters"] = opt_values
-                filter_props.append(filter_prop)
-            creationProps["filters"] = filter_props
-
-        return creationProps
-
-    #
-    # Get dataset information - type, shape, num attributes, creation properties
-    #
-    def getDatasetItemByUuid(self, obj_uuid):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if dset is None:
-            if self.getModifiedTime(obj_uuid, useRoot=False):
-                msg = "Dataset with uuid: " + obj_uuid + " has been previously deleted"
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
+            link_tgt = None
+            self.log.debug(f"link_name: {link_name}")
+            if not obj_id:
+                break
+            if 'links' not in obj_json:
+                self.log.error(f"expected to find links key in: {obj_json}")
+                raise KeyError(h5path)
+            links = obj_json['links']
+            self.log.debug(f"links: {links}")
+            if link_name not in links:
+                self.log.warning(f"link: {link_name} not found in {obj_id}")
+                self.log.debug(f"links: {links}")
+                raise KeyError(h5path)
+            link_tgt = links[link_name]
+            self.log.debug(f"link_tgt: {link_tgt}")
+            link_class = link_tgt['class']
+            obj_id = None
+            obj_json = None
+            if link_class == 'H5L_TYPE_HARD':
+                # hard link
+                obj_id = link_tgt['id']
+                if obj_id in searched_ids:
+                    self.log.warning(f"circular reference using path: {h5path}")
+                    raise KeyError(h5path)
+                obj_json = self.getObjectById(obj_id)
+                searched_ids.add(obj_id)
+            elif link_class == 'H5L_TYPE_SOFT':
+                self.log.warning("getObjectIdByPath can't follow soft links")
+            elif link_class == 'H5L_TYPE_EXTERNAL':
+                self.log.warning("getObjectIdByPath can't follow external links")
             else:
-                msg = "Dataset with uuid: " + obj_uuid + " was not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        # fill in the item info for the dataset
-        item = {"id": obj_uuid}
-
-        alias = []
-        if dset.name and not dset.name.startswith("/__db__"):
-            alias.append(dset.name)  # just use the default h5py path for now
-        item["alias"] = alias
-
-        item["attributeCount"] = len(dset.attrs)
-
-        # check if the dataset is using a committed type
-        typeid = h5py.h5d.DatasetID.get_type(dset.id)
-        typeItem = None
-        if h5py.h5t.TypeID.committed(typeid):
-            type_uuid = None
-            addr = h5py.h5o.get_info(typeid).addr
-            type_uuid = self.getUUIDByAddress(addr)
-            committedType = self.getCommittedTypeItemByUuid(type_uuid)
-            typeItem = committedType["type"]
-            typeItem["uuid"] = type_uuid
-        else:
-            typeItem = getTypeItem(dset.dtype)
-
-        item["type"] = typeItem
-
-        # get shape
-        item["shape"] = self.getShapeItemByDsetObj(dset)
-
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        creationProps = self.getDatasetCreationProps(obj_uuid)
-        if creationProps:
-            # if chunks is not in the db props, add it from the dataset prop
-            # (so auto-chunk values can be returned)
-            if dset.chunks and "layout" not in creationProps:
-                creationProps["layout"] = {"class": "H5D_CHUNKED", "dims": dset.chunks}
-        else:
-            # no db-tracked creation properties, pull properties from library
-            creationProps = self.getHDF5DatasetCreationProperties(
-                obj_uuid, typeItem["class"]
-            )
-
-        if creationProps:
-            item["creationProperties"] = creationProps
-
-        return item
-
-    def createTypeFromItem(self, attr_type):
-        """
-        createTypeFromItem - create type given dictionary definition
-        """
-        dt = None
-
-        if isinstance(attr_type, (str, bytes)) and len(attr_type) == UUID_LEN:
-            # assume attr_type is a uuid of a named datatype
-            tgt = self.getCommittedTypeObjByUuid(attr_type)
-            if tgt is None:
-                msg = (
-                    "Unable to create attribute, committed type with uuid of: "
-                    + attr_type
-                    + " not found"
-                )
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-            dt = tgt  # can use the object as the dt parameter
-        else:
-            try:
-                dt = createDataType(attr_type)
-            except KeyError as ke:
-                msg = "Unable to create type: " + str(ke)
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            except TypeError as te:
-                msg = "Unable to create type: " + str(te)
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if dt is None:
-                msg = "Unexpected error creating type"
-                self.log.error(msg)
-                raise IOError(errno, errno.EIO, msg)
-        return dt
-
-    def createCommittedType(self, datatype, obj_uuid=None):
+                self.log.error(f"link type: {link_class} not supported")
+
+            if not obj_id:
+                self.log.warning(f"get_bypath {h5path} not found")
+                raise KeyError(h5path)
+        return obj_id
+    
+    def getObjectByPath(self, path):
+        """ Get Object JSON at given path """
+        obj_id = self.getObjectDByPath(path)
+        obj_json = self.getObjectById(obj_id)
+        return obj_json    
+
+    def getDtype(self, obj_id):
+        """ Return numpy data type for given object id """
+        if obj_id not in self._db:
+            raise KeyError(f"{obj_id} not found")
+        obj_json = self._db[obj_id]
+        if "type" not in obj_json:
+            # group id?
+            raise TypeError(f"{obj_id} does not have a datatype")
+        type_json = obj_json["type"]
+        
+        # TBD: what about datasets using a committed type?
+        dtype = createDataType(type_json)
+        return dtype
+ 
+ 
+    def createCommittedType(self, datatype, cpl=None):
         """
         createCommittedType - creates new named datatype
         Returns item
         """
         self.log.info("createCommittedType")
-        self.initFile()
-        if self.readonly:
-            msg = "Can't create committed type (updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        datatypes = self.dbGrp["{datatypes}"]
-        if not obj_uuid:
-            obj_uuid = createObjId()
-        dt = self.createTypeFromItem(datatype)
-
-        datatypes[obj_uuid] = dt
-
-        if obj_uuid not in datatypes:
-            msg = "Unexpected failure to create committed datatype"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        newType = datatypes[obj_uuid]  # this will be a h5py Datatype class
-        # store reverse map as an attribute
-        addr = h5py.h5o.get_info(newType.id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        addrGrp.attrs[str(addr)] = obj_uuid
-        # set timestamp
-        now = time.time()
-        self.setCreateTime(obj_uuid, timestamp=now)
-        self.setModifiedTime(obj_uuid, timestamp=now)
-        item = {"id": obj_uuid}
-        item["attributeCount"] = len(newType.attrs)
-        # item['type'] = hdf5dtype.getTypeItem(datatype.dtype)
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-        return item
-
-    def getCommittedTypeObjByUuid(self, obj_uuid):
-        """
-        getCommittedTypeObjByUuid - get obj from {datatypes} collection
-        Returns type obj
-        """
-        self.log.info("getCommittedTypeObjByUuid(" + obj_uuid + ")")
-        self.initFile()
-        datatype = None
-        datatypesGrp = self.dbGrp["{datatypes}"]
-        if obj_uuid in datatypesGrp.attrs:
-            typeRef = datatypesGrp.attrs[obj_uuid]
-            # typeRef could be a reference or (for read-only) a path
-            datatype = self.f[typeRef]
-        elif obj_uuid in datatypesGrp:
-            datatype = datatypesGrp[obj_uuid]  # non-linked type
+        if cpl is None:
+            cpl = {}
+         
+        ctype_id = createObjId(obj_type="datatypes", root_id=self._root_id)
+        if isinstance(datatype, np.dtype):
+            dt = datatype
         else:
-            msg = "Committed datatype: " + obj_uuid + " not found"
-            self.log.info(msg)
-
-        return datatype
-
-    def getCommittedTypeItemByUuid(self, obj_uuid):
-        """
-        getCommittedTypeItemByUuid - get json from {datatypes} collection
-        Returns type obj
-        """
-        self.log.info("getCommittedTypeItemByUuid(" + obj_uuid + ")")
-        self.initFile()
-        datatype = self.getCommittedTypeObjByUuid(obj_uuid)
+            dt = createDataType(datatype)
 
-        if datatype is None:
-            if self.getModifiedTime(obj_uuid, useRoot=False):
-                msg = "Datatype with uuid: " + obj_uuid + " has been previously deleted"
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            else:
-                msg = "Datatype with uuid: " + obj_uuid + " was not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        item = {"id": obj_uuid}
-        alias = []
-        if datatype.name and not datatype.name.startswith("/__db__"):
-            alias.append(datatype.name)  # just use the default h5py path for now
-        item["alias"] = alias
-        item["attributeCount"] = len(datatype.attrs)
-        item["type"] = getTypeItem(datatype.dtype)
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
+        type_json = getTypeItem(dt)  # get canonical json description of datatype
 
-        return item
+        ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl}
+        ctype_json["created"] = time.time()
+        ctype_json["modified"] = None
+        self._db[ctype_id] = ctype_json
+        return ctype_id
+  
 
-    def getAttributeItemByObj(self, obj, name, includeData=True):
+    def getAttribute(self, obj_id, name, includeData=True):
         """
-        Get attribute given an object and name
+        Get attribute given an object id and name
         returns: JSON object
         """
-        if name not in obj.attrs:
-            msg = "Attribute: [" + name + "] not found in object: " + obj.name
+
+        obj_json = self.getObjectById(obj_id)
+        attrs = obj_json["attributes"]
+        
+        if name not in attrs:
+            msg = f"Attribute: [{name }] not found in object: {obj_id}"
             self.log.info(msg)
             return None
-
-        # get the attribute!
-        attrObj = h5py.h5a.open(obj.id, np.bytes_(name))
-        attr = None
-
-        item = {"name": name}
-
-        # check if the dataset is using a committed type
-        typeid = attrObj.get_type()
-        typeItem = None
-        if h5py.h5t.TypeID.committed(typeid):
-            type_uuid = None
-            addr = h5py.h5o.get_info(typeid).addr
-            type_uuid = self.getUUIDByAddress(addr)
-            committedType = self.getCommittedTypeItemByUuid(type_uuid)
-            typeItem = committedType["type"]
-            typeItem["uuid"] = type_uuid
-        else:
-            typeItem = getTypeItem(attrObj.dtype)
-        item["type"] = typeItem
-        # todo - don't include data for OPAQUE until JSON serialization
-        # issues are addressed
-
-        if isinstance(typeItem, dict) and typeItem["class"] in ("H5T_OPAQUE"):
-            includeData = False
-
-        shape_json = self.getShapeItemByAttrObj(attrObj)
-        item["shape"] = shape_json
+        
+        attr_json = attrs[name]
+
+        if includeData and "value" not in attr_json:
+            # Reader may not have pre-loaded large attributes
+            # fetch it now
+            if not self._reader:
+                raise RuntimeError(f"Expected to find value for attribute {name} of {obj_id}")
+            attr_json = self._reader.get_attribute(obj_id, name)
+            attr_json["value"] = attr_json  # this will update the _db
+        
+        return attr_json
+    
+    def getAttributeValue(self, obj_id, name):
+        """ Return NDArray of the given attribute value """
+        attr_json = self.getAttribute(obj_id, name)
+        shape_json = attr_json["shape"]
         if shape_json["class"] == "H5S_NULL":
-            includeData = False
-        if includeData:
-            try:
-                attr = obj.attrs[name]  # returns a numpy array
-            except TypeError:
-                self.log.warning("type error reading attribute")
-
-        if includeData and attr is not None:
-            if shape_json["class"] == "H5S_SCALAR":
-                data = self.getDataValue(typeItem, attr)
-            else:
-                dims = shape_json["dims"]
-                rank = len(dims)
-                # convert numpy object to python list
-                # values = self.toList(typeItem, attr)
-                data = self.toList(rank, typeItem, attr)
-            # data = self.bytesToString(data)
-            item["value"] = data
-        # timestamps will be added by getAttributeItem()
-        return item
-
-    def getAttributeItems(self, col_type, obj_uuid, marker=None, limit=0):
-        self.log.info("db.getAttributeItems(" + obj_uuid + ")")
-        if marker:
-            self.log.info("...marker: " + marker)
-        if limit:
-            self.log.info("...limit: " + str(limit))
-
-        self.initFile()
-        obj = self.getObjectByUuid(col_type, obj_uuid)
-        if obj is None:
-            msg = "Object: " + obj_uuid + " could not be loaded"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        items = []
-        gotMarker = True
-        if marker is not None:
-            gotMarker = False
-        count = 0
-        for name in obj.attrs:
-            if not gotMarker:
-                if name == marker:
-                    gotMarker = True
-                    continue  # start filling in result on next pass
-                else:
-                    continue  # keep going!
-            item = self.getAttributeItemByObj(obj, name, False)
-            # mix-in timestamps
-            if self.update_timestamps:
-                item["ctime"] = self.getCreateTime(
-                    obj_uuid, objType="attribute", name=name
-                )
-                item["mtime"] = self.getModifiedTime(
-                    obj_uuid, objType="attribute", name=name
-                )
-
-            items.append(item)
-            count += 1
-            if limit > 0 and count == limit:
-                break  # return what we got
-        return items
-
-    def getAttributeItem(self, col_type, obj_uuid, name):
-        self.log.info(
-            "getAttributeItemByUuid(" + col_type + ", " + obj_uuid + ", " + name + ")"
-        )
-        self.initFile()
-        obj = self.getObjectByUuid(col_type, obj_uuid)
-        if obj is None:
-            msg = "Parent object: " + obj_uuid + " of attribute not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
+            # no value for empty shape attributes
             return None
-        item = self.getAttributeItemByObj(obj, name)
-        if item is None:
-            if self.getModifiedTime(
-                obj_uuid, objType="attribute", name=name, useRoot=False
-            ):
-                # attribute has been removed
-                msg = (
-                    "Attribute: ["
-                    + name
-                    + "] of object: "
-                    + obj_uuid
-                    + " has been previously deleted"
-                )
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            msg = "Attribute: [" + name + "] of object: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        # mix-in timestamps
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid, objType="attribute", name=name)
-            item["mtime"] = self.getModifiedTime(
-                obj_uuid, objType="attribute", name=name
-            )
-
-        return item
-
-    def isDimensionList(self, attr_name, attr_type):
-        """
-        isDimensionList - return True if this attribute json looks like a dimension list
-        """
-        if attr_name != "DIMENSION_LIST":
-            return False
-        if type(attr_type) is not dict:
-            return False
-        if attr_type["class"] != "H5T_VLEN":
-            return False
-        base_type = attr_type["base"]
-        if base_type["class"] != "H5T_REFERENCE":
-            return False
-        return True
+        elif shape_json["class"] == "H5S_SCALAR":
+            dims = ()
+        else:
+            dims = shape_json["dims"]
+        dtype = createDataType(attr_json["type"])
+        value = attr_json["value"]
+        arr = jsonToArray(dims, dtype, value)
+        return arr
+
+
+    def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
+        """
+        create an attribute - will override any existing attributes
+        """
+        
+        # TBD: if dtype is a committed ref type, fetch it first
+        # TBD: also, check special case for complex types
+
+        if isinstance(dtype, str) and dtype.startswith("datatypes/"):
+            ctype_id = dtype[len("datatypes/"):]
+            if getCollectionForId(ctype_id) != "datatypes":
+                raise TypeError(f"unexpected dtype value for createAttribute: {dtype}")
+            if ctype_id not in self._db:
+                raise KeyError(f"ctype: {ctype_id} not found")
+            ctype_json = self.getObjectById(ctype_id)
+            type_json = ctype_json["type"]
+            dtype = createDataType(type_json)
+
+        # First, make sure we have a NumPy array.   
+        if isinstance(value, Reference) and dtype is None:
+            dtype = special_dtype(ref=Reference)
+        if shape == "H5S_NULL":
+            if value:
+                raise ValueError("Value can't be set for Null space attributes")
+            if dtype is None:
+                raise ValueError("Dtype must be set for Null space attributes")
+            else:
+                dtype = np.dtype(dtype)
+        else:
+            value = np.asarray(value, dtype=dtype, order='C')
+            if dtype is None:
+                dtype = value.dtype
+            else:
+                dtype = np.dtype(dtype)  # In case a string, e.g. 'i8' is passed
+ 
+        # Where a top-level array type is requested, we have to do some
+        # fiddling around to present the data as a smaller array of
+        # sub-arrays.
+        if value is not None:
+            if dtype.subdtype is not None:
+                subdtype, subshape = dtype.subdtype
+
+                # Make sure the subshape matches the last N axes' sizes.
+                if shape[-len(subshape):] != subshape:
+                    raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shape}")
+
+                # New "advertised" shape and dtype
+                shape = shape[0:len(shape) - len(subshape)]
+                dtype = subdtype
+
+            # Not an array type; make sure to check the number of elements
+            # is compatible, and reshape if needed.
+            else:
+                if isinstance(shape, tuple):
+                    if np.prod(shape) != np.prod(value.shape):
+                        raise ValueError("Shape of new attribute conflicts with shape of data")
 
-    def isReferenceList(self, attr_name, attr_type):
-        """
-        isReferenceList - return True if this attribute json looks like a reference list
-        """
-        if attr_name != "REFERENCE_LIST":
-            return False
-        if type(attr_type) is not dict:
-            return False
-        if attr_type["class"] != "H5T_COMPOUND":
-            return False
+                    if shape != value.shape:
+                        value = value.reshape(shape)
 
-        return True
+                # We need this to handle special string types.
+                value = np.asarray(value, dtype=dtype)
+            value_json = bytesArrayToList(value)
+        else:
+            value_json = None
 
-    def makeDimensionList(self, obj, shape, value):
-        """
-        makeDimensionList - work-around for h5py problems saving dimension list -
-            types which are vlen's of references are not working directly, so use dim_scale api
-            Note: this is a work-around for h5py issue:
-            https://github.com/h5py/h5py/issues/553
+        if shape is None:
+            shape = value.shape
+        if shape == "H5S_NULL":
+            shape_json = {"class": "H5S_NULL"}
+        elif len(shape) == 0:
+            shape_json = {"class": "H5S_SCALAR"}
+        else:
+            shape_json = {"class": "H5S_SIMPLE"}
+            shape_json["dims"] = list(shape)
+
+        obj_json = self.getObjectById(obj_id)
+        attrs_json = obj_json["attributes"]
+        if name in attrs_json:
+                # replace, update modified timestamp
+            created = attrs_json["created"]
+            modified = time.time()
+        else:
+            created = time.time()
+            modified = None
+        type_json = getTypeItem(dtype)
+        # finally put it all together...
+        attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
+        attr_json["created"] = created
+        attr_json["modified"] = modified
+
+        # slot into the obj_json["attrs"]
+        attrs_json[name] = attr_json
+
+
+    def deleteAttribute(self, obj_id, name):
+        """ delete the given attribute """
+        obj_json = self.getObjectById(obj_id)
+        attrs_json = obj_json["attributes"]
+        if name not in attrs_json:
+            raise KeyError(f"attribute [{name}] not found in {obj_id}")
+        del attrs_json[name]
+
+
+    def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
         """
-        dset_refs = self.listToRef(value)
-        for i in range(len(dset_refs)):
-            refs = dset_refs[i]
-            if type(refs) not in (list, tuple):
-                msg = "Invalid dimension list value"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            for j in range(len(refs)):
-                scale_obj = self.f[refs[j]]
-                if scale_obj is None:
-                    self.log.warning(
-                        "dimension list, missing obj reference: " + value[i]
-                    )
-                    continue
-                if "CLASS" not in scale_obj.attrs:
-                    self.log.warning("dimension list, no scale obj")
-                    continue
-                if scale_obj.attrs["CLASS"] != b"DIMENSION_SCALE":
-                    self.log.warning("dimension list, invalid class for scale obj")
-                    continue
+        self.log.info(f"getDatasetValues obj_id: {obj_id}, slices: {slices} format: {format}")
+        #TBD
+      
 
-                try:
-                    h5py.h5ds.attach_scale(obj.id, scale_obj.id, i)
-                except RuntimeError:
-                    self.log.error("got runtime error attaching scale")
-
-    def writeNdArrayToAttribute(self, attrs, attr_name, npdata, shape, dt):
-        """
-        writeNdArrayToAttribute - create an attribute given numpy array
-        """
-        attrs.create(attr_name, npdata, shape=shape, dtype=dt)
-
-    def makeNullTermStringAttribute(self, obj, attr_name, strLength, value):
-        """
-        create a scalar string attribute using nullterm padding
-        """
-        self.log.info(
-            "make nullterm, length: " + str(strLength) + " value:" + str(value)
-        )
-        value = str(value)
-        if strLength < len(value):
-            self.log.warning(
-                "makeNullTermStringAttribute: value string longer than length"
-            )
-            # value = value[:strLength]  # truncate to length
-
-        if isinstance(attr_name, str):
-            try:
-                attr_name = attr_name.encode("ascii")
-            except UnicodeDecodeError:
-                raise TypeError("non-ascii attribute name not allowed")
-
-        # create the attribute
-        tid = h5py.h5t.TypeID.copy(h5py.h5t.C_S1)
-        tid.set_size(strLength)
-        tid.set_strpad(h5py.h5t.STR_NULLTERM)
-        sid = h5py.h5s.create(h5py.h5s.SCALAR)
-        aid = h5py.h5a.create(obj.id, attr_name, tid, sid)
-        # write the value
-        dtype_code = "S" + str(strLength)
-        ndarr = np.array(value, dtype=np.dtype(dtype_code))
-        aid.write(ndarr)
-
-    def makeAttribute(self, obj, attr_name, shape, attr_type, value):
-        """
-        makeAttribute - create an attribute (except for dimension list
-        attribute)
-        """
-        is_committed_type = False
-        if isinstance(attr_type, str) and len(attr_type) == UUID_LEN:
-            # assume attr_type is a uuid of a named datatype
-            is_committed_type = True
-
-        dt = self.createTypeFromItem(attr_type)
-
-        if shape is None:
-            self.log.info("shape is null - will create null space attribute")
-            # create null space attribute
-            # null space datasets/attributes not supported in h5py yet:
-            # See: https://github.com/h5py/h5py/issues/279
-            # work around this by using low-level interface.
-            # first create a temp scalar dataset so we can pull out the typeid
-            tmpGrp = None
-            if "{tmp}" not in self.dbGrp:
-                tmpGrp = self.dbGrp.create_group("{tmp}")
-            else:
-                tmpGrp = self.dbGrp["{tmp}"]
-            tmpGrp.attrs.create(attr_name, 0, shape=(), dtype=dt)
-            b_attr_name = attr_name.encode("utf-8")
-            tmpAttr = h5py.h5a.open(tmpGrp.id, name=b_attr_name)
-            if not tmpAttr:
-                msg = "Unexpected error creating datatype for nullspace attribute"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            tid = tmpAttr.get_type()
-            sid = sid = h5py.h5s.create(h5py.h5s.NULL)
-            # now create the permanent attribute
-            if attr_name in obj.attrs:
-                self.log.info("deleting attribute: " + attr_name)
-                del obj.attrs[attr_name]
-            attr_id = h5py.h5a.create(obj.id, b_attr_name, tid, sid)
-            # delete the temp attribute
-            del tmpGrp.attrs[attr_name]
-            if not attr_id:
-                msg = "Unexpected error creating nullspace attribute"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-        else:
-            if type(value) is tuple:
-                value = list(value)
-            if type(shape) is list:
-                shape = tuple(shape)
-            if not is_committed_type:
-                # apparently committed types can not be used as reference types
-                # todo - verify why that is
-
-                rank = len(shape)
-                # convert python list to numpy object
-                strPad = None
-                strLength = 0
-                if (
-                    isinstance(attr_type, dict)
-                    and attr_type["class"] == "H5T_STRING"
-                    and "strPad" in attr_type
-                ):
-                    strPad = attr_type["strPad"]
-                    strLength = attr_type["length"]
-
-                if (
-                    rank == 0
-                    and isinstance(strLength, int)
-                    and strPad == "H5T_STR_NULLTERM"
-                ):
-                    self.makeNullTermStringAttribute(obj, attr_name, strLength, value)
-                else:
-                    typeItem = getTypeItem(dt)
-                    dt = convert_dtype(dt)
-                    value = self.toRef(rank, typeItem, value)
-
-                    # create numpy array
-                    npdata = np.zeros(shape, dtype=dt)
-
-                    if rank == 0:
-                        npdata[()] = self.toNumPyValue(attr_type, value, npdata[()])
-                    else:
-                        self.toNumPyArray(rank, attr_type, value, npdata)
-
-                    self.writeNdArrayToAttribute(
-                        obj.attrs, attr_name, npdata, shape, dt
-                    )
-
-    """
-    createAttribute - create an attribute
-    """
-
-    def createAttribute(self, col_name, obj_uuid, attr_name, shape, attr_type, value):
-        self.log.info("createAttribute: [" + attr_name + "]")
-
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create attribute (updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        obj = self.getObjectByUuid(col_name, obj_uuid)
-        if not obj:
-            msg = "Object with uuid: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        if self.isDimensionList(attr_name, attr_type):
-            self.makeDimensionList(obj, shape, value)
-        elif self.isReferenceList(attr_name, attr_type):
-            pass  # Skip since reference list will be created by attach scale
-        else:
-            self.makeAttribute(obj, attr_name, shape, attr_type, value)
-
-        now = time.time()
-        self.setCreateTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now)
-        self.setModifiedTime(
-            obj_uuid, objType="attribute", name=attr_name, timestamp=now
-        )
-        self.setModifiedTime(obj_uuid, timestamp=now)  # owner entity is modified
-
-    def deleteAttribute(self, col_name, obj_uuid, attr_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to delete attribute (updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        obj = self.getObjectByUuid(col_name, obj_uuid)
-
-        if attr_name not in obj.attrs:
-            msg = (
-                "Attribute with name: ["
-                + attr_name
-                + "] of object: "
-                + obj_uuid
-                + " not found"
-            )
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        del obj.attrs[attr_name]
-        now = time.time()
-        self.setModifiedTime(
-            obj_uuid, objType="attribute", name=attr_name, timestamp=now
-        )
-
-        return True
-
-    """
-      Return a json-serializable representation of the numpy value
-    """
-
-    def getDataValue(self, typeItem, value, dimension=0, dims=None):
-        if dimension > 0:
-            if type(dims) not in (list, tuple):
-                msg = "unexpected type for type array dimensions"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            out = []
-            rank = len(dims)
-            if dimension > rank:
-                msg = "unexpected dimension for type array"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nElements = dims[rank - dimension]
-            for i in range(nElements):
-                item_value = self.getDataValue(
-                    typeItem, value[i], dimension=(dimension - 1), dims=dims
-                )
-                out.append(item_value)
-            return out  # done for array case
-
-        out = None
-        typeClass = typeItem["class"]
-        if isinstance(value, (np.ndarray, np.generic)):
-            value = value.tolist()  # convert numpy object to list
-        if typeClass == "H5T_COMPOUND":
-            if type(value) not in (list, tuple):
-                msg = "Unexpected type for compound value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            fields = typeItem["fields"]
-            if len(fields) != len(value):
-                msg = "Number of elements in compound type does not match type"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nFields = len(fields)
-            out = []
-            for i in range(nFields):
-                field = fields[i]
-                item_value = self.getDataValue(field["type"], value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_VLEN":
-            if type(value) not in (list, tuple):
-                msg = "Unexpected type for vlen value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            baseType = typeItem["base"]
-            out = []
-            nElements = len(value)
-            for i in range(nElements):
-                item_value = self.getDataValue(baseType, value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_REFERENCE":
-            out = self.refToList(value)
-        elif typeClass == "H5T_OPAQUE":
-            out = "???"  # todo
-        elif typeClass == "H5T_ARRAY":
-            type_dims = typeItem["dims"]
-            if type(type_dims) not in (list, tuple):
-                msg = "unexpected type for type array dimensions"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            rank = len(type_dims)
-            baseType = typeItem["base"]
-            out = self.getDataValue(baseType, value, dimension=rank, dims=type_dims)
-
-        elif typeClass in ("H5T_INTEGER", "H5T_FLOAT", "H5T_ENUM"):
-            out = value  # just copy value
-        elif typeClass == "H5T_STRING":
-            if "charSet" in typeItem:
-                charSet = typeItem["charSet"]
-            else:
-                charSet = "H5T_CSET_ASCII"
-            if charSet == "H5T_CSET_ASCII" and isinstance(value, bytes):
-                out = value.decode("utf-8")
-            else:
-                out = value
-        else:
-            msg = "Unexpected type class: " + typeClass
-            self.log.info(msg)
-            raise IOError(errno.ENINVAL, msg)
-        return out
-
-    def getRefValue(self, typeItem: dict, value: list):
-        """
-        Return a numpy value based on json representation
-        """
-        out = None
-        typeClass = typeItem["class"]
-        if typeClass == "H5T_COMPOUND":
-            if not isinstance(value, (list, tuple)):
-                msg = f"Unexpected type for compound value: {type(value)}"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            fields = typeItem["fields"]
-            if len(fields) != len(value):
-                msg = "Number of elements in compound type does not match type"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nFields = len(fields)
-            out = []
-            for i in range(nFields):
-                field = fields[i]
-                item_value = self.getRefValue(field["type"], value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_VLEN":
-            if type(value) not in (list, tuple):
-                msg = "Unexpected type for vlen value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            baseType = typeItem["base"]
-            out = []
-            nElements = len(value)
-            for i in range(nElements):
-                item_value = self.getRefValue(baseType, value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_REFERENCE":
-            out = self.listToRef(value)
-        elif typeClass == "H5T_OPAQUE":
-            out = "???"  # todo
-        elif typeClass == "H5T_ARRAY":
-            out = self.toRef(len(typeItem["dims"]), typeItem["base"], value)
-        elif typeClass in ("H5T_INTEGER", "H5T_FLOAT", "H5T_ENUM"):
-            out = value  # just copy value
-        elif typeClass == "H5T_STRING":
-            if typeItem["charSet"] == "H5T_CSET_UTF8":
-                # out = value.encode('utf-8')
-                out = value
-            else:
-                out = value.encode()
-        else:
-            msg = "Unexpected type class: " + typeClass
-            self.log.info(msg)
-            raise IOError(errno.ENINVAL, msg)
-
-        if isinstance(out, list):
-            out = tuple(out)  # convert to tuple
-        return out
-
-    """
-      Return a numpy value based on json representation
-    """
-
-    def toNumPyValue(self, typeItem, src, des):
-        typeClass = "H5T_INTEGER"  # default to int type
-        if type(typeItem) is dict:
-            typeClass = typeItem["class"]
-        if typeClass == "H5T_COMPOUND":
-            fields = typeItem["fields"]
-            if len(fields) != len(src):
-                msg = "Number of elements in compound type does not match type"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nFields = len(fields)
-
-            for i in range(nFields):
-                field = fields[i]
-                field_name = field["name"]
-                des[field_name] = src[i]
-
-        elif typeClass == "H5T_VLEN":
-            if type(src) not in (list, tuple):
-                msg = "Unexpected type for vlen value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            baseType = typeItem["base"]
-
-            dt = self.createTypeFromItem(baseType)
-            dt = convert_dtype(dt)
-            des = np.array(src, dtype=dt)
-
-        elif typeClass == "H5T_REFERENCE":
-            des = src  # self.listToRef(src)
-
-        elif typeClass == "H5T_OPAQUE":
-            des = "???"  # todo
-        elif typeClass == "H5T_ARRAY":
-            des = src
-        elif typeClass in ("H5T_INTEGER", "H5T_FLOAT", "H5T_ENUM"):
-            des = src  # just copy value
-        elif typeClass == "H5T_STRING":
-            if typeItem["charSet"] == "H5T_CSET_UTF8":
-                des = src  # src.encode('utf-8')
-            else:
-                if type(src) is str:
-                    try:
-                        src.encode("ascii")
-                    except UnicodeDecodeError:
-                        raise TypeError(
-                            "non-ascii value not allowed with H5T_CSET_ASCII"
-                        )
-                des = src
-
-        else:
-            msg = "Unexpected type class: " + typeClass
-            self.log.info(msg)
-            raise IOError(errno.ENINVAL, msg)
-        return des
-
-    """
-       copy src data to numpy array
-    """
-
-    def toNumPyArray(self, rank, typeItem, src, des):
-        if rank == 0:
-            msg = "unexpected rank value"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)  # shouldn't be called with rank 0
-
-        for i in range(len(des)):
-            des_sec = des[i]  # numpy slab
-
-            src_sec = src[i]
-
-            if rank > 1:
-                self.toNumPyArray(rank - 1, typeItem, src_sec, des_sec)
-            else:
-                rv = self.toNumPyValue(typeItem, src_sec, des_sec)
-                # if the numpy object is writeable, des_sec will be
-                # already updated.  Otherwise, update the des by assignment
-                if not hasattr(des_sec, "flags") or not des_sec.flags["WRITEABLE"]:
-                    des[i] = rv
-
-    def toRef(self, rank, typeItem, data):
-        """
-        Convert json list to h5py compatible values
-        """
-        out = None
-
-        if isinstance(typeItem, str):
-            # commited type - get json representation
-            committed_type_item = self.getCommittedTypeItemByUuid(typeItem)
-            typeItem = committed_type_item["type"]
-
-        typeClass = typeItem["class"]
-        if typeClass in ("H5T_INTEGER", "H5T_FLOAT"):
-            out = data  # just use as is
-
-        elif rank == 0:
-            # scalar value
-            out = self.getRefValue(typeItem, data)
-        else:
-            out = []
-            for item in data:
-                if rank > 1:
-                    out_item = self.toRef(rank - 1, typeItem, item)
-                    out.append(out_item)
-                else:
-                    out_item = self.getRefValue(typeItem, item)
-                    out.append(out_item)
-
-        return out
-
-    """
-       Convert list to json serializable values.
-    """
-
-    def toList(self, rank, typeItem, data):
-        out = None
-        typeClass = typeItem["class"]
-        if typeClass in ("H5T_INTEGER", "H5T_FLOAT"):
-            out = data.tolist()  # just use as is
-
-        elif rank == 0:
-            # scalar value
-            out = self.getDataValue(typeItem, data)
-        else:
-            out = []
-            for item in data:
-                if rank > 1:
-                    out_item = self.toList(rank - 1, typeItem, item)
-                    out.append(out_item)
-                else:
-                    out_item = self.getDataValue(typeItem, item)
-                    out.append(out_item)
-
-        return out
-
-    """
-       Create ascii representation of vlen data object
-    """
-
-    def vlenToList(self, data):
-        # todo - verify that data is a numpy.ndarray
-        out = None
-        if len(data.shape) == 0:
-            out = []
-        else:
-            try:
-                if data.dtype.kind != "O":
-                    out = data.tolist()
-                else:
-                    out = []
-                    for item in data:
-                        out.append(self.vlenToList(item))  # recursive call
-            except AttributeError:
-                # looks like this is not a numpy ndarray, just return the value
-                out = data
-        return out
-
-    """
-       Create ascii representation of ref data object
-    """
-
-    def refToList(self, data):
-        # todo - verify that data is a numpy.ndarray
-        out = None
-        if type(data) is h5py.h5r.Reference:
-            if bool(data):
-                grpref = self.f[data]
-                addr = h5py.h5o.get_info(grpref.id).addr
-                uuid = self.getUUIDByAddress(addr)
-                if self.getGroupObjByUuid(uuid):
-                    out = "groups/" + uuid
-                elif self.getDatasetObjByUuid(uuid):
-                    out = "datasets/" + uuid
-                elif self.getCommittedTypeObjByUuid(uuid):
-                    out = "datatypes/" + uuid
-                else:
-                    self.log.warning("uuid in region ref not found: [" + uuid + "]")
-                    return None
-            else:
-                out = "null"
-        elif type(data) is h5py.h5r.RegionReference:
-            out = self.getRegionReference(data)
-        else:
-            out = []
-            for item in data:
-                out.append(self.refToList(item))  # recursive call
-        return out
-
-    """
-       Convert ascii representation of data references to data ref
-    """
-
-    def listToRef(self, data):
-        out = None
-        if not data:
-            # null reference
-            out = self.getNullReference()
-        elif isinstance(data, (bytes, str)):
-            obj_ref = None
-            # object reference should be in the form: <collection_name>/<uuid>
-            for prefix in ("datasets", "groups", "datatypes"):
-                if data.startswith(prefix):
-                    uuid_ref = data[len(prefix):]
-                    if len(uuid_ref) == (UUID_LEN + 1) and uuid_ref.startswith("/"):
-                        obj = self.getObjectByUuid(prefix, uuid_ref[1:])
-                        if obj:
-                            obj_ref = obj.ref
-                        else:
-                            msg = (
-                                "Invalid object reference value: ["
-                                + uuid_ref
-                                + "] not found"
-                            )
-                            self.log.info(msg)
-                            raise IOError(errno.ENXIO, msg)
-                    break
-            if not obj_ref:
-                msg = "Invalid object reference value: [" + data + "]"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            else:
-                out = obj_ref
-
-        elif isinstance(data, (list, tuple)):
-            out = []
-            for item in data:
-                out.append(self.listToRef(item))  # recursive call
-        elif isinstance(data, dict):
-            # assume region ref
-            out = self.createRegionReference(data)
-        else:
-            msg = "Invalid object reference value type: [" + str(type(data)) + "]"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        return out
-
-    def bytesArrayToList(self, data):
-        """
-        Convert list that may contain bytes type elements to list of string elements
-        """
-        if isinstance(data, (bytes, str)):
-            is_list = False
-        elif isinstance(data, (np.ndarray, np.generic)):
-            if len(data.shape) == 0:
-                is_list = False
-                data = data.tolist()  # tolist will return a scalar in this case
-                if isinstance(data, (list, tuple)):
-                    is_list = True
-                else:
-                    is_list = False
-            else:
-                is_list = True
-        elif isinstance(data, (list, tuple)):
-            is_list = True
-        else:
-            is_list = False
-
-        if is_list:
-            out = []
-            for item in data:
-                out.append(self.bytesArrayToList(item))  # recursive call
-        elif isinstance(data, bytes):
-            out = data.decode("utf-8")
-        else:
-            out = data
-
-        return out
-
-    def getRegionReference(self, regionRef):
-        """
-        Get item description of region reference value
-        """
-        selectionEnums = {
-            h5py.h5s.SEL_NONE: "H5S_SEL_NONE",
-            h5py.h5s.SEL_ALL: "H5S_SEL_ALL",
-            h5py.h5s.SEL_POINTS: "H5S_SEL_POINTS",
-            h5py.h5s.SEL_HYPERSLABS: "H5S_SEL_HYPERSLABS",
-        }
-
-        item = {}
-        objid = h5py.h5r.dereference(regionRef, self.f.file.file.id)
-        if objid:
-            item["id"] = self.getUUIDByAddress(h5py.h5o.get_info(objid).addr)
-        else:
-            self.log.info("region reference unable to find item with objid: " + objid)
-            return item
-
-        sel = h5py.h5r.get_region(regionRef, objid)
-        select_type = sel.get_select_type()
-        if select_type not in selectionEnums:
-            msg = "Unexpected selection type: " + regionRef.typecode
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        item["select_type"] = selectionEnums[select_type]
-        pointlist = None
-        if select_type == h5py.h5s.SEL_POINTS:
-            # retrieve a numpy array of selection points
-            points = sel.get_select_elem_pointlist()
-            pointlist = points.tolist()
-        elif select_type == h5py.h5s.SEL_HYPERSLABS:
-            points = sel.get_select_hyper_blocklist()
-            if points is not None:
-                pointlist = points[...].tolist()
-                # bump up the second coordinate by one to match api spec
-                for point in pointlist:
-                    coord2 = point[1]
-                    for i in range(len(coord2)):
-                        coord2[i] = coord2[i] + 1
-
-        item["selection"] = pointlist
-
-        return item
-
-    def createRegionReference(self, item):
-        """
-        Create region reference from item description of region reference value
-        """
-        selectionEnums = {
-            "H5S_SEL_NONE": h5py.h5s.SEL_NONE,
-            "H5S_SEL_ALL": h5py.h5s.SEL_ALL,
-            "H5S_SEL_POINTS": h5py.h5s.SEL_POINTS,
-            "H5S_SEL_HYPERSLABS": h5py.h5s.SEL_HYPERSLABS,
-        }
-        region_ref = None
-
-        if "select_type" not in item:
-            msg = "select_type not provided for region selection"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        select_type = item["select_type"]
-        if select_type not in selectionEnums.keys():
-            msg = "selection type: [" + select_type + "] is not valid"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        dset = None
-        if select_type == "H5S_SEL_NONE":
-            if "id" not in item:
-                #        select none on null dataset, return null ref
-                out = self.getNullReference()
-                return out
-        else:  # select_type != 'H5S_SEL_NONE'
-            if "id" not in item:
-                msg = "id not provided for region selection"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        # Otherwise need to provide uuid of dataset
-        uuid_ref = item["id"]
-        if len(uuid_ref) != UUID_LEN:
-            msg = "uuid value: [" + uuid_ref + "] for region reference is not valid"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        obj = self.getObjectByUuid("datasets", uuid_ref)
-        if obj:
-            dset = obj
-        else:
-            msg = "Invalid region refence value: [" + uuid_ref + "] not found"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if select_type in ("H5S_SEL_POINTS", "H5S_SEL_HYPERSLABS"):
-            if "selection" not in item:
-                msg = "selection key not provided for region selection"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        rank = len(dset.shape)
-        space_id = h5py.h5d.DatasetID.get_space(dset.id)
-        h5py.h5s.SpaceID.select_none(space_id)
-
-        if select_type == "H4S_SEL_NONE":
-            pass  # did select_none above
-        elif select_type == "H5S_SEL_ALL":
-            h5py.h5s.SpaceID.select_all(space_id)
-        elif select_type == "H5S_SEL_POINTS":
-            selection = item["selection"]
-            for point in selection:
-                if len(point) != rank:
-                    msg = "point selection number of elements must mach rank of referenced dataset"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-            h5py.h5s.SpaceID.select_elements(space_id, selection)
-        elif select_type == "H5S_SEL_HYPERSLABS":
-            selection = item["selection"]
-
-            for slab in selection:
-                # each item should be a two element array defining the hyperslab boundary
-                if len(slab) != 2:
-                    msg = "selection value not valid (not a 2 element array)"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                start = slab[0]
-                if isinstance(start, list):
-                    start = tuple(start)
-                if type(start) is not tuple or len(start) != rank:
-                    msg = "selection value not valid, start element should have number "
-                    msg += "elements equal to rank of referenced dataset"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                stop = slab[1]
-                if isinstance(stop, list):
-                    stop = tuple(stop)
-                if type(stop) is not tuple or len(stop) != rank:
-                    msg = "selection value not valid, count element should have number "
-                    msg += "elements equal to rank of referenced dataset"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                count = []
-                for i in range(rank):
-                    if start[i] < 0:
-                        msg = "start value for hyperslab selection must be non-negative"
-                        self.log.info(msg)
-                        raise IOError(errno.EINVAL, msg)
-                    if stop[i] <= start[i]:
-                        msg = "stop value must be greater than start value for hyperslab selection"
-                        self.log.info(msg)
-                        raise IOError(errno.EINVAL, msg)
-                    count.append(stop[i] - start[i])
-                count = tuple(count)
-
-                h5py.h5s.SpaceID.select_hyperslab(
-                    space_id, start, count, op=h5py.h5s.SELECT_OR
-                )
-
-        # now that we've selected the desired region in the space, return a region reference
-        dset_name = dset.name.encode("utf-8")
-        region_ref = h5py.h5r.create(
-            self.f.id, dset_name, h5py.h5r.DATASET_REGION, space_id
-        )
-
-        return region_ref
-
-    def toTuple(self, rank, data):
-        """
-        Convert a list to a tuple, recursively.
-        Example. [[1,2],[3,4]] -> ((1,2),(3,4))
-        """
-        if isinstance(data, (list, tuple)):
-            if rank > 0:
-                return list(self.toTuple(rank - 1, x) for x in data)
-            else:
-                return tuple(self.toTuple(rank - 1, x) for x in data)
-        else:
-            return data
-
-    def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"):
-        """
-        Get values from dataset identified by obj_uuid.
-        If a slices list or tuple is provided, it should have the same
-        number of elements as the rank of the dataset.
-        """
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if format not in ("json", "binary"):
-            msg = "only json and binary formats are supported"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        values = None
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        itemSize = getItemSize(typeItem)
-        if itemSize == "H5T_VARIABLE" and format == "binary":
-            msg = "Only JSON is supported for for this data type"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset.shape is None:
-            # null space dataset (with h5py 2.6.0)
-            return None
-
-        rank = len(dset.shape)
-
-        if rank == 0:
-            # check for null dataspace
-            try:
-                val = dset[...]
-            except IOError:
-                # assume null dataspace, return none
-                return None
-            if val is None:
-                self.log.warning("no value returned from scalar dataset")
-
-        if not isinstance(slices, (list, tuple)) and slices is not Ellipsis:
-            msg = "Unexpected error: getDatasetValuesByUuid: bad type for dim parameter"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        if isinstance(slices, (list, tuple)) and len(slices) != rank:
-            msg = "Unexpected error: getDatasetValuesByUuid: "
-            msg += "number of dims in selection not same as rank"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        if dt.kind == "O":
-            if format != "json":
-                msg = "Only JSON is supported for for this data type"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            # numpy object type - could be a vlen string or generic vlen
-            h5t_check = h5py.h5t.check_dtype(vlen=dt)
-            if h5t_check is str or h5t_check is bytes:
-                values = self.bytesArrayToList(dset[slices])
-            elif h5t_check is not None:
-                # other vlen data
-                values = self.vlenToList(dset[slices])
-            else:
-                # check for reference type
-                h5t_check = h5py.h5t.check_dtype(ref=dt)
-                if h5t_check is not None:
-                    # reference type
-                    values = self.refToList(dset[slices])
-                else:
-                    msg = "Unexpected error, object type unknown"
-                    self.log.error(msg)
-                    raise IOError(errno.EIO, msg)
-        elif dt.kind == "V" and len(dt) <= 1 and len(dt.shape) == 0 and not dt.names:
-            # opaque type - skip for now
-            self.log.warning("unable to get opaque type values")
-            values = "????"
-        elif dt.kind == "S" and format == "json":
-            values = self.bytesArrayToList(dset[slices])
-        elif len(dt) > 1 or dt.names:
-            # compound type
-            if format == "json":
-                values = self.bytesArrayToList(dset[slices])
-            else:
-                values = dset[slices].tobytes()
-        else:
-            values = dset[slices]
-
-            # just use tolist to dump
-            if format == "json":
-                values = values.tolist()
-            else:
-                # values = base64.b64encode(dset[slices].tobytes())
-                values = values.tobytes()
-
-        return values
-
-    """
-      doDatasetQueryByUuid: return rows based on query string
-        Return rows from a dataset that matches query string.
-
-        Note: Only supported for compound_type/one-dimensional datasets
-    """
-
-    def doDatasetQueryByUuid(
-        self, obj_uuid, query, start=0, stop=-1, step=1, limit=None
-    ):
-        self.log.info("doQueryByUuid - uuid: " + obj_uuid + " query:" + query)
-        self.log.info(
-            "start: "
-            + str(start)
-            + " stop: "
-            + str(stop)
-            + " step: "
-            + str(step)
-            + " limit: "
-            + str(limit)
-        )
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        values = []
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        # itemSize = getItemSize(typeItem)
-        if typeItem["class"] != "H5T_COMPOUND":
-            msg = "Only compound type datasets can be used as query target"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset.shape is None:
-            # null space dataset (with h5py 2.6.0)
-            return None
-
-        rank = len(dset.shape)
-        if rank != 1:
-            msg = "One one-dimensional datasets can be used as query target"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        values = []
-        indexes = []
-        count = 0
-
-        num_elements = dset.shape[0]
-        if stop == -1:
-            stop = num_elements
-        elif stop > num_elements:
-            stop = num_elements
-        block_size = self._getBlockSize(dset)
-        self.log.info("block_size: " + str(block_size))
-
-        field_names = list(dset.dtype.fields.keys())
-        eval_str = self._getEvalStr(query, field_names)
-
-        while start < stop:
-            if limit and (count == limit):
-                break  # no more rows for this batch
-            end = start + block_size
-            if end > stop:
-                end = stop
-            rows = dset[start:end]  # read from dataset
-            where_result = np.where(eval(eval_str))
-            index = where_result[0].tolist()
-            if len(index) > 0:
-                for i in index:
-                    row = rows[i]
-                    item = self.bytesArrayToList(row)
-                    values.append(item)
-                    indexes.append(start + i)
-                    count += 1
-                    if limit and (count == limit):
-                        break  # no more rows for this batch
-
-            start = end  # go to next block
-
-        # values = self.getDataValue(item_type, values, dimension=1, dims=(len(values),))
-
-        self.log.info("got " + str(count) + " query matches")
-        return (indexes, values)
-
-    """
-     _getBlockSize: Get number of rows to read from disk
-
-        heurestic to get reasonable sized chunk of data to fetch.
-        make multiple of chunk_size if possible
-    """
-
-    def _getBlockSize(self, dset):
-        target_block_size = 256 * 1000
-        if dset.chunks:
-            chunk_size = dset.chunks[0]
-            if chunk_size < target_block_size:
-                block_size = (target_block_size // chunk_size) * chunk_size
-            else:
-                block_size = target_block_size
-        else:
-            block_size = target_block_size
-        return block_size
-
-    """
-     _getEvalStr: Get eval string for given query
-
-        Gets Eval string to use with numpy where method.
-    """
-
-    def _getEvalStr(self, query, field_names):
-        i = 0
-        eval_str = ""
-        var_name = None
-        end_quote_char = None
-        var_count = 0
-        paren_count = 0
-        black_list = ("import",)  # field names that are not allowed
-        self.log.info("getEvalStr(" + query + ")")
-        for item in black_list:
-            if item in field_names:
-                msg = "invalid field name"
-                self.log.info("EINVAL: " + msg)
-                raise IOError(errno.EINVAL, msg)
-        while i < len(query):
-            ch = query[i]
-            if (i + 1) < len(query):
-                ch_next = query[i + 1]
-            else:
-                ch_next = None
-            if var_name and not ch.isalnum():
-                # end of variable
-                if var_name not in field_names:
-                    # invalid
-                    msg = "unknown field name"
-                    self.log.info("EINVAL: " + msg)
-                    raise IOError(errno.EINVAL, msg)
-                eval_str += "rows['" + var_name + "']"
-                var_name = None
-                var_count += 1
-
-            if end_quote_char:
-                if ch == end_quote_char:
-                    # end of literal
-                    end_quote_char = None
-                eval_str += ch
-            elif ch in ("'", '"'):
-                end_quote_char = ch
-                eval_str += ch
-            elif ch.isalpha():
-                if ch == "b" and ch_next in ("'", '"'):
-                    eval_str += "b"  # start of a byte string literal
-                elif var_name is None:
-                    var_name = ch  # start of a variable
-                else:
-                    var_name += ch
-            elif ch == "(" and end_quote_char is None:
-                paren_count += 1
-                eval_str += ch
-            elif ch == ")" and end_quote_char is None:
-                paren_count -= 1
-                if paren_count < 0:
-                    msg = "Mismatched paren"
-                    self.log.info("EINVAL: " + msg)
-                    raise IOError(errno.EINVAL, msg)
-                eval_str += ch
-            else:
-                # just add to eval_str
-                eval_str += ch
-            i = i + 1
-        if end_quote_char:
-            msg = "no matching quote character"
-            self.log.info("EINVAL: " + msg)
-            raise IOError(errno.EINVAL, msg)
-        if var_count == 0:
-            msg = "No field value"
-            self.log.info("EINVAL: " + msg)
-            raise IOError(errno.EINVAL, msg)
-        if paren_count != 0:
-            msg = "Mismatched paren"
-            self.log.info("EINVAL: " + msg)
-            raise IOError(errno.EINVAL, msg)
-
-        return eval_str
-
-    """
-    Get values from dataset identified by obj_uuid using the given
-    point selection.
-    """
-
-    def getDatasetPointSelectionByUuid(self, obj_uuid, points):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        rank = len(dset.shape)
-        values = np.zeros(len(points), dtype=dset.dtype)
-        try:
-            i = 0
-            for point in points:
-                if rank == 1:
-                    values[i] = dset[[point]]
-                else:
-                    values[i] = dset[tuple(point)]
-                i += 1
-        except ValueError:
-            # out of range error
-            msg = "getDatasetPointSelection, out of range error"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        return values.tolist()
-
-    """
-    setDatasetValuesByUuid - update the given dataset values with supplied data
-      and optionally a hyperslab selection (slices)
-    """
-
-    def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-
-        if format not in ("json", "binary"):
-            msg = "only json and binary formats are supported"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if format == "binary" and type(data) is not bytes:
-            msg = "data must be of type bytes for binary writing"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        itemSize = getItemSize(typeItem)
-        rank = len(dset.shape)
-        arraySize = 1
-        for extent in dset.shape:
-            arraySize *= arraySize
-
-        if itemSize == "H5T_VARIABLE" and format == "binary":
-            msg = "Only JSON is supported for for this data type"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if slices is None:
-            slices = []
-            # create selection that covers entire dataset
-            for dim in range(rank):
-                s = slice(0, dset.shape[dim], 1)
-                slices.append(s)
-            slices = tuple(slices)
-
-        if not isinstance(slices, tuple):
-            msg = "setDatasetValuesByUuid: bad type for dim parameter"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        if len(slices) != rank:
-            msg = "number of dims in selection not same as rank"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        npoints = 1
-        np_shape = []
-        for i in range(rank):
-            s = slices[i]
-
-            if s.start < 0 or s.step <= 0 or s.stop < s.start:
-                msg = "invalid slice specification"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if s.stop > dset.shape[i]:
-                msg = "invalid slice specification"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            np_shape.append(s.stop - s.start)
-
-            count = (s.stop - s.start) // s.step
-            if count <= 0:
-                msg = "invalid slice specification"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-            npoints *= count
-
-        np_shape = tuple(np_shape)  # for comparison with ndarray shape
-
-        self.log.info("selection shape:" + str(np_shape))
-
-        # need some special conversion for compound types --
-        # each element must be a tuple, but the JSON decoder
-        # gives us a list instead.
-        if format != "binary" and dset.dtype.names and isinstance(data, (list, tuple)):
-            data = self.toTuple(rank, data)
-            # for i in range(len(data)):
-            #    converted_data.append(self.toTuple(data[i]))
-            # data = converted_data
-        else:
-            h5t_check = h5py.check_dtype(ref=dset.dtype)
-            if h5t_check in (h5py.Reference, h5py.RegionReference):
-                # convert data to data refs
-                if format == "binary":
-                    msg = "Only JSON is supported for for this data type"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                data = self.listToRef(data)
-
-        if format == "binary":
-            if npoints * itemSize != len(data):
-                msg = (
-                    "Expected: "
-                    + str(npoints * itemSize)
-                    + " bytes, but got: "
-                    + str(len(data))
-                )
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if dset.dtype.shape == ():
-                arr = np.fromstring(data, dtype=dset.dtype)
-                arr = arr.reshape(np_shape)  # conform to selection shape
-            else:
-                # tricy array type!
-                arr = np.empty(np_shape, dtype=dset.dtype)
-                base_arr = np.fromstring(data, dtype=dset.dtype.base)
-                base_shape = list(np_shape)
-                base_shape.extend(dset.dtype.shape)  # add on the type dimensions
-                base_arr = base_arr.reshape(base_shape)
-                arr[...] = base_arr
-        else:
-            # data is json
-            if npoints == 1 and len(dset.dtype) > 1:
-                # convert to tuple for compound singleton writes
-                data = [
-                    tuple(data),
-                ]
-
-            arr = np.array(data, dtype=dset.dtype)
-            # raise an exception of the array shape doesn't match the selection shape
-            # allow if the array is a scalar and the selection shape is one element,
-            # numpy is ok with this
-            np_index = 0
-            for dim in range(len(arr.shape)):
-                data_extent = arr.shape[dim]
-                selection_extent = 1
-                if np_index < len(np_shape):
-                    selection_extent = np_shape[np_index]
-                if selection_extent == data_extent:
-                    np_index += 1
-                    continue  # good
-                if data_extent == 1:
-                    continue  # skip singleton selection
-                if selection_extent == 1:
-                    np_index += 1
-                    continue  # skip singleton selection
-
-                # selection/data mismatch!
-                msg = "data shape doesn't match selection shape"
-                msg += "--data shape: " + str(arr.shape)
-                msg += "--selection shape: " + str(np_shape)
-
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        # write temp numpy array to dataset
-        if rank == 1:
-            s = slices[0]
-            try:
-                dset[s] = arr
-            except TypeError as te:
-                self.log.info("h5py setitem exception: " + str(te))
-                raise IOError(errno.EINVAL, str(te))
-        else:
-            try:
-                dset[slices] = arr
-            except TypeError as te:
-                self.log.info("h5py setitem exception: " + str(te))
-                raise IOError(errno.EINVAL, str(te))
-
-        # update modified time
-        self.setModifiedTime(obj_uuid)
-        return True
-
-    """
-    setDatasetValuesByPointSelection - Update the dataset values using the given
-      data and point selection
-    """
-
-    def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json"):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-
-        if format not in ("json", "binary"):
-            msg = "only json and binary formats are supported"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if format == "binary" and type(data) is not bytes:
-            msg = "data must be of type bytes for binary writing"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        itemSize = getItemSize(typeItem)
-        if itemSize == "H5T_VARIABLE" and format == "binary":
-            msg = "Only JSON is supported for for this data type"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        rank = len(dset.shape)
-
-        # need some special conversion for compound types --
-        # each element must be a tuple, but the JSON decoder
-        # gives us a list instead.
-        if format == "json" and len(dset.dtype) > 1 and type(data) in (list, tuple):
-            raise NotImplementedError("need some special conversion for compound types")
-            # converted_data = self.toTuple(rank, data)
-            # for i in range(len(data)):
-            #    converted_data.append(self.toTuple(data[i]))
-            # data = converted_data
-
-        if format == "json":
-            try:
-                i = 0
-                for point in points:
-                    if rank == 1:
-                        dset[[point]] = data[i]
-                    else:
-                        dset[tuple(point)] = data[i]
-                    i += 1
-            except ValueError:
-                # out of range error
-                msg = "setDatasetValuesByPointSelection, out of range error"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        else:
-            # binary
-            arr = np.fromstring(data, dtype=dset.dtype)
-            dset[points] = arr  # coordinate write
-
-        # update modified time
-        self.setModifiedTime(obj_uuid)
-        return True
-
-    """
-    createDataset - creates new dataset given shape and datatype
-    Returns item
-    """
+    """
+    createDataset - creates new dataset given shape and datatype
+    Returns obj_id
+    """
 
     def createDataset(
-        self, datatype, datashape, max_shape=None, creation_props=None, obj_uuid=None
+        self,
+        shape=None,
+        dtype=None,
+        chunks=None,
+        compression=None,
+        shuffle=None,
+        maxshape=None,
+        compression_opts=None,
+        fillvalue=None,
+        cpl=None,
     ):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create dataset (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        datasets = self.dbGrp["{datasets}"]
-        if not obj_uuid:
-            obj_uuid = createObjId()
-        dt = None
-        item = {}
-        fillvalue = None
-
-        # h5py.createdataset fields
-        kwargs = {}  # key word arguments for h5py dataset creation
-
-        if creation_props is None:
-            creation_props = {}  # create empty list for convience
-
-        if creation_props:
-            if "fillValue" in creation_props:
-                fillvalue = creation_props["fillValue"]
-            if "trackTimes" in creation_props:
-                kwargs["track_times"] = creation_props["trackTimes"]
-            if "layout" in creation_props:
-                layout = creation_props["layout"]
-                if "dims" in layout:
-                    kwargs["chunks"] = tuple(layout["dims"])
-            if "filters" in creation_props:
-                filter_props = creation_props["filters"]
-                for filter_prop in filter_props:
-                    if "id" not in filter_prop:
-                        msg = "filter id not provided"
-                        self.log.info(msg)
-                        raise IOError(errno.EINVAL, msg)
-                    filter_id = filter_prop["id"]
-                    if filter_id not in _HDF_FILTERS:
-                        self.log.info(
-                            "unknown filter id: " + str(filter_id) + " ignoring"
-                        )
-                        continue
-
-                    hdf_filter = _HDF_FILTERS[filter_id]
-
-                    self.log.info("got filter: " + str(filter_id))
-                    if "alias" not in hdf_filter:
-                        self.log.info(
-                            "unsupported filter id: " + str(filter_id) + " ignoring"
-                        )
-                        continue
-
-                    filter_alias = hdf_filter["alias"]
-                    if not h5py.h5z.filter_avail(filter_id):
-                        self.log.info(
-                            "compression filter not available, filter: "
-                            + filter_alias
-                            + " will be ignored"
-                        )
-                        continue
-                    if filter_alias in _H5PY_COMPRESSION_FILTERS:
-                        if kwargs.get("compression"):
-                            self.log.info(
-                                "compression filter already set, filter: "
-                                + filter_alias
-                                + " will be ignored"
-                            )
-                            continue
-
-                        kwargs["compression"] = filter_alias
-                        self.log.info(
-                            "setting compression filter to: " + kwargs["compression"]
-                        )
-                        if filter_alias == "gzip":
-                            # check for an optional compression value
-                            if "level" in filter_prop:
-                                kwargs["compression_opts"] = filter_prop["level"]
-                        elif filter_alias == "szip":
-                            bitsPerPixel = None
-                            coding = "nn"
-
-                            if "bitsPerPixel" in filter_prop:
-                                bitsPerPixel = filter_prop["bitsPerPixel"]
-                            if "coding" in filter_prop:
-                                if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
-                                    coding = "ec"
-                                elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
-                                    coding = "nn"
-                                else:
-                                    msg = "invalid szip option: 'coding'"
-                                    self.log.info(msg)
-                                    raise IOError(errno.EINVAL, msg)
-                            # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
-                            # so these options will be ignored
-                            if "pixelsPerBlock" in filter_props:
-                                self.log.info("ignoring szip option: 'pixelsPerBlock'")
-                            if "pixelsPerScanline" in filter_props:
-                                self.log.info(
-                                    "ignoring szip option: 'pixelsPerScanline'"
-                                )
-                            if bitsPerPixel:
-                                kwargs["compression_opts"] = (coding, bitsPerPixel)
-                    else:
-                        if filter_alias == "shuffle":
-                            kwargs["shuffle"] = True
-                        elif filter_alias == "fletcher32":
-                            kwargs["fletcher32"] = True
-                        elif filter_alias == "scaleoffset":
-                            if "scaleOffset" not in filter_prop:
-                                msg = "No scale_offset provided for scale offset filter"
-                                self.log(msg)
-                                raise IOError(errno.EINVAL, msg)
-                            kwargs["scaleoffset"] = filter_prop["scaleOffset"]
-                        else:
-                            self.log.info(
-                                "Unexpected filter name: "
-                                + filter_alias
-                                + " , ignoring"
-                            )
-
-        dt_ref = self.createTypeFromItem(datatype)
-        if dt_ref is None:
-            msg = "Unexpected error, no type returned"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        dt = dt_ref
-        if hasattr(dt_ref, "dtype"):
-            # dt_ref is actualy a handle to a committed type
-            # get the dtype prop, but use dt_ref for the actual dataset creation
-            dt = dt_ref.dtype
-
-        if fillvalue and len(dt) > 1 and type(fillvalue) in (list, tuple):
-            # for compound types, need to convert from list to dataset compatible element
-
-            if len(dt) != len(fillvalue):
-                msg = "fillvalue has incorrect number of elements"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            ndscalar = np.zeros((), dtype=dt)
-            for i in range(len(fillvalue)):
-                field = dt.names[i]
-                ndscalar[field] = self.toTuple(0, fillvalue[i])
-            fillvalue = ndscalar
-
+        
+        kwds = {}
+        if chunks:
+            kwds["chunks"] = chunks
+        if compression:
+            kwds["compression"] = compression
+        if shuffle:
+            kwds["shuffle"] = shuffle
+        if compression_opts:
+            kwds["compression_opts"] = compression_opts
+        if maxshape:
+            kwds["maxshape"] = maxshape
         if fillvalue:
-            kwargs["fillvalue"] = fillvalue
-
-        dataset_id = None
-        if datashape is None:
-            # create null space dataset
-            # null space datasets not supported in h5py yet:
-            # See: https://github.com/h5py/h5py/issues/279
-            # work around this by using low-level interface.
-            # first create a temp scalar dataset so we can pull out the typeid
-            tmpGrp = None
-            if "{tmp}" not in self.dbGrp:
-                tmpGrp = self.dbGrp.create_group("{tmp}")
-            else:
-                tmpGrp = self.dbGrp["{tmp}"]
-            tmpDataset = tmpGrp.create_dataset(obj_uuid, shape=(1,), dtype=dt_ref)
-            tid = tmpDataset.id.get_type()
-            sid = sid = h5py.h5s.create(h5py.h5s.NULL)
-            # now create the permanent dataset
-            gid = datasets.id
-            b_obj_uuid = obj_uuid.encode("utf-8")
-            dataset_id = h5py.h5d.create(gid, b_obj_uuid, tid, sid)
-            # delete the temp dataset
-            del tmpGrp[obj_uuid]
-        else:
-            # create the dataset
-            try:
-                newDataset = datasets.create_dataset(
-                    obj_uuid,
-                    shape=datashape,
-                    maxshape=max_shape,
-                    dtype=dt_ref,
-                    **kwargs,
-                )
-            except ValueError as ve:
-                msg = "Unable to create dataset"
-                try:
-                    msg += ": " + ve.message
-                except AttributeError:
-                    pass  # no message
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)  # assume this is due to invalid params
-
-            if newDataset:
-                dataset_id = newDataset.id
-
-        if dataset_id is None:
-            msg = "Unexpected failure to create dataset"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        # store reverse map as an attribute
-        addr = h5py.h5o.get_info(dataset_id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        addrGrp.attrs[str(addr)] = obj_uuid
-
-        # save creation props if any
-        if creation_props:
-            self.setDatasetCreationProps(obj_uuid, creation_props)
-
-        # set timestamp
-        now = time.time()
-        self.setCreateTime(obj_uuid, timestamp=now)
-        self.setModifiedTime(obj_uuid, timestamp=now)
-
-        item["id"] = obj_uuid
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-        item["attributeCount"] = 0
-        return item
-
-    """
-    Resize existing Dataset
-    """
-
-    def resizeDataset(self, obj_uuid, shape):
-        self.log.info("resizeDataset(")  # + obj_uuid + "): ") # + str(shape))
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to resize dataset (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EACESS, msg)
-        dset = self.getDatasetObjByUuid(obj_uuid)  # will throw exception if not found
-        if len(shape) != len(dset.shape):
-            msg = "Unable to resize dataset, shape has wrong number of dimensions"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        for i in range(len(shape)):
-            if shape[i] < dset.shape[i]:
-                msg = "Unable to resize dataset, cannot make extent smaller"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if dset.maxshape[i] is not None and shape[i] > dset.maxshape[i]:
-                msg = "Unable to resize dataset, max extent exceeded"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        dset.resize(shape)  # resize
-
-        # update modified time
-        self.setModifiedTime(obj_uuid)
-
-    """
-    Check if link points to given target (as a HardLink)
-    """
-
-    def isObjectHardLinked(self, parentGroup, targetGroup, linkName):
-        try:
-            linkObj = parentGroup.get(linkName, None, False, True)
-            linkClass = linkObj.__class__.__name__
-        except TypeError:
-            # UDLink? Ignore for now
-            return False
-        if linkClass == "SoftLink":
-            return False
-        elif linkClass == "ExternalLink":
-            return False
-        elif linkClass == "HardLink":
-            if parentGroup[linkName] == targetGroup:
-                return True
-        else:
-            self.log.warning("unexpected linkclass: " + linkClass)
-            return False
-
-    """
-    Delete Dataset, Group or Datatype by UUID
-    """
-
-    def deleteObjectByUuid(self, objtype, obj_uuid):
-        if objtype not in ("group", "dataset", "datatype"):
-            msg = "unexpected objtype: " + objtype
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        self.initFile()
-        self.log.info("delete uuid: " + obj_uuid)
-        if self.readonly:
-            msg = "Unable to delete object (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        if obj_uuid == self.dbGrp.attrs["rootUUID"] and objtype == "group":
-            # can't delete root group
-            msg = "Unable to delete group (root group may not be deleted)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        dbCol = None
-        tgt = None
-        if objtype == "dataset":
-            tgt = self.getDatasetObjByUuid(obj_uuid)
-            dbCol = self.dbGrp["{datasets}"]
-        elif objtype == "group":
-            tgt = self.getGroupObjByUuid(obj_uuid)
-            dbCol = self.dbGrp["{groups}"]
-        else:  # datatype
-            tgt = self.getCommittedTypeObjByUuid(obj_uuid)
-            dbCol = self.dbGrp["{datatypes}"]
-
-        if tgt is None:
-            msg = "Unable to delete " + objtype + ", uuid: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        # unlink from root (if present)
-        self.unlinkObject(self.f["/"], tgt)
-
-        groups = self.dbGrp["{groups}"]
-        # iterate through each group in the file and unlink tgt if it is linked
-        # by the group.
-        # We'll store a list of links to be removed as we go, and then actually
-        # remove the links after the iteration is done (otherwise we can run into issues
-        # where the key has become invalid)
-        linkList = []  # this is our list
-        for uuidName in groups.attrs:
-            grpRef = groups.attrs[uuidName]
-            # de-reference handle
-            grp = self.f[grpRef]
-            for linkName in grp:
-                if self.isObjectHardLinked(grp, tgt, linkName):
-                    linkList.append({"group": grp, "link": linkName})
-        for item in linkList:
-            self.unlinkObjectItem(item["group"], tgt, item["link"])
-
-        addr = h5py.h5o.get_info(tgt.id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        del addrGrp.attrs[str(addr)]  # remove reverse map
-        dbRemoved = False
-
-        # finally, remove the dataset from db
-        if obj_uuid in dbCol:
-            # should be here (now it is anonymous)
-            del dbCol[obj_uuid]
-            dbRemoved = True
-
-        if not dbRemoved:
-            self.log.warning("did not find: " + obj_uuid + " in anonymous collection")
-
-            if obj_uuid in dbCol.attrs:
-                self.log.info(
-                    "removing: " + obj_uuid + " from non-anonymous collection"
-                )
-                del dbCol.attrs[obj_uuid]
-                dbRemoved = True
-
-        if not dbRemoved:
-            msg = "Unexpected Error, did not find reference to: " + obj_uuid
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        # note when the object was deleted
-        self.setModifiedTime(obj_uuid)
-
-        return True
-
-    def getGroupItemByUuid(self, obj_uuid):
-        self.initFile()
-        grp = self.getGroupObjByUuid(obj_uuid)
-        if grp is None:
-            if self.getModifiedTime(obj_uuid, useRoot=False):
-                msg = "Group with uuid: " + obj_uuid + " has been previously deleted"
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            else:
-                msg = "Group with uuid: " + obj_uuid + " was not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        linkCount = len(grp)
-        if "__db__" in grp:
-            linkCount -= 1  # don't include the db group
-
-        item = {"id": obj_uuid}
-        alias = []
-        if grp.name and not grp.name.startswith("/__db__"):
-            alias.append(grp.name)  # just use the default h5py path for now
-        item["alias"] = alias
-        item["attributeCount"] = len(grp.attrs)
-        item["linkCount"] = linkCount
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        return item
-
-    """
-    getLinkItemByObj - return info about a link
-        parent: reference to group
-        linkName: name of link
-        return: item dictionary with link attributes, or None if not found
-    """
-
-    def getLinkItemByObj(self, parent, link_name):
-        if link_name not in parent:
-            return None
-
-        if link_name == "__db__":
-            return None  # don't provide link to db group
-        #  "http://somefile/#h5path(somepath)")
-        item = {"title": link_name}
-        # get the link object, one of HardLink, SoftLink, or ExternalLink
-        try:
-            linkObj = parent.get(link_name, None, False, True)
-            linkClass = linkObj.__class__.__name__
-        except TypeError:
-            # UDLink? set class as 'user'
-            linkClass = "UDLink"  # user defined links
-            item["class"] = "H5L_TYPE_USER_DEFINED"
-        if linkClass == "SoftLink":
-            item["class"] = "H5L_TYPE_SOFT"
-            item["h5path"] = linkObj.path
-            item["href"] = "#h5path(" + linkObj.path + ")"
-        elif linkClass == "ExternalLink":
-            item["class"] = "H5L_TYPE_EXTERNAL"
-            item["h5path"] = linkObj.path
-            item["file"] = linkObj.filename
-            item["href"] = "#h5path(" + linkObj.path + ")"
-        elif linkClass == "HardLink":
-            # Hardlink doesn't have any properties itself, just get the linked
-            # object
-            obj = parent[link_name]
-            addr = h5py.h5o.get_info(obj.id).addr
-            item["class"] = "H5L_TYPE_HARD"
-            item["id"] = self.getUUIDByAddress(addr)
-            class_name = obj.__class__.__name__
-            if class_name == "Dataset":
-                item["href"] = "datasets/" + item["id"]
-                item["collection"] = "datasets"
-            elif class_name == "Group":
-                item["href"] = "groups/" + item["id"]
-                item["collection"] = "groups"
-            elif class_name == "Datatype":
-                item["href"] = "datatypes/" + item["id"]
-                item["collection"] = "datatypes"
-            else:
-                self.log.warning("unexpected object type: " + item["type"])
-
-        return item
-
-    def getLinkItemByUuid(self, grpUuid, link_name):
-        self.log.info("db.getLinkItemByUuid(" + grpUuid + ", [" + link_name + "])")
-        if not link_name:
-            msg = "link_name not specified"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        self.initFile()
-        parent = self.getGroupObjByUuid(grpUuid)
-        if parent is None:
-            msg = "Parent group: " + grpUuid + " of link not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        item = self.getLinkItemByObj(parent, link_name)
-        # add timestamps
-        if item:
-            if self.update_timestamps:
-                item["ctime"] = self.getCreateTime(
-                    grpUuid, objType="link", name=link_name
-                )
-                item["mtime"] = self.getModifiedTime(
-                    grpUuid, objType="link", name=link_name
-                )
-        else:
-            self.log.info("link not found")
-            mtime = self.getModifiedTime(
-                grpUuid, objType="link", name=link_name, useRoot=False
-            )
-            if mtime:
-                msg = (
-                    "Link ["
-                    + link_name
-                    + "] of: "
-                    + grpUuid
-                    + " has been previously deleted"
-                )
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            else:
-                msg = "Link [" + link_name + "] of: " + grpUuid + " not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        return item
-
-    def getLinkItems(self, grpUuid, marker=None, limit=0):
-        self.log.info("db.getLinkItems(" + grpUuid + ")")
-        if marker:
-            self.log.info("...marker: " + marker)
-        if limit:
-            self.log.info("...limit: " + str(limit))
-
-        self.initFile()
-        parent = self.getGroupObjByUuid(grpUuid)
-        if parent is None:
-            msg = "Parent group: " + grpUuid + " not found, no links returned"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        items = []
-        gotMarker = True
-        if marker is not None:
-            gotMarker = False
-        count = 0
-        for link_name in parent:
-            if link_name == "__db__":
-                continue
-            if not gotMarker:
-                if link_name == marker:
-                    gotMarker = True
-                    continue  # start filling in result on next pass
-                else:
-                    continue  # keep going!
-            item = self.getLinkItemByObj(parent, link_name)
-            items.append(item)
-
-            count += 1
-            if limit > 0 and count == limit:
-                break  # return what we got
-        return items
-
-    def unlinkItem(self, grpUuid, link_name):
-        if self.readonly:
-            msg = "Unable to unlink item (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        grp = self.getGroupObjByUuid(grpUuid)
-        if grp is None:
-            msg = "Parent group: " + grpUuid + " not found, cannot remove link"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        if link_name not in grp:
-            msg = (
-                "Link: ["
-                + link_name
-                + "] of group: "
-                + grpUuid
-                + " not found, cannot remove link"
-            )
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        if link_name == "__db__":
-            # don't allow db group to be unlinked!
-            msg = "Unlinking of __db__ group not allowed"
-            raise IOError(errno.EPERM, msg)
-
-        obj = None
-        try:
-            linkObj = grp.get(link_name, None, False, True)
-            linkClass = linkObj.__class__.__name__
-            if linkClass == "HardLink":
-                # we can safely reference the object
-                obj = grp[link_name]
-        except TypeError:
-            # UDLink? Return false to indicate that we can not delete this
-            msg = "Unable to unlink user defined link"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        linkDeleted = False
-        if obj is not None:
-            linkDeleted = self.unlinkObjectItem(grp, obj, link_name)
-        else:
-            # SoftLink or External Link - we can just remove the key
-            del grp[link_name]
-            linkDeleted = True
-
-        if linkDeleted:
-            # update timestamp
-            self.setModifiedTime(grpUuid, objType="link", name=link_name)
-
-        return linkDeleted
-
-    def getCollection(self, col_type, marker=None, limit=None):
-        self.log.info("db.getCollection(" + col_type + ")")
-        # col_type should be either "datasets", "groups", or "datatypes"
-        if col_type not in ("datasets", "groups", "datatypes"):
-            msg = "Unexpected col_type: [" + col_type + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        self.initFile()
-        col = None  # Group, Dataset, or Datatype
-        if col_type == "datasets":
-            col = self.dbGrp["{datasets}"]
-        elif col_type == "groups":
-            col = self.dbGrp["{groups}"]
-        else:  # col_type == "datatypes"
-            col = self.dbGrp["{datatypes}"]
-
-        uuids = []
-        count = 0
-        # gather the non-anonymous ids first
-        for obj_uuid in col.attrs:
-            if marker:
-                if obj_uuid == marker:
-                    marker = None  # clear and pick up next item
-                continue
-            uuids.append(obj_uuid)
-            count += 1
-            if limit is not None and limit > 0 and count == limit:
-                break
-
-        if limit == 0 or (limit is not None and count < limit):
-            # grab any anonymous obj ids next
-            for obj_uuid in col:
-                if marker:
-                    if obj_uuid == marker:
-                        marker = None  # clear and pick up next item
-                    continue
-                uuids.append(obj_uuid)
-                count += 1
-                if limit is not None and limit > 0 and count == limit:
-                    break
-
-        return uuids
-
-    """
-      Get the DB Collection names
-    """
-
-    def getDBCollections(self):
-        return ("{groups}", "{datasets}", "{datatypes}")
-
-    """
-        Return the db collection the uuid belongs to
-    """
-
-    def getDBCollection(self, obj_uuid):
-        dbCollections = self.getDBCollections()
-        for dbCollectionName in dbCollections:
-            col = self.dbGrp[dbCollectionName]
-            if obj_uuid in col or obj_uuid in col.attrs:
-                return col
-        return None
-
-    def unlinkObjectItem(self, parentGrp, tgtObj, link_name):
-        if self.readonly:
-            msg = "Unexpected attempt to unlink object"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        if link_name not in parentGrp:
-            msg = "Unexpected: did not find link_name: [" + link_name + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        try:
-            linkObj = parentGrp.get(link_name, None, False, True)
-        except TypeError:
-            # user defined link?
-            msg = "Unable to remove link (user-defined link?)"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        linkClass = linkObj.__class__.__name__
-        # only deal with HardLinks
-        linkDeleted = False
-        if linkClass == "HardLink":
-            obj = parentGrp[link_name]
-            if tgtObj is None or obj == tgtObj:
-                numlinks = self.getNumLinksToObject(obj)
-                if numlinks == 1:
-                    # last link to this object - convert to anonymous object by
-                    # creating link under {datasets} or {groups} or {datatypes}
-                    # also remove the attribute UUID key
-                    addr = h5py.h5o.get_info(obj.id).addr
-                    obj_uuid = self.getUUIDByAddress(addr)
-                    self.log.info("converting: " + obj_uuid + " to anonymous obj")
-                    dbCol = self.getDBCollection(obj_uuid)
-                    del dbCol.attrs[obj_uuid]  # remove the object ref
-                    dbCol[obj_uuid] = obj  # add a hardlink
-                self.log.info(
-                    "deleting link: [" + link_name + "] from: " + parentGrp.name
-                )
-                del parentGrp[link_name]
-                linkDeleted = True
-        else:
-            self.log.info("unlinkObjectItem: link is not a hardlink, ignoring")
-        return linkDeleted
-
-    def unlinkObject(self, parentGrp, tgtObj):
-        for name in parentGrp:
-            self.unlinkObjectItem(parentGrp, tgtObj, name)
-        return True
-
-    def linkObject(self, parentUUID, childUUID, link_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create link (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        parentObj = self.getGroupObjByUuid(parentUUID)
-        if parentObj is None:
-            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        childObj = self.getDatasetObjByUuid(childUUID)
-        if childObj is None:
-            # maybe it's a group...
-            childObj = self.getGroupObjByUuid(childUUID)
-        if childObj is None:
-            # or maybe it's a committed datatype...
-            childObj = self.getCommittedTypeObjByUuid(childUUID)
-        if childObj is None:
-            msg = "Unable to link item, child UUID: " + childUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        if link_name in parentObj:
-            # link already exists
-            self.log.info("linkname already exists, deleting")
-            self.unlinkObjectItem(parentObj, None, link_name)
-        parentObj[link_name] = childObj
-
-        # convert this from an anonymous object to ref if needed
-        dbCol = self.getDBCollection(childUUID)
-        if childUUID in dbCol:
-            # convert to a ref
-            del dbCol[childUUID]  # remove hardlink
-            dbCol.attrs[childUUID] = childObj.ref  # create a ref
-
-        # set link timestamps
-        now = time.time()
-        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        return True
-
-    def createSoftLink(self, parentUUID, linkPath, link_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create link (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        parentObj = self.getGroupObjByUuid(parentUUID)
-        if parentObj is None:
-            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        if link_name in parentObj:
-            # link already exists
-            self.log.info("linkname already exists, deleting")
-            del parentObj[link_name]  # delete old link
-        parentObj[link_name] = h5py.SoftLink(linkPath)
-
-        now = time.time()
-        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
-
-        return True
-
-    def createExternalLink(self, parentUUID, extPath, linkPath, link_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create link (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        parentObj = self.getGroupObjByUuid(parentUUID)
-        if parentObj is None:
-            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        if link_name in parentObj:
-            # link already exists
-            self.log.info("linkname already exists, deleting")
-            del parentObj[link_name]  # delete old link
-        parentObj[link_name] = h5py.ExternalLink(extPath, linkPath)
-
-        now = time.time()
-        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
-
-        return True
-
-    def createGroup(self, obj_uuid=None):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create group (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        groups = self.dbGrp["{groups}"]
-        if not obj_uuid:
-            obj_uuid = createObjId()
-        newGroup = groups.create_group(obj_uuid)
-        # store reverse map as an attribute
-        addr = h5py.h5o.get_info(newGroup.id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        addrGrp.attrs[str(addr)] = obj_uuid
-
-        # set timestamps
-        now = time.time()
-        self.setCreateTime(obj_uuid, timestamp=now)
-        self.setModifiedTime(obj_uuid, timestamp=now)
-
-        return obj_uuid
-
-    def getNumberOfGroups(self):
-        self.initFile()
-        count = 0
-        groups = self.dbGrp["{groups}"]
-        count += len(groups)  # anonymous groups
-        count += len(groups.attrs)  # linked groups
-        count += 1  # add of for root group
-
-        return count
-
-    def getNumberOfDatasets(self):
-        self.initFile()
-        count = 0
-        datasets = self.dbGrp["{datasets}"]
-        count += len(datasets)  # anonymous datasets
-        count += len(datasets.attrs)  # linked datasets
-        return count
-
-    def getNumberOfDatatypes(self):
-        self.initFile()
-        count = 0
-        datatypes = self.dbGrp["{datatypes}"]
-        count += len(datatypes)  # anonymous datatypes
-        count += len(datatypes.attrs)  # linked datatypes
-        return count
+            kwds["fillvalue"] = fillvalue
+        if cpl:
+            kwds["cpl"] = cpl
+        dset_json = make_new_dset(shape=shape, dtype=dtype, **kwds)
+ 
+        dset_id = createObjId("datasets", root_id=self._root_id)   
+        self._db[dset_id] = dset_json 
+        return dset_id
+
+
+    def resizeDataset(self, dset_id, shape):
+        """
+        Resize existing Dataset
+        """
+        self.log.info(f"resizeDataset {dset_id}, {shape}")
+        
+        dset_json = self.getObjectById(dset_id)  # will throw exception if not found
+        resize_dataset(dset_json, shape)
+         
+
+    def deleteObject(self, obj_id):
+        """ Delete the given object """
+        self.log.info(f"deleteObject: {obj_id}")
+        if obj_id not in self._db:
+            raise KeyError(f"Object {obj_id} not found for deletion")
+        if obj_id == self._root_id:
+            raise KeyError("Root group cannot be deleted")
+        del self._db[obj_id]
+        # TBD: add to pending deleted items
+        
+    def getLinks(self, grp_id):
+        """ Get the links for the given group """
+        grp_json = self.getObjectById(grp_id)
+        if "links" not in grp_json:
+            raise KeyError(f"No links - {grp_id} not a group?")
+        return grp_json["links"]
+      
+    def getLink(self, grp_id, name):
+        """ Get the given link """
+        
+        links = self.getLinks(grp_id)
+        if name not in links:
+            raise KeyError(f"Link [{name}] not found in {grp_id}")
+        return links[name]
+    
+    def createHardLink(self, grp_id, name, tgt_id):
+        """ Create a new hardlink """
+        links = self.getLinks(grp_id)
+        if name in links:
+            self.deleteLink(grp_id, name)
+        link_json = {"class": "H5L_TYPE_HARD", "id": tgt_id}
+        link_json["created"] = time.time()
+        links[name] = link_json
+
+    def createSoftLink(self, grp_id, name, h5path):
+        """ Create a soft link """
+        links = self.getLinks(grp_id)
+        if name in links:
+            self.deleteLink(grp_id, name)
+        link_json = {"class": "H5L_TYPE_SOFT", "h5path": h5path}
+        link_json["created"] = time.time()
+        links[name] = link_json
+
+    def createCustomLink(self, grp_id, name, link_json):
+        """ create a custom link """
+        links = self.getLinks(grp_id)
+        if name in links:
+            self.deleteLink(grp_id, name)
+        if link_json.get("class") != "H5L_TYPE_USER_DEFINED":
+            link_json["class"] = "H5L_TYPE_USER_DEFINED"
+        link_json["created"] = time.time()
+        links[name] = link_json
+
+
+    def createExternalLink(self, grp_id, name, h5path, filepath):
+        """ Create a external link link """
+        links = self.getLinks(grp_id)
+        if name in links:
+            self.deleteLink(grp_id, name)
+        link_json = {"class": "H5L_TYPE_EXTERNAL", "h5path": h5path, "file": filepath}
+        link_json["created"] = time.time()
+        links[name] = link_json
+ 
+    def deleteLink(self, grp_id, name):
+        """ Delete the given link """
+        grp_json = self.getObjectById(grp_id)
+        if "links" not in grp_json:
+            raise KeyError(f"No links - {grp_id} not a group?")
+        links = self.getLinks(grp_id)
+        if name not in links:
+            raise KeyError(f"Link [{name}] not found in {grp_id}")
+        del links[name]
+        grp_json["modified"] = time.time()
+ 
+
+    def createGroup(self, cpl=None):
+        """ Create a new group """
+
+        grp_id = createObjId("groups", root_id=self._root_id)
+        group_json = {"attributes": {}, "links": {}}
+        if cpl:
+            group_json["cpl"] = cpl
+        else:
+            group_json["cpl"] = {}
+        group_json["created"] = time.time
+        group_json["modified"] = None
+        self._db[grp_id] = group_json
+        return grp_id
+
+
+    def __len__(self):
+        # return the number of objects
+        return len(self._db)
+
+
+    def __iter__(self):
+        """ Iterate over object ids """
+
+        for obj_id in self._db:
+            yield obj_id
+
+
+    def __contains__(self, obj_id):
+        """ Test if a obj id  exists """
+        return obj_id in self._db
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index 9c565ce0..be1ffd62 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -14,6 +14,9 @@
 import numpy as np
 
 
+numpy_integer_types = (np.int8, np.uint8, np.int16, np.int16, np.int32, np.uint32, np.int64, np.uint64)
+numpy_float_types = (np.float16, np.float32, np.float64)
+
 class Reference:
     """
     Represents an HDF5 object reference
@@ -148,6 +151,58 @@ def special_dtype(**kwds):
     raise TypeError(f'Unknown special type "{name}"')
 
 
+
+def find_item_type(data):
+    """Find the item type of a simple object or collection of objects.
+
+    E.g. [[['a']]] -> str
+
+    The focus is on collections where all items have the same type; we'll return
+    None if that's not the case.
+
+    The aim is to treat numpy arrays of Python objects like normal Python
+    collections, while treating arrays with specific dtypes differently.
+    We're also only interested in array-like collections - lists and tuples,
+    possibly nested - not things like sets or dicts.
+    """
+    if isinstance(data, np.ndarray):
+        if (
+            data.dtype.kind == 'O' and not check_dtype(vlen=data.dtype)
+        ):
+            item_types = {type(e) for e in data.flat}
+        else:
+            return None
+    elif isinstance(data, (list, tuple)):
+        item_types = {find_item_type(e) for e in data}
+    else:
+        return type(data)
+
+    if len(item_types) != 1:
+        return None
+    return item_types.pop()
+
+def guess_dtype(data):
+    """ Attempt to guess an appropriate dtype for the object, returning None
+    if nothing is appropriate (or if it should be left up the the array
+    constructor to figure out)
+    """
+
+    # todo - handle RegionReference, Reference
+    item_type = find_item_type(data)
+    if item_type is bytes:
+        return special_dtype(vlen=bytes)
+    if item_type is str:
+        return special_dtype(vlen=str)
+
+    return None
+
+def is_float16_dtype(dt):
+    if dt is None:
+        return False
+
+    dt = np.dtype(dt)  # normalize strings -> np.dtype objects
+    return dt.kind == 'f' and dt.itemsize == 2
+
 def check_dtype(**kwds):
     """Check a dtype for h5py special type "hint" information.  Only one
     keyword may be given.
@@ -222,7 +277,7 @@ def getTypeResponse(typeItem):
         for k in typeItem.keys():
             if k == "base":
                 if isinstance(typeItem[k], dict):
-                    response[k] = getTypeResponse(typeItem[k])  # recurse call
+                    response[k] = getTypeResponse(typeItem[k])  # recursive call
                 else:
                     response[k] = typeItem[k]  # predefined type
             elif k not in ("size", "base_size"):
@@ -251,6 +306,9 @@ def getTypeItem(dt, metadata=None):
         "float32": "H5T_IEEE_F32",
         "float64": "H5T_IEEE_F64",
     }
+    
+    dt = np.dtype(dt)  # convert 'int32', np.int32, etc. to a dtype
+
     if not metadata and dt.metadata:
         metadata = dt.metadata
 
@@ -421,6 +479,23 @@ def getTypeItem(dt, metadata=None):
     return type_info
 
 
+def isVlen(dt):
+    """
+    Return True if the type contains variable length elements
+    """
+    is_vlen = False
+    if len(dt) > 1:
+        names = dt.names
+        for name in names:
+            if isVlen(dt[name]):
+                is_vlen = True
+                break
+    else:
+        if dt.metadata and "vlen" in dt.metadata:
+            is_vlen = True
+    return is_vlen
+
+
 def getItemSize(typeItem):
     """
     Get size of an item in bytes.
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index 598790e0..8c62a752 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -84,6 +84,8 @@ def isSchema2Id(id):
     """return true if this is a v2 id"""
     # v1 ids are in the standard UUID format: 8-4-4-4-12
     # v2 ids are in the non-standard: 8-8-4-6-6
+    if not isValidUuid(id):
+        return False
     parts = id.split("-")
     if len(parts) != 6:
         raise ValueError(f"Unexpected id formation for uuid: {id}")
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
new file mode 100644
index 00000000..d37c7f5f
--- /dev/null
+++ b/test/unit/array_util_test.py
@@ -0,0 +1,1021 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import json
+import numpy as np
+
+import base64
+
+from h5json.array_util import bytesArrayToList
+from h5json.array_util import toTuple
+from h5json.array_util import getNumElements
+from h5json.array_util import jsonToArray
+from h5json.array_util import arrayToBytes
+from h5json.array_util import bytesToArray
+from h5json.array_util import getByteArraySize
+from h5json.array_util import IndexIterator
+from h5json.array_util import ndarray_compare
+from h5json.array_util import getNumpyValue
+from h5json.array_util import getBroadcastShape
+
+from h5json.hdf5dtype import special_dtype
+from h5json.hdf5dtype import check_dtype
+from h5json.hdf5dtype import createDataType
+
+
+class ArrayUtilTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(ArrayUtilTest, self).__init__(*args, **kwargs)
+        # main
+
+    def testByteArrayToList(self):
+        data_items = (
+            42,
+            "foo",
+            b"foo",
+            [1, 2, 3],
+            (1, 2, 3),
+            ["A", "B", "C"],
+            [b"A", b"B", b"C"],
+            [["A", "B"], [b"a", b"b", b"c"]],
+        )
+        for data in data_items:
+            json_data = bytesArrayToList(data)
+            # will throw TypeError if not able to convert
+            json.dumps(json_data)
+
+    def testToTuple(self):
+        data0d = 42  # scalar
+        data1d1 = [1]  # one dimensional, one element list
+        data1d = [1, 2, 3, 4, 5]  # list
+        data2d1 = [
+            [1, 2],
+        ]  # two dimensional, one element
+        data2d = [[1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]  # list of two-element lists
+        data3d = [[[0, 0.0], [1, 0.1]], [[2, 0.2], [3, 0.3]]]  # list of list of lists
+        out = toTuple(0, data0d)
+        self.assertEqual(data0d, out)
+        out = toTuple(1, data1d1)
+        self.assertEqual(data1d1, out)
+        out = toTuple(1, data1d)
+        self.assertEqual(data1d, out)
+        out = toTuple(2, data2d)
+        self.assertEqual(data2d, out)
+        out = toTuple(1, data2d1)
+        self.assertEqual([(1, 2)], out)
+        out = toTuple(3, data3d)
+        self.assertEqual(data3d, out)
+        out = toTuple(1, data2d)  # treat input as 1d array of two-field compound types
+        self.assertEqual([(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)], out)
+        out = toTuple(2, data3d)  # treat input as 2d array of two-field compound types
+        self.assertEqual([[(0, 0.0), (1, 0.1)], [(2, 0.2), (3, 0.3)]], out)
+        out = toTuple(1, data3d)  # treat input a 1d array of compound type of compound types
+        self.assertEqual([((0, 0.0), (1, 0.1)), ((2, 0.2), (3, 0.3))], out)
+
+    def testGetNumElements(self):
+        shape = (4,)
+        nelements = getNumElements(shape)
+        self.assertEqual(nelements, 4)
+
+        shape = [10,]
+        nelements = getNumElements(shape)
+        self.assertEqual(nelements, 10)
+
+        shape = (10, 8)
+        nelements = getNumElements(shape)
+        self.assertEqual(nelements, 80)
+
+    def testJsonToArray(self):
+        dt = np.dtype("i4")
+        shape = [4, ]
+        data = [0, 2, 4, 6]
+        out = jsonToArray(shape, dt, data)
+
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (4,))
+        for i in range(4):
+            self.assertEqual(out[i], i * 2)
+
+        # compound type
+        dt = np.dtype([("a", "i4"), ("b", "S5")])
+        shape = [2, ]
+        data = [[4, "four"], [5, "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
+        self.assertEqual(out.shape, (2,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (4, b"four"))
+        self.assertTrue(isinstance(out[1], np.void))
+        e1 = out[1].tolist()
+        self.assertEqual(e1, (5, b"five"))
+
+        shape = [1, ]
+        data = [
+            [6, "six"],
+        ]
+        out = jsonToArray(shape, dt, data)
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (6, b"six"))
+
+        data = [6, "six"]
+        out = jsonToArray(shape, dt, data)
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (6, b"six"))
+
+        # test ascii chars >127
+        dt = np.dtype("S26")
+        data = "extended ascii char 241: " + chr(241)
+        out = jsonToArray(shape, dt, data)
+        self.assertEqual(out[0], b'extended ascii char 241: \xc3')
+
+        dt = np.dtype("S12")
+        data = "eight: \u516b"
+        out = jsonToArray(shape, dt, data)
+        self.assertEqual(out[0], b'eight: \xe5\x85\xab')
+
+        # VLEN ascii
+        dt = special_dtype(vlen=bytes)
+        data = [b"one", b"two", b"three", b"four", b"five"]
+        shape = [5, ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue("vlen" in out.dtype.metadata)
+        self.assertEqual(out.dtype.metadata["vlen"], bytes)
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(out.shape, (5,))
+        # TBD: code does not actually enforce use of bytes vs. str,
+        #  probably not worth the effort to fix
+        self.assertEqual(out[2], b"three")
+        self.assertEqual(out[3], b"four")
+
+        # VLEN str
+        dt = special_dtype(vlen=str)
+        data = [
+            [b"part 1 - section A", b"part 1 - section B"],
+            [b"part 2 - section A", b"part 2 - section B"],
+        ]
+        shape = [2,]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue("vlen" in out.dtype.metadata)
+        self.assertEqual(out.dtype.metadata["vlen"], str)
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(out.shape, (2,))
+        self.assertEqual(out[0], tuple(data[0]))
+        self.assertEqual(out[1], tuple(data[1]))
+
+        # VLEN Scalar str
+        dt = special_dtype(vlen=str)
+        data = "I'm a string!"
+        shape = [1, ]
+        out = jsonToArray(shape, dt, data)
+
+        # VLEN unicode
+        dt = special_dtype(vlen=bytes)
+        data = ["one", "two", "three", "four", "five"]
+        shape = [5, ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue("vlen" in out.dtype.metadata)
+        self.assertEqual(out.dtype.metadata["vlen"], bytes)
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(out[2], b"three")
+
+        # VLEN data
+        dt = special_dtype(vlen=np.dtype("int32"))
+        shape = [4, ]
+        data = [
+            [1,],
+            [1, 2],
+            [1, 2, 3],
+            [1, 2, 3, 4],
+        ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+
+        self.assertEqual(out.shape, (4,))
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+        for i in range(4):
+            e = out[i]  # .tolist()
+            self.assertTrue(isinstance(e, tuple))
+            self.assertEqual(e, tuple(range(1, i + 2)))
+
+        # VLEN 2D data
+        dt = special_dtype(vlen=np.dtype("int32"))
+        shape = [2, 2]
+        data = [
+            [
+                [0,],
+                [1, 2],
+            ],
+            [
+                [1,],
+                [2, 3],
+            ],
+        ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+
+        self.assertEqual(out.shape, (2, 2))
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+        for i in range(2):
+            for j in range(2):
+                e = out[i, j]  # .tolist()
+                self.assertTrue(isinstance(e, tuple))
+
+        # create VLEN of obj ref's
+        ref_type = {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"}
+        vlen_type = {"class": "H5T_VLEN", "base": ref_type}
+        dt = createDataType(vlen_type)  # np datatype
+
+        id0 = b"g-a4f455b2-c8cf-11e7-8b73-0242ac110009"
+        id1 = b"g-a50af844-c8cf-11e7-8b73-0242ac110009"
+        id2 = b"g-a5236276-c8cf-11e7-8b73-0242ac110009"
+
+        data = [
+            [id0, ],
+            [id0, id1],
+            [id0, id1, id2],
+        ]
+        shape = [3, ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        base_type = check_dtype(vlen=out.dtype)
+        self.assertEqual(base_type.kind, "S")
+        self.assertEqual(base_type.itemsize, 48)
+
+        self.assertEqual(out.shape, (3,))
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("S48"))
+
+        e = out[0]
+        self.assertTrue(isinstance(e, tuple))
+        self.assertEqual(e, (id0,))
+        e = out[1]
+        self.assertTrue(isinstance(e, tuple))
+        self.assertEqual(e, (id0, id1))
+        e = out[2]
+        self.assertTrue(isinstance(e, tuple))
+        self.assertEqual(e, (id0, id1, id2))
+
+        # compound type with array field
+        dt = np.dtype([("a", ("i4", 3)), ("b", "S5")])
+        shape = [2, ]
+        data = [[[4, 8, 12], "four"], [[5, 10, 15], "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
+        self.assertEqual(out.shape, (2,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e0 = out[0]
+        self.assertEqual(len(e0), 2)
+        e0a = e0[0]
+        self.assertTrue(isinstance(e0a, np.ndarray))
+        self.assertEqual(e0a[0], 4)
+        self.assertEqual(e0a[1], 8)
+        self.assertEqual(e0a[2], 12)
+        e0b = e0[1]
+        self.assertEqual(e0b, b"four")
+        self.assertTrue(isinstance(out[1], np.void))
+        e1 = out[1]
+        self.assertEqual(len(e1), 2)
+        e1a = e1[0]
+        self.assertTrue(isinstance(e1a, np.ndarray))
+        self.assertEqual(e1a[0], 5)
+        self.assertEqual(e1a[1], 10)
+        self.assertEqual(e1a[2], 15)
+        e1b = e1[1]
+        self.assertEqual(e1b, b"five")
+
+    def testToBytes(self):
+        # Simple array
+        dt = np.dtype("<i4")
+        arr = np.asarray((1, 2, 3, 4), dtype=dt)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # convert buffer back to arr
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(np.array_equal(arr, arr_copy))
+
+        # fixed length string
+        dt = np.dtype("S8")
+        arr = np.asarray(("abcdefgh", "ABCDEFGH", "12345678"), dtype=dt)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (3,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # fixed length UTF8 string
+        dt = np.dtype("S10")
+        arr = np.asarray(b'eight: \xe5\x85\xab', dtype=dt)
+        buffer = arrayToBytes(arr)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, ())
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # invalid UTF string
+        dt = np.dtype("S2")
+        arr = np.asarray(b'\xff\xfe', dtype=dt)
+        buffer = arrayToBytes(arr)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, ())
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # invalid UTF string with base64 encoding
+        dt = np.dtype("S2")
+        arr = np.asarray(b'\xff\xfe', dtype=dt)
+        buffer = b'//4='  # this is the base64 encoding of b'\xff\xfe'
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # Compound non-vlen
+        dt = np.dtype([("x", "f8"), ("y", "i4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (3.12, 42)
+        arr[3] = (1.28, 69)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of int32's
+        dt = np.dtype("O", metadata={"vlen": np.dtype("int32")})
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = np.int32([1, ])
+        arr[1] = np.int32([1, 2])
+        arr[2] = 0  # test un-intialized value
+        arr[3] = np.int32([1, 2, 3])
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 40)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of strings
+        dt = np.dtype("O", metadata={"vlen": str})
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = "one: \u4e00"
+        arr[1] = "two: \u4e8c"
+        arr[2] = "three: \u4e09"
+        arr[3] = "four: \u56db"
+        arr[4] = 0
+        buffer = arrayToBytes(arr)
+
+        expected_length = 55
+        expected = bytearray(expected_length)
+        expected[0:4] = b"\x08\x00\x00\x00"
+        expected[4:16] = b"one: \xe4\xb8\x80\x08\x00\x00\x00"
+        expected[16:28] = b"two: \xe4\xba\x8c\n\x00\x00\x00"
+        expected[28:42] = b"three: \xe4\xb8\x89\t\x00\x00\x00"
+        expected[42:55] = b"four: \xe5\x9b\x9b\x00\x00\x00\x00"
+
+        self.assertEqual(len(buffer), expected_length)
+
+        self.assertEqual(buffer, expected)
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,))
+
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+        # VLEN of bytes
+        dt = np.dtype("O", metadata={"vlen": bytes})
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = b"Parting"
+        arr[1] = b"is such"
+        arr[2] = b"sweet"
+        arr[3] = b"sorrow"
+        arr[4] = 0
+
+        buffer = arrayToBytes(arr)
+
+        expected = bytearray(45)
+        expected[0:11] = b"\x07\x00\x00\x00Parting"
+        expected[11:22] = b"\x07\x00\x00\x00is such"
+        expected[22:31] = b"\x05\x00\x00\x00sweet"
+        expected[31:41] = b"\x06\x00\x00\x00sorrow"
+        expected[41:45] = b"\x00\x00\x00\x00"
+
+        self.assertEqual(len(buffer), len(expected))
+        self.assertEqual(buffer, expected)  # same serialization as with str
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound str vlen
+        #
+        dt_vstr = np.dtype("O", metadata={"vlen": str})
+        dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, "Hello", "X1")
+        arr[3] = (84, "Bye", "XYZ")
+        count = getByteArraySize(arr)
+        buffer = arrayToBytes(arr)
+
+        self.assertEqual(len(buffer), 56)
+        self.assertEqual(buffer.find(b"Hello"), 8)
+        self.assertEqual(buffer.find(b"Bye"), 49)
+        self.assertEqual(buffer.find(b"X1"), 13)
+        self.assertEqual(buffer.find(b"XYZ"), 52)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound int vlen
+        #
+        dt_vint = np.dtype("O", metadata={"vlen": "int32"})
+        dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, np.array((), dtype="int32"))
+        arr[3] = (84, np.array((1, 2, 3), dtype="int32"))
+        count = getByteArraySize(arr)
+        self.assertEqual(count, 44)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 44)
+        buffer_expected = {0: 42, 24: 84, 28: 12, 32: 1, 36: 2, 40: 3}
+        for i in range(44):
+            if i in buffer_expected:
+                self.assertEqual(buffer[i], buffer_expected[i])
+            else:
+                self.assertEqual(buffer[i], 0)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # VLEN utf string with array type
+        #
+        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+        dt_str = np.dtype("O", metadata={"vlen": str})
+        arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+        #
+        # VLEN ascii with array type
+        #
+        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+        dt_str = np.dtype("O", metadata={"vlen": bytes})
+        arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+        # convert back to array
+
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+    def testArrToBytesBase64(self):
+        # Simple array
+        dt = np.dtype("<i4")
+        arr = np.asarray((1, 2, 3, 4), dtype=dt)
+        buffer = arrayToBytes(arr, encoding="base64")
+        # should be a bit longer than the byte representation...
+        expected_num_bytes = np.prod(arr.shape) * dt.itemsize
+        self.assertTrue(len(buffer) > expected_num_bytes)
+
+        # convert buffer back to arr
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(np.array_equal(arr, arr_copy))
+
+        # fixed length string
+        dt = np.dtype("S8")
+        arr = np.asarray(("abcdefgh", "ABCDEFGH", "12345678"), dtype=dt)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (3,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # Compound non-vlen
+        dt = np.dtype([("x", "f8"), ("y", "i4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (3.12, 42)
+        arr[3] = (1.28, 69)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of int32's
+        dt = np.dtype("O", metadata={"vlen": np.dtype("int32")})
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = np.int32([1, ])
+        arr[1] = np.int32([1, 2])
+        arr[2] = 0  # test un-intialized value
+        arr[3] = np.int32([1, 2, 3])
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of strings
+        dt = np.dtype("O", metadata={"vlen": str})
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = "one: \u4e00"
+        arr[1] = "two: \u4e8c"
+        arr[2] = "three: \u4e09"
+        arr[3] = "four: \u56db"
+        arr[4] = 0
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+        # VLEN of bytes
+        dt = np.dtype("O", metadata={"vlen": bytes})
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = b"Parting"
+        arr[1] = b"is such"
+        arr[2] = b"sweet"
+        arr[3] = b"sorrow"
+        arr[4] = 0
+
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound str vlen
+        #
+        dt_vstr = np.dtype("O", metadata={"vlen": str})
+        dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, "Hello", "X1")
+        arr[3] = (84, "Bye", "XYZ")
+        count = getByteArraySize(arr)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound int vlen
+        #
+        dt_vint = np.dtype("O", metadata={"vlen": "int32"})
+        dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, np.array((), dtype="int32"))
+        arr[3] = (84, np.array((1, 2, 3), dtype="int32"))
+        count = getByteArraySize(arr)
+        self.assertEqual(count, 44)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # VLEN utf string with array type
+        #
+        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+        dt_str = np.dtype("O", metadata={"vlen": str})
+        arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+        #
+        # VLEN ascii with array type
+        #
+        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+        dt_str = np.dtype("O", metadata={"vlen": bytes})
+        arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+    def testArrayCompareInt(self):
+        # Simple array
+        dt = np.dtype("<i4")
+        arr1 = np.zeros((1024, 1024), dtype=dt)
+        arr2 = np.zeros((1024, 1024), dtype=dt)
+        for _ in range(100):
+            self.assertTrue(ndarray_compare(arr1, arr2))
+        arr1[123, 456] = 42
+        self.assertFalse(ndarray_compare(arr1, arr2))
+
+    def testArrayCompareVlenInt(self):
+        # Vlen array
+        dt_vint = np.dtype("O", metadata={"vlen": "int32"})
+        dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
+        arr1 = np.zeros((1024, 1024), dtype=dt)
+        arr2 = np.zeros((1024, 1024), dtype=dt)
+        e1 = (42, np.array((), dtype="int32"))
+        e2 = (84, np.array((1, 2, 3), dtype="int32"))
+        arr1[123, 456] = e1
+        arr2[123, 456] = e1
+        arr1[888, 999] = e2
+        arr2[888, 999] = e2
+
+        # performance is marginal for this case
+        for _ in range(1):
+            self.assertTrue(ndarray_compare(arr1, arr2))
+        arr2[123, 456] = e2
+        self.assertFalse(ndarray_compare(arr1, arr2))
+
+    def testJsonToBytes(self):
+        #
+        # VLEN int
+        #
+
+        def array_equal(a, b):
+            """ compare two values element by element."""
+            if type(a) in (list, tuple, np.void, np.ndarray):
+                if len(a) != len(b):
+                    print("number of elements doesn't match")
+                    return False
+                nelements = len(a)
+                for i in range(nelements):
+                    if not array_equal(a[i], b[i]):
+                        return False
+            else:
+                # treat a string and bytes as equal if the utf-8 encoding
+                # of the string is equal to the byte encoding
+                if isinstance(a, str):
+                    a = a.encode("utf8")
+                if isinstance(b, str):
+                    b = b.encode("utf8")
+                if a != b:
+                    print(f"{a} != {b}")
+                    return False
+
+            return True
+
+        dt = special_dtype(vlen=np.dtype("int32"))
+        shape = [4,]
+        data = [
+            [1,],
+            [1, 2],
+            [1, 2, 3],
+            [1, 2, 3, 4],
+        ]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(check_dtype(vlen=arr.dtype), np.dtype("int32"))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 56)
+
+        expected = bytearray(48)
+        expected[0:8] = b"\x04\x00\x00\x00\x01\x00\x00\x00"
+        expected[8:16] = b"\x08\x00\x00\x00\x01\x00\x00\x00"
+        expected[16:24] = b"\x02\x00\x00\x00\x0c\x00\x00\x00"
+        expected[24:32] = b"\x01\x00\x00\x00\x02\x00\x00\x00"
+        expected[32:40] = b"\x03\x00\x00\x00\x10\x00\x00\x00"
+        expected[40:48] = b"\x01\x00\x00\x00\x02\x00\x00\x00"
+        expected[48:56] = b"\x03\x00\x00\x00\x04\x00\x00\x00"
+        self.assertEqual(buffer, expected)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, shape)
+        # np.array_equal doesn't work for object arrays
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+        #
+        # Compound vlen
+        #
+        dt_str = np.dtype("O", metadata={"vlen": str})
+        dt = np.dtype([("x", "i4"), ("tag", dt_str)])
+        shape = [4, ]
+        data = [[42, "Hello"], [0, 0], [0, 0], [84, "Bye"]]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 40)
+
+        expected = bytearray(40)
+        expected[0:8] = b"*\x00\x00\x00\x05\x00\x00\x00"
+        expected[8:19] = b"Hello\x00\x00\x00\x00\x00\x00"
+        expected[19:26] = b"\x00\x00\x00\x00\x00\x00\x00"
+        expected[26:40] = b"\x00\x00\x00T\x00\x00\x00\x03\x00\x00\x00Bye"
+
+        self.assertEqual(buffer, expected)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        # np.array_equal doesn't work for object arrays
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        self.assertTrue(array_equal(arr, arr_copy))
+
+        #
+        # VLEN utf with array type
+        #
+        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        shape = [4,]
+        data = [
+            [42, ["hi", "bye"]],
+            [0, [0, 0]],
+            [0, [0, 0]],
+            [84, ["hi-hi", "bye-bye"]],
+        ]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+        arr_copy = bytesToArray(buffer, dt, shape)
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        self.assertTrue(array_equal(e, e_copy))
+
+        #
+        # VLEN ascii with array type
+        #
+        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        shape = [4,]
+        data = [
+            [42, [b"hi", b"bye"]],
+            [0, [0, 0]],
+            [0, [0, 0]],
+            [84, [b"hi-hi", b"bye-bye"]],
+        ]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+        arr_copy = bytesToArray(buffer, dt, shape)
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        self.assertTrue(array_equal(e, e_copy))
+
+    def testIndexIterator(self):
+        i = 0
+        for index in IndexIterator((10,)):
+            self.assertEqual(index, (i,))
+            i += 1
+        self.assertEqual(i, 10)
+        i = 0
+        for index in IndexIterator((10,), sel=slice(0, 10, 2)):
+            self.assertEqual(index, (i,))
+
+            i += 2
+        self.assertEqual(i, 10)
+        i = 2
+        for index in IndexIterator((10, ), sel=slice(2, 8)):
+            self.assertEqual(index, (i,))
+            i += 1
+        self.assertEqual(i, 8)
+        cnt = 0
+        for index in IndexIterator((4, 5)):
+            cnt += 1
+        self.assertEqual(cnt, 20)
+        cnt = 0
+        for index in IndexIterator((8, 10), sel=(slice(0, 8, 2), slice(0, 10, 2))):
+            cnt += 1
+        self.assertEqual(cnt, 20)
+
+    def testGetNumpyValue(self):
+        # test int conversion
+        dt = np.dtype("<i4")
+        val = getNumpyValue(42, dt=dt)
+        self.assertTrue(isinstance(val, np.int32))
+        self.assertEqual(42, val)
+
+        # test fixed length string conversion
+        dt = np.dtype("S5")
+        val = getNumpyValue("hello", dt=dt)
+        self.assertTrue(isinstance(val, np.bytes_))
+        self.assertEqual(val, b"hello")
+
+        # test variable length string conversion
+        dt = np.dtype("O", metadata={"vlen": bytes})
+        val = getNumpyValue("hello", dt=dt)
+        self.assertTrue(isinstance(val, str))
+        self.assertEqual(val, "hello")
+
+        # test compound type
+        dt = np.dtype([('int', "<i4"), ('str', "S4")])
+        val = getNumpyValue((42, "hdf5"), dt=dt)
+        self.assertTrue(isinstance(val, np.void))
+        self.assertEqual(val[0], 42)
+        self.assertEqual(val[1], b'hdf5')
+
+        # test array of ints
+        dt = np.dtype("<i4")
+        arr = np.array([0, 1], dtype=dt)
+        dt = np.dtype(("<i4", (len(arr),)))
+        val = getNumpyValue(arr, dt=dt)
+
+        self.assertTrue(np.array_equal(val, arr))
+        self.assertTrue(isinstance(val[0], np.int32))
+
+        # test array of floats
+        dt = np.dtype("f4")
+        arr = np.array([0.001, 1.001], dtype=dt)
+        val = getNumpyValue(arr, dt=np.dtype(("f4", (len(arr),))))
+
+        self.assertTrue(np.array_equal(val, arr))
+        self.assertTrue(isinstance(val[0], np.float32))
+
+        # test array of fixed-length strings
+        dt = np.dtype("S5")
+        arr = np.array([b'hello', b'world'], dtype=dt)
+        val = getNumpyValue(arr, dt=np.dtype(("S5", (len(arr),))))
+
+        self.assertTrue(np.array_equal(val, arr))
+        self.assertTrue(isinstance(val[0], np.bytes_))
+
+        # test nan string
+        dt = np.dtype("f4")
+        val = getNumpyValue("nan", dt=dt)
+        self.assertTrue(isinstance(val, np.float32))
+        self.assertTrue(val != val)
+
+    def testGetNumpyValueBase64Encoded(self):
+        # Set up value, numpy dtype, and expected type after decoding
+        value_info = []
+        value_info.append([42, np.dtype("<i4"), np.int32])  # int
+        value_info.append([1.001, np.dtype("f4"), np.float32])  # float
+        value_info.append([b"hello", np.dtype("S5"), np.bytes_])  # fixed-length string
+        value_info.append([(42, b'hdf5'),
+                           np.dtype([('int', "<i4"), ('str', "S4")]), np.void])  # compound type
+        np_values = []
+
+        for vi in value_info:
+            np_values.append(np.array(vi[0], dtype=vi[1]))
+
+        for i in range(len(np_values)):
+            numpy_dtype_out = value_info[i][2]
+
+            # Turn numpy array to bytes object which can be encoded
+            encoded_val = np_values[i].tobytes()
+            # Encode numpy bytes object
+            encoded_val = base64.b64encode(encoded_val)
+            # Decode from bytes object to regular string containing a base64 encoded numpy array
+            # This prevents the utf-8 encoding inside getNumpyValue from prepending b'
+            encoded_val = encoded_val.decode()
+            decoded_val = getNumpyValue(encoded_val, dt=np_values[i].dtype, encoding="base64")
+            self.assertTrue(isinstance(decoded_val, numpy_dtype_out))
+            self.assertEqual(decoded_val, np_values[i])
+
+        # test array types
+
+        # Set up value, numpy dtype, and expected type after decoding
+        value_info = []
+        value_info.append([np.array([0, 1], dtype=np.dtype("<i4")),
+                           np.dtype(("<i4", (2,))), np.int32])  # int array
+        value_info.append([np.array([0.001, 1.001], dtype=np.dtype("f4")),
+                           np.dtype(("f4", (2,))), np.float32])  # float array
+        value_info.append([np.array([b'hello', b'world'], dtype=np.dtype("S5")),
+                           np.dtype(("S5", (2,))), np.bytes_])  # fixed length string array
+
+        for i in range(len(value_info)):
+            this_array = value_info[i][0]
+            array_dtype = value_info[i][1]
+            array_dtype_out = value_info[i][2]
+
+            # Turn numpy array to bytes object which can be encoded
+            encoded_val = this_array.tobytes()
+            # Encode numpy bytes object
+            encoded_val = base64.b64encode(encoded_val)
+            # Decode from bytes object to regular string containing a base64 encoded numpy array
+            # This prevents the utf-8 encoding inside getNumpyValue from prepending b'
+            encoded_val = encoded_val.decode()
+            decoded_val = getNumpyValue(encoded_val, dt=array_dtype, encoding="base64")
+
+            self.assertTrue(np.array_equal(decoded_val, this_array))
+            self.assertTrue(isinstance(decoded_val[0], array_dtype_out))
+
+        # test invalid base64 length
+        try:
+            dt = np.dtype("<i8")
+            getNumpyValue("KgAAAA==", dt=dt, encoding="base64")
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+    def testJsonToArrayOnNoneArray(self):
+        data_dtype = np.dtype("i4")
+        data_shape = [0, ]
+        data_json = [None]
+        arr = None
+
+        try:
+            arr = jsonToArray(data_shape, data_dtype, data_json)
+        except Exception as e:
+            print(f"Exception while testing jsonToArray on array with None elements: {e}")
+
+        self.assertTrue(len(arr) == 0)
+        self.assertTrue(arr.dtype == data_dtype)
+
+    def testGetBroadcastShape(self):
+        bcshape = getBroadcastShape([1, ], 1)
+        self.assertEqual(bcshape, None)
+        bcshape = getBroadcastShape([2, 3], 6)
+        self.assertEqual(bcshape, None)
+        bcshape = getBroadcastShape([2, 3], 5)
+        self.assertEqual(bcshape, None)
+
+        bcshape = getBroadcastShape([4, 5], 1)
+        self.assertEqual(bcshape, [1, ])
+        bcshape = getBroadcastShape([4, 5], 5)
+        self.assertEqual(bcshape, [5, ])
+
+        bcshape = getBroadcastShape([2, 3, 5], 1)
+        self.assertEqual(bcshape, [1, ])
+        bcshape = getBroadcastShape([2, 3, 5], 5)
+        self.assertEqual(bcshape, [5, ])
+        bcshape = getBroadcastShape([2, 3, 5], 15)
+        self.assertEqual(bcshape, [3, 5])
+
+    def testJsonToArrayOnNoneCompoundArray(self):
+        # compound type
+        dt = np.dtype([("a", "i4"), ("b", "S5")])
+        shape = [1,]
+        data = None
+
+        arr = jsonToArray(shape, dt, data)
+
+        self.assertEqual(len(arr), 0)
+        self.assertEqual(arr.dtype, dt)
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 9ac6578d..bee33014 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -10,43 +10,17 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import unittest
-import os
 import time
-import errno
-import os.path as op
-import stat
 import logging
-import shutil
+import numpy as np
 from h5json import Hdf5db
+from h5json.objid import isRootObjId, isValidUuid, isSchema2Id
+from h5json.hdf5dtype import special_dtype, Reference
 
 
 UUID_LEN = 36  # length for uuid strings
 
 
-def getFile(name, tgt, ro=False):
-    src = "data/hdf5/" + name
-    logging.info("copying file to this directory: " + src)
-
-    filepath = "./out/" + tgt
-
-    if op.isfile(filepath):
-        # make sure it's writable, before we copy over it
-        os.chmod(filepath, stat.S_IWRITE | stat.S_IREAD)
-    shutil.copyfile(src, filepath)
-    if ro:
-        logging.info("make read-only")
-        os.chmod(filepath, stat.S_IREAD)
-    return filepath
-
-
-def removeFile(name):
-    try:
-        os.stat(name)
-    except OSError:
-        return
-        # file does not exist
-    os.remove(name)
-
 
 class Hdf5dbTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
@@ -59,7 +33,7 @@ def __init__(self, *args, **kwargs):
         else:
             lhStdout = None
 
-        self.log.setLevel(logging.INFO)
+        self.log.setLevel(logging.DEBUG)
         # create logger
 
         handler = logging.FileHandler("./hdf5dbtest.log")
@@ -71,777 +45,234 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
-        # create directory for test output files
-        if not os.path.exists("./out"):
-            os.makedirs("./out")
-
-    def testInvalidPath(self):
-        filepath = "/tmp/thisisnotafile.h5"
-        try:
-            with Hdf5db(filepath, app_logger=self.log) as db:
-                self.log.error(f"Unexpected Hdf5db ref: {db}")
-                self.assertTrue(False)  # shouldn't get here
-        except IOError as e:
-            self.assertEqual(e.errno, errno.ENXIO)
-            self.assertEqual(e.strerror, "file not found")
-
-    def testInvalidFile(self):
-        filepath = getFile("notahdf5file.h5", "notahdf5file.h5")
-        try:
-            with Hdf5db(filepath, app_logger=self.log) as db:
-                self.log.error(f"Unexpected Hdf5db ref: {db}")
-                self.assertTrue(False)  # shouldn't get here
-        except IOError as e:
-            self.assertEqual(e.errno, errno.EINVAL)
-            self.assertEqual(e.strerror, "not an HDF5 file")
-
-    def testGetUUIDByPath(self):
-        # get test file
-        g1Uuid = None
-        filepath = getFile("tall.h5", "getuuidbypath.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g1Uuid = db.getUUIDByPath("/g1")
-            self.assertEqual(len(g1Uuid), UUID_LEN)
-            obj = db.getObjByPath("/g1")
-            self.assertEqual(obj.name, "/g1")
-            for name in obj:
-                g = obj[name]
-                self.log.debug(f"got obj: {g}")
-            g1links = db.getLinkItems(g1Uuid)
-            self.assertEqual(len(g1links), 2)
-            for item in g1links:
-                self.assertEqual(len(item["id"]), UUID_LEN)
-
-        # end of with will close file
-        # open again and verify we can get obj by name
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            obj = db.getGroupObjByUuid(g1Uuid)
-            g1 = db.getObjByPath("/g1")
-            self.assertEqual(obj, g1)
-
-    def testGetCounts(self):
-        filepath = getFile("tall.h5", "testgetcounts_tall.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            cnt = db.getNumberOfGroups()
-            self.assertEqual(cnt, 6)
-            cnt = db.getNumberOfDatasets()
-            self.assertEqual(cnt, 4)
-            cnt = db.getNumberOfDatatypes()
-            self.assertEqual(cnt, 0)
-
-        filepath = getFile("empty.h5", "testgetcounts_empty.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            cnt = db.getNumberOfGroups()
-            self.assertEqual(cnt, 1)
-            cnt = db.getNumberOfDatasets()
-            self.assertEqual(cnt, 0)
-            cnt = db.getNumberOfDatatypes()
-            self.assertEqual(cnt, 0)
-
-    def testGroupOperations(self):
-        # get test file
-        filepath = getFile("tall.h5", "tall_del_g11.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootuuid = db.getUUIDByPath("/")
-            root = db.getGroupObjByUuid(rootuuid)
-            self.assertEqual("/", root.name)
-            rootLinks = db.getLinkItems(rootuuid)
-            self.assertEqual(len(rootLinks), 2)
-            g1uuid = db.getUUIDByPath("/g1")
-            self.assertEqual(len(g1uuid), UUID_LEN)
-            g1Links = db.getLinkItems(g1uuid)
-            self.assertEqual(len(g1Links), 2)
-            g11uuid = db.getUUIDByPath("/g1/g1.1")
-            db.deleteObjectByUuid("group", g11uuid)
 
-    def testCreateGroup(self):
-        # get test file
-        filepath = getFile("tall.h5", "tall_newgrp.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 2)
-            newGrpUuid = db.createGroup()
-            newGrp = db.getGroupObjByUuid(newGrpUuid)
-            self.assertNotEqual(newGrp, None)
-            db.linkObject(rootUuid, newGrpUuid, "g3")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 3)
-            # verify linkObject can be called idempotent-ly
-            db.linkObject(rootUuid, newGrpUuid, "g3")
+    def testGroup(self):
+    
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            self.assertTrue(isSchema2Id(root_id))
+            self.assertTrue(isRootObjId(root_id))
+
+            g1_id = db.createGroup()
+            self.assertTrue(isSchema2Id(g1_id))
+            self.assertFalse(isRootObjId(g1_id))
+            self.assertTrue(isValidUuid(g1_id, obj_class="groups"))
+            db.createHardLink(root_id, "g1", g1_id)
+
+            g2_id = db.createGroup()
+            self.assertTrue(isSchema2Id(g2_id))
+            self.assertFalse(isRootObjId(g2_id))
+            self.assertTrue(isValidUuid(g2_id, obj_class="groups"))
+            db.createHardLink(root_id, "g2", g2_id)
+
+            g1_1_id = db.createGroup()
+            self.assertTrue(isSchema2Id(g1_1_id))
+            self.assertFalse(isRootObjId(g1_1_id))
+            self.assertTrue(isValidUuid(g1_1_id, obj_class="groups"))
+            db.createHardLink(g1_id, "g1.1", g1_1_id)
+
+            self.assertEqual(db.getObjectIdByPath("g1"), g1_id)
+            self.assertEqual(db.getObjectIdByPath("/g1"), g1_id)
+            self.assertEqual(db.getObjectIdByPath("g1/"), g1_id)
+
+            self.assertEqual(db.getObjectIdByPath("g1/g1.1"), g1_1_id)
+            self.assertEqual(db.getObjectIdByPath("/g1/g1.1"), g1_1_id)
+            self.assertEqual(db.getObjectIdByPath("g1/g1.1/"), g1_1_id)
+
+            grp1_json = db.getObjectById(g1_id)
+            self.assertTrue("links" in grp1_json)
+            g1_links = grp1_json["links"]
+            self.assertTrue("g1.1" in g1_links)
+            g1_1_link = db.getLink(g1_id, "g1.1")
+            self.assertEqual(g1_1_link["class"], "H5L_TYPE_HARD")
+            self.assertEqual(g1_1_link["id"], g1_1_id)
+            self.assertTrue(g1_1_link["created"] > time.time() - 1.0)
+
+            db.createSoftLink(g2_id, "slink", "somewhere")
+            soft_link = db.getLink(g2_id, "slink")
+            self.assertEqual(soft_link["class"], "H5L_TYPE_SOFT")
+            self.assertEqual(soft_link["h5path"], "somewhere")
+            self.assertTrue(soft_link["created"] > time.time() - 1.0)
+
+            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+            ext_link = db.getLink(g2_id, "extlink")
+            self.assertEqual(ext_link["class"], "H5L_TYPE_EXTERNAL")
+            self.assertEqual(ext_link["h5path"], "somewhere")
+            self.assertEqual(ext_link["file"], "someplace")
+            self.assertTrue(ext_link["created"] > time.time() - 1.0)
+
+            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+            cust_link = db.getLink(g2_id, "cust")
+            self.assertEqual(cust_link["class"], "H5L_TYPE_USER_DEFINED")
+            self.assertEqual(cust_link["foo"], "bar")
+            self.assertTrue(cust_link["created"] > time.time() - 1.0)
+
+            links = db.getLinks(g2_id)
+            self.assertEqual(len(links), 3)
+            for title in  "slink", "extlink", "cust":
+                self.assertTrue(title in links)
+
+            db.deleteLink(g2_id, "cust")
+            links = db.getLinks(g2_id)
+            self.assertEqual(len(links), 2)
+            for title in  "slink", "extlink":
+                self.assertTrue(title in links)
 
-    def testGetLinkItemsBatch(self):
-        # get test file
-        filepath = getFile("group100.h5", "getlinkitemsbatch.h5")
-        marker = None
-        count = 0
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            while True:
-                # get items 13 at a time
-                batch = db.getLinkItems(rootUuid, marker=marker, limit=13)
-                if len(batch) == 0:
-                    break  # done!
-                count += len(batch)
-                lastItem = batch[len(batch) - 1]
-                marker = lastItem["title"]
-        self.assertEqual(count, 100)
-
-    def testGetItemHardLink(self):
-        filepath = getFile("tall.h5", "getitemhardlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.1")
-            item = db.getLinkItemByUuid(grpUuid, "dset1.1.1")
-            self.assertTrue("id" in item)
-            self.assertEqual(item["title"], "dset1.1.1")
-            self.assertEqual(item["class"], "H5L_TYPE_HARD")
-            self.assertEqual(item["collection"], "datasets")
-            self.assertTrue("target" not in item)
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetItemSoftLink(self):
-        filepath = getFile("tall.h5", "getitemsoftlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.2/g1.2.1")
-            item = db.getLinkItemByUuid(grpUuid, "slink")
-            self.assertTrue("id" not in item)
-            self.assertEqual(item["title"], "slink")
-            self.assertEqual(item["class"], "H5L_TYPE_SOFT")
-            self.assertEqual(item["h5path"], "somevalue")
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetItemExternalLink(self):
-        filepath = getFile("tall_with_udlink.h5", "getitemexternallink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.2")
-            item = db.getLinkItemByUuid(grpUuid, "extlink")
-            self.assertTrue("uuid" not in item)
-            self.assertEqual(item["title"], "extlink")
-            self.assertEqual(item["class"], "H5L_TYPE_EXTERNAL")
-            self.assertEqual(item["h5path"], "somepath")
-            self.assertEqual(item["file"], "somefile")
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetItemUDLink(self):
-        filepath = getFile("tall_with_udlink.h5", "getitemudlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g2")
-            item = db.getLinkItemByUuid(grpUuid, "udlink")
-            self.assertTrue("uuid" not in item)
-            self.assertEqual(item["title"], "udlink")
-            self.assertEqual(item["class"], "H5L_TYPE_USER_DEFINED")
-            self.assertTrue("h5path" not in item)
-            self.assertTrue("file" not in item)
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetNumLinks(self):
-        filepath = getFile("tall.h5", "getnumlinks.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g1 = db.getObjByPath("/g1")
-            numLinks = db.getNumLinksToObject(g1)
-            self.assertEqual(numLinks, 1)
-
-    def testGetLinks(self):
-        g12_links = ("extlink", "g1.2.1")
-        hardLink = None
-        externalLink = None
-        filepath = getFile("tall_with_udlink.h5", "getlinks.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.2")
-            items = db.getLinkItems(grpUuid)
-            self.assertEqual(len(items), 2)
-            for item in items:
-                self.assertTrue(item["title"] in g12_links)
-                if item["class"] == "H5L_TYPE_HARD":
-                    hardLink = item
-                elif item["class"] == "H5L_TYPE_EXTERNAL":
-                    externalLink = item
-        self.assertEqual(hardLink["collection"], "groups")
-        self.assertTrue("id" in hardLink)
-        self.assertTrue("id" not in externalLink)
-        self.assertEqual(externalLink["h5path"], "somepath")
-        self.assertEqual(externalLink["file"], "somefile")
-
-    def testDeleteLink(self):
-        # get test file
-        filepath = getFile("tall.h5", "deletelink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 2)
-            db.unlinkItem(rootUuid, "g2")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 1)
-
-    def testDeleteUDLink(self):
-        # get test file
-        filepath = getFile("tall_with_udlink.h5", "deleteudlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g2Uuid = db.getUUIDByPath("/g2")
-            numG2Children = len(db.getLinkItems(g2Uuid))
-            self.assertEqual(numG2Children, 3)
-            got_exception = False
             try:
-                db.unlinkItem(g2Uuid, "udlink")
-            except IOError as ioe:
-                got_exception = True
-                self.assertEqual(ioe.errno, errno.EPERM)
-            self.assertTrue(got_exception)
-            numG2Children = len(db.getLinkItems(g2Uuid))
-            self.assertEqual(numG2Children, 3)
-
-    def testReadOnlyGetUUID(self):
-        # get test file
-        filepath = getFile("tall.h5", "readonlygetuuid.h5", ro=True)
-        # remove db file!
-        removeFile("./out/." + "readonlygetuuid.h5")
-        g1Uuid = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g1Uuid = db.getUUIDByPath("/g1")
-            self.assertEqual(len(g1Uuid), UUID_LEN)
-            obj = db.getObjByPath("/g1")
-            self.assertEqual(obj.name, "/g1")
-
-        # end of with will close file
-        # open again and verify we can get obj by name
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            obj = db.getGroupObjByUuid(g1Uuid)
-            g1 = db.getObjByPath("/g1")
-            self.assertEqual(obj, g1)
-            g1links = db.getLinkItems(g1Uuid)
-            self.assertEqual(len(g1links), 2)
-            for item in g1links:
-                self.assertEqual(len(item["id"]), UUID_LEN)
-
-    def testReadDataset(self):
-        filepath = getFile("tall.h5", "readdataset.h5")
-        d111_values = None
-        d112_values = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            self.assertEqual(len(d111Uuid), UUID_LEN)
-            d111_values = db.getDatasetValuesByUuid(d111Uuid)
-            self.assertTrue(type(d111_values) is list)
-            self.assertEqual(len(d111_values), 10)
-            for i in range(10):
-                arr = d111_values[i]
-                self.assertEqual(len(arr), 10)
-                for j in range(10):
-                    self.assertEqual(arr[j], i * j)
-
-            d112Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.2")
-            self.assertEqual(len(d112Uuid), UUID_LEN)
-            d112_values = db.getDatasetValuesByUuid(d112Uuid)
-            self.assertTrue(type(d112_values) is list)
-            self.assertEqual(len(d112_values), 20)
-            for i in range(20):
-                self.assertEqual(d112_values[i], i)
-
-    def testReadDatasetBinary(self):
-        filepath = getFile("tall.h5", "readdatasetbinary.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            self.assertEqual(len(d111Uuid), UUID_LEN)
-            d111_data = db.getDatasetValuesByUuid(d111Uuid, format="binary")
-            self.assertTrue(type(d111_data) is bytes)
-            self.assertEqual(len(d111_data), 400)  # 10x10x(4 byte type)
-
-            d112Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.2")
-            self.assertEqual(len(d112Uuid), UUID_LEN)
-            d112_data = db.getDatasetValuesByUuid(d112Uuid, format="binary")
-            self.assertEqual(len(d112_data), 80)  # 20x(4 byte type)
-
-    def testReadCompoundDataset(self):
-        filepath = getFile("compound.h5", "readcompound.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/dset")
-            self.assertEqual(len(dset_uuid), UUID_LEN)
-            dset_values = db.getDatasetValuesByUuid(dset_uuid)
-
-            self.assertEqual(len(dset_values), 72)
-            elem = dset_values[0]
-            self.assertEqual(elem[0], 24)
-            self.assertEqual(elem[1], "13:53")
-            self.assertEqual(elem[2], 63)
-            self.assertEqual(elem[3], 29.88)
-            self.assertEqual(elem[4], "SE 10")
-
-    def testReadDatasetCreationProp(self):
-        filepath = getFile("compound.h5", "readdatasetcreationprop.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/dset")
-            self.assertEqual(len(dset_uuid), UUID_LEN)
-            dset_item = db.getDatasetItemByUuid(dset_uuid)
-            self.assertTrue("creationProperties" in dset_item)
-            creationProp = dset_item["creationProperties"]
-            self.assertTrue("fillValue" in creationProp)
-            fillValue = creationProp["fillValue"]
-
-            self.assertEqual(fillValue[0], 999)
-            self.assertEqual(fillValue[1], "99:90")
-            self.assertEqual(fillValue[2], 999)
-            self.assertEqual(fillValue[3], 999.0)
-            self.assertEqual(fillValue[4], "N")
-
-    def testCreateScalarDataset(self):
-        creation_props = {
-            "allocTime": "H5D_ALLOC_TIME_LATE",
-            "fillTime": "H5D_FILL_TIME_IFSET",
-            "fillValue": "",
-            "layout": {"class": "H5D_CONTIGUOUS"},
-        }
-        datatype = {
-            "charSet": "H5T_CSET_ASCII",
-            "class": "H5T_STRING",
-            "length": 1,
-            "strPad": "H5T_STR_NULLPAD",
-        }
-        filepath = getFile("empty.h5", "createscalardataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dims = ()  # if no space in body, default to scalar
-            max_shape = None
-
-            db.createDataset(
-                datatype, dims, max_shape=max_shape, creation_props=creation_props
-            )
-
-    def testCreate1dDataset(self):
-        datatype = "H5T_STD_I64LE"
-        dims = (10,)
-        filepath = getFile("empty.h5", "create1ddataset.h5")
-        dset_uuid = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rsp = db.createDataset(datatype, dims)
-
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            self.assertEqual(item["attributeCount"], 0)
-            type_item = item["type"]
-            self.assertEqual(type_item["class"], "H5T_INTEGER")
-            self.assertEqual(type_item["base"], "H5T_STD_I64LE")
-            shape_item = item["shape"]
-            self.assertEqual(shape_item["class"], "H5S_SIMPLE")
-            self.assertEqual(shape_item["dims"], (10,))
-
-    def testCreate2dExtendableDataset(self):
-        datatype = "H5T_STD_I64LE"
-        dims = (10, 10)
-        max_shape = (None, 10)
-        filepath = getFile("empty.h5", "create2dextendabledataset.h5")
-        dset_uuid = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rsp = db.createDataset(datatype, dims, max_shape=max_shape)
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            self.assertEqual(item["attributeCount"], 0)
-            type_item = item["type"]
-            self.assertEqual(type_item["class"], "H5T_INTEGER")
-            self.assertEqual(type_item["base"], "H5T_STD_I64LE")
-            shape_item = item["shape"]
-            self.assertEqual(shape_item["class"], "H5S_SIMPLE")
-            self.assertEqual(shape_item["dims"], (10, 10))
-            self.assertTrue("maxdims" in shape_item)
-            self.assertEqual(shape_item["maxdims"], [0, 10])
-
-    def testCreateCommittedTypeDataset(self):
-        filepath = getFile("empty.h5", "createcommittedtypedataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 15,
-            }
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(type_uuid, dims, max_shape=None, creation_props=None)
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            type_item = item["type"]
-            self.assertTrue("uuid" in type_item)
-            self.assertEqual(type_item["uuid"], type_uuid)
-
-    def testCreateCommittedCompoundTypeDataset(self):
-        filepath = getFile("empty.h5", "createcommittedcompoundtypedataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-
-            datatype = {"class": "H5T_COMPOUND", "fields": []}
-
-            type_fields = []
-            type_fields.append({"name": "field_1", "type": "H5T_STD_I64BE"})
-            type_fields.append({"name": "field_2", "type": "H5T_IEEE_F64BE"})
-
-            datatype["fields"] = type_fields
-
-            creation_props = {"fillValue": [0, 0.0]}
-
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(
-                type_uuid, dims, max_shape=None, creation_props=creation_props
-            )
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            type_item = item["type"]
-            self.assertTrue("uuid" in type_item)
-            self.assertEqual(type_item["uuid"], type_uuid)
+                db.getObjectIdByPath("/g1/foo")
+                self.assertTrue(False)
+            except KeyError:
+                pass  # expected
 
-    def testReadZeroDimDataset(self):
-        filepath = getFile("zerodim.h5", "readzerodeimdataset.h5")
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dsetUuid = db.getUUIDByPath("/dset")
-            self.assertEqual(len(dsetUuid), UUID_LEN)
-            dset_value = db.getDatasetValuesByUuid(dsetUuid)
-            self.assertEqual(dset_value, 42)
-
-    def testReadNullSpaceDataset(self):
-        filepath = getFile("null_space_dset.h5", "readnullspacedataset.h5")
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dsetUuid = db.getUUIDByPath("/DS1")
-            self.assertEqual(len(dsetUuid), UUID_LEN)
-            obj = db.getDatasetObjByUuid(dsetUuid)
-            shape_item = db.getShapeItemByDsetObj(obj)
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_NULL")
-
-    def testReadScalarSpaceArrayDataset(self):
-        filepath = getFile("scalar_array_dset.h5", "readscalarspacearraydataset.h5")
+            try:
+                db.getLink(g2_id, "not_a_link")
+                self.assertTrue(False)
+            except KeyError:
+                pass  # expected
 
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dsetUuid = db.getUUIDByPath("/DS1")
-            self.assertEqual(len(dsetUuid), UUID_LEN)
-            obj = db.getDatasetObjByUuid(dsetUuid)
-            shape_item = db.getShapeItemByDsetObj(obj)
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_SCALAR")
 
-    def testReadNullSpaceAttribute(self):
-        filepath = getFile("null_space_attr.h5", "readnullspaceattr.h5")
+    def testNullSpaceAttribute(self):
 
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            self.assertEqual(len(rootUuid), UUID_LEN)
-            item = db.getAttributeItem("groups", rootUuid, "attr1")
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
             self.assertTrue("shape" in item)
             shape_item = item["shape"]
             self.assertTrue("class" in shape_item)
             self.assertEqual(shape_item["class"], "H5S_NULL")
-
-    def testReadAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("tall.h5", "readattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            self.assertEqual(len(rootUuid), UUID_LEN)
-            item = db.getAttributeItem("groups", rootUuid, "attr1")
-            self.assertTrue(item is not None)
-
-    def testWriteScalarAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writescalarattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
+            self.assertTrue(item["created"] > time.time() - 1.0)
+            self.assertEqual(item["modified"], None)
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(value, None)
+
+    def testScalarAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
             dims = ()
-            datatype = "H5T_STD_I32LE"
             value = 42
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
+            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
             self.assertEqual(item["value"], 42)
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
             shape = item["shape"]
             self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
 
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            self.assertEqual(
-                len(item_type.keys()), 2
-            )  # just two keys should be returned
+            
 
-    def testWriteFixedStringAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writefixedstringattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLPAD",
-                "length": 13,
-            }
+    def testFixedStringAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
             value = "Hello, world!"
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
+            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
             item_type = item["type"]
-            self.assertEqual(item_type["length"], 13)
             self.assertEqual(item_type["class"], "H5T_STRING")
             self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["length"], 13)
             self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-
-    def testWriteFixedNullTermStringAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writefixednulltermstringattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 13,
-            }
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+            ret_value = db.getAttributeValue(root_id, "A1")
+       
+
+    def testVlenAsciiAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+ 
             value = b"Hello, world!"
+            dt = special_dtype(vlen=bytes)
 
             # write the attribute
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
+            db.createAttribute(root_id, "A1", value, dtype=dt)
             # read it back
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            self.assertEqual(item["name"], "A1")
-            # the following compare fails - see issue #34
-            # self.assertEqual(item['value'], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
             item_type = item["type"]
-            self.assertEqual(item_type["length"], 13)
             self.assertEqual(item_type["class"], "H5T_STRING")
-            # NULLTERM get's converted to NULLPAD since the numpy dtype does not
-            # support other padding conventions.
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
             self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-
-    def testWriteVlenStringAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writevlenstringattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": "H5T_VARIABLE",
-            }
-
-            # value = np.string_("Hello, world!")
-            value = "Hello, world!"
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
 
-    def testReadVlenStringDataset(self):
-        item = None
-        filepath = getFile("vlen_string_dset.h5", "vlen_string_dset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            # actual padding is SPACEPAD - See issue #32
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),))
-            self.assertEqual(row, ["Parting"])
+    def testVlenUtf8Attribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+ 
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=str)
 
-    def testReadVlenStringDataset_utc(self):
-        item = None
-        filepath = getFile("vlen_string_dset_utc.h5", "vlen_string_dset_utc.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/ds1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 2293)
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
             item_type = item["type"]
             self.assertEqual(item_type["class"], "H5T_STRING")
             self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
             self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            # next line throws conversion error - see issue #19
-            # row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),))
-
-    def testReadFixedStringDataset(self):
-        item = None
-        filepath = getFile("fixed_string_dset.h5", "fixed_string_dset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 7)
-            row = db.getDatasetValuesByUuid(dset_uuid)
-            self.assertEqual(row, ["Parting", "is such", "sweet", "sorrow."])
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),))
-            self.assertEqual(
-                row,
-                [
-                    "Parting",
-                ],
-            )
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(2, 3),))
-            self.assertEqual(
-                row,
-                [
-                    "sweet",
-                ],
-            )
-
-    def testReadFixedStringDatasetBinary(self):
-        item = None
-        filepath = getFile("fixed_string_dset.h5", "fixed_string_dset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 7)
-            row = db.getDatasetValuesByUuid(dset_uuid, format="binary")
-            self.assertEqual(row, b"Partingis suchsweet\x00\x00sorrow.")
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),), format="binary")
-            self.assertEqual(row, b"Parting")
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(2, 3),), format="binary")
-            self.assertEqual(row, b"sweet\x00\x00")
-
-    def testWriteVlenUnicodeAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writevlenunicodeattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_UTF8",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": "H5T_VARIABLE",
-            }
-            value = "\u6b22\u8fce\u63d0\u4ea4\u5fae\u535a\u641c\u7d22\u4f7f\u7528\u53cd\u9988\uff0c\u8bf7\u76f4\u63a5"
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            self.assertEqual(item["name"], "A1")
-            self.assertEqual(item["value"], value)
-            now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
             self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+
+ 
 
-    def testWriteIntAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writeintattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = (5,)
-            datatype = "H5T_STD_I16LE"
+    def testIntAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
             value = [2, 3, 5, 7, 11]
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
+            db.createAttribute(root_id, "A1", value, dtype=np.int16)
+            item = db.getAttribute(root_id, "A1")
             self.assertEqual(item["value"], [2, 3, 5, 7, 11])
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(item_shape["dims"], [5,])
             item_type = item["type"]
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I16LE")
 
     def testCreateReferenceAttribute(self):
-        filepath = getFile("empty.h5", "createreferencedataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            dset_uuid = rsp["id"]
-            db.linkObject(root_uuid, dset_uuid, "DS1")
-
-            dims = (1,)
-            datatype = {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"}
-            ds1_ref = "datasets/" + dset_uuid
-            value = [
-                ds1_ref,
-            ]
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            attr_type = item["type"]
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            db.createHardLink(root_id, "DS1", dset_id)
+
+            dt = special_dtype(ref=Reference)
+
+            ds1_ref = "datasets/" + dset_id
+            value = [ds1_ref,]
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            item = db.getAttribute(root_id, "A1")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in attr)
+            
+            attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
             attr_value = item["value"]
@@ -849,149 +280,49 @@ def testCreateReferenceAttribute(self):
             self.assertEqual(attr_value[0], ds1_ref)
 
     def testCreateVlenReferenceAttribute(self):
-        filepath = getFile("empty.h5", "createreferenceattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            dset_uuid = rsp["id"]
-            db.linkObject(root_uuid, dset_uuid, "DS1")
-
-            dims = (1,)
-            datatype = {
-                "class": "H5T_VLEN",
-                "base": {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"},
-            }
-            ds1_ref = "datasets/" + dset_uuid
-            value = [
-                [
-                    ds1_ref,
-                ],
-            ]
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            attr_type = item["type"]
-            self.assertEqual(attr_type["class"], "H5T_VLEN")
-            base_type = attr_type["base"]
-            # todo - this should be H5T_REFERENCE, not H5T_OPAQUE
-            # See h5py issue: https://github.com/h5py/h5py/issues/553
-            import h5py
-
-            # test based on h5py version until we change install requirements
-            if h5py.version.version_tuple >= (2, 6, 0):
-                self.assertEqual(base_type["class"], "H5T_REFERENCE")
-            else:
-                self.assertEqual(base_type["class"], "H5T_OPAQUE")
-
-    def testCreateReferenceListAttribute(self):
-        filepath = getFile("empty.h5", "createreferencelistattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-
-            dims = (10,)
-
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            dset_uuid = rsp["id"]
-            db.linkObject(root_uuid, dset_uuid, "dset")
-
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            xscale_uuid = rsp["id"]
-            nullterm_string_type = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "length": 16,
-                "strPad": "H5T_STR_NULLTERM",
-            }
-            scalar_dims = ()
-            db.createAttribute(
-                "datasets",
-                xscale_uuid,
-                "CLASS",
-                scalar_dims,
-                nullterm_string_type,
-                "DIMENSION_SCALE",
-            )
-            db.linkObject(root_uuid, xscale_uuid, "xscale")
-
-            ref_dims = (1,)
-            datatype = {
-                "class": "H5T_VLEN",
-                "base": {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"},
-            }
-            xscale_ref = "datasets/" + xscale_uuid
-            value = [
-                (xscale_ref,),
-            ]
-            db.createAttribute(
-                "datasets", dset_uuid, "DIMENSION_LIST", ref_dims, datatype, value
-            )
-            item = db.getAttributeItem("datasets", dset_uuid, "DIMENSION_LIST")
-
-            attr_type = item["type"]
-            self.assertEqual(attr_type["class"], "H5T_VLEN")
-            base_type = attr_type["base"]
-            # todo - this should be H5T_REFERENCE, not H5T_OPAQUE
-            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            db.createHardLink(root_id, "DS1", dset_id)
+            grp_id = db.createGroup()
+            db.createHardLink(root_id, "G1", grp_id)
+
+            dt_base = special_dtype(ref=Reference)
+            dt = special_dtype(vlen=dt_base)
+             
+            ds1_ref = "datasets/" + dset_id
+            grp_ref = "groups/" + grp_id
+            ref_arr = np.zeros((2,), dtype=dt_base)
+            ref_arr[0] = ds1_ref
+            ref_arr[1] = grp_ref
+            vlen_arr = np.zeros((), dtype=dt)
+            vlen_arr[()] = ref_arr
+             
+            db.createAttribute(root_id, "A1", vlen_arr)
+            item = db.getAttribute(root_id, "A1")
 
-    def testReadCommittedType(self):
-        filepath = getFile("committed_type.h5", "readcommitted_type.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            type_uuid = db.getUUIDByPath("/Sensor_Type")
-            item = db.getCommittedTypeItemByUuid(type_uuid)
-            self.assertTrue("type" in item)
             item_type = item["type"]
-            self.assertTrue(item_type["class"], "H5T_COMPOUND")
-            ds1_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(ds1_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertTrue("class" in item_type)
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            self.assertTrue("uuid" in item_type)
-            self.assertEqual(item_type["uuid"], type_uuid)
-
-            item = db.getAttributeItem("groups", root_uuid, "attr1")
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertTrue("class" in item_type)
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            self.assertTrue("uuid" in item_type)
-            self.assertEqual(item_type["uuid"], type_uuid)
-
-    def testWriteCommittedType(self):
-        filepath = getFile("empty.h5", "writecommittedtype.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 15,
-            }
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-            item = db.getCommittedTypeItemByUuid(type_uuid)
-            self.assertEqual(item["id"], type_uuid)
-            self.assertEqual(item["attributeCount"], 0)
+            self.assertEqual(item_type["class"], "H5T_VLEN")
+            self.assertEqual(item_type["size"], "H5T_VARIABLE")
+            base_type = item_type["base"]
+            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+            
+
+    def testCommittedType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dt = np.dtype("S15")
+             
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            self.assertEqual(len(item["alias"]), 0)  # anonymous, so no alias
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
 
             item_type = item["type"]
 
@@ -1000,318 +331,56 @@ def testWriteCommittedType(self):
             self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
             self.assertEqual(item_type["length"], 15)
 
-    def testWriteCommittedCompoundType(self):
-        filepath = getFile("empty.h5", "writecommittedcompoundtype.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
+            # create an attribute using the committed type
+            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], "hello world!")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_STRING")
+            self.assertEqual(attr_type["length"], 15)
+            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
 
-            datatype = {"class": "H5T_COMPOUND", "fields": []}
 
-            fixed_str_type = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 15,
-            }
+    def testCommittedCompoundType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
 
-            var_str_type = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "length": "H5T_VARIABLE",
-                "strPad": "H5T_STR_NULLTERM",
-            }
-            type_fields = []
-            type_fields.append({"name": "field_1", "type": "H5T_STD_I64BE"})
-            type_fields.append({"name": "field_2", "type": "H5T_IEEE_F64BE"})
-            type_fields.append({"name": "field_3", "type": fixed_str_type})
-            type_fields.append({"name": "field_4", "type": var_str_type})
-            datatype["fields"] = type_fields
+            dt_str = special_dtype(vlen=str)
+            fields = []
+            fields.append(("field_1", np.dtype(">i8")))
+            fields.append(("field_2", ">f8"))
+            fields.append(("field_3", np.dtype("S15")))
+            fields.append(("field_4", dt_str))
+            dt = np.dtype(fields)
 
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-            item = db.getCommittedTypeItemByUuid(type_uuid)
-            self.assertEqual(item["id"], type_uuid)
-            self.assertEqual(item["attributeCount"], 0)
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            self.assertEqual(len(item["alias"]), 0)  # anonymous, so no alias
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
 
             item_type = item["type"]
 
             self.assertEqual(item_type["class"], "H5T_COMPOUND")
             fields = item_type["fields"]
             self.assertEqual(len(fields), 4)
-            # todo - the last field class should be H5T_STRING, but it is getting
-            # saved to HDF5 as Opaque - see: https://github.com/h5py/h5py/issues/613
-            # this is fixed in h5py v. 2.6.0 - check the version until 2.6.0 becomes
-            # available via pip and anaconda.
-            import h5py
-
-            if h5py.version.version_tuple >= (2, 6, 0):
-                field_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_STRING")
-            else:
-                field_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_OPAQUE")
-            for i in range(4):
-                field = fields[i]
-                self.assertEqual(field["name"], "field_" + str(i + 1))
-                field_type = field["type"]
-                self.assertEqual(field_type["class"], field_classes[i])
-
-    def testToRef(self):
-
-        filepath = getFile("empty.h5", "toref.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            type_item = {
-                "order": "H5T_ORDER_LE",
-                "base_size": 1,
-                "class": "H5T_INTEGER",
-                "base": "H5T_STD_I8LE",
-                "size": 1,
-            }
-            data_list = [2, 3, 5, 7, 11]
-            ref_value = db.toRef(1, type_item, data_list)
-            self.assertEqual(ref_value, data_list)
-
-            type_item = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "length": 8,
-                "strPad": "H5T_STR_NULLPAD",
-            }
-            data_list = ["Hypertext", "as", "engine", "of", "state"]
-            ref_value = db.toRef(1, type_item, data_list)
-
-    def testToTuple(self):
-        filepath = getFile("empty.h5", "totuple.h5")
-        data1d = [1, 2, 3]
-        data2d = [[1, 2], [3, 4]]
-        data3d = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            self.assertEqual(db.toTuple(1, data1d), [1, 2, 3])
-            self.assertEqual(db.toTuple(2, data2d), [[1, 2], [3, 4]])
-            self.assertEqual(db.toTuple(1, data2d), [(1, 2), (3, 4)])
-            self.assertEqual(
-                db.toTuple(3, data3d), [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
-            )
-            self.assertEqual(
-                db.toTuple(2, data3d), [[(1, 2), (3, 4)], [(5, 6), (7, 8)]]
-            )
-            self.assertEqual(
-                db.toTuple(1, data3d), [((1, 2), (3, 4)), ((5, 6), (7, 8))]
-            )
-
-    def testBytesArrayToList(self):
-        filepath = getFile("empty.h5", "bytestostring.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            val = db.bytesArrayToList(b"Hello")
-            self.assertTrue(type(val) is str)
-            val = db.bytesArrayToList(
-                [
-                    b"Hello",
-                ]
-            )
-            self.assertEqual(len(val), 1)
-            self.assertTrue(type(val[0]) is str)
-            self.assertEqual(val[0], "Hello")
-
-            import numpy as np
-
-            data = np.array([b"Hello"])
-            val = db.bytesArrayToList(data)
-            self.assertEqual(len(val), 1)
-            self.assertTrue(type(val[0]) is str)
-            self.assertEqual(val[0], "Hello")
-
-    def testGetDataValue(self):
-        # typeItem, value, dimension=0, dims=None):
-        filepath = getFile("empty.h5", "bytestostring.h5")
-        string_type = {
-            "charSet": "H5T_CSET_ASCII",
-            "class": "H5T_STRING",
-            "strPad": "H5T_STR_NULLTERM",
-            "length": "H5T_VARIABLE",
-        }
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            import numpy as np
-
-            data = np.array([b"Hello"])
-            val = db.getDataValue(string_type, data, dimension=1, dims=(1,))
-            self.assertTrue(type(val[0]) is str)
-
-    def testGetAclDataset(self):
-        filepath = getFile("tall.h5", "getacldataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111_uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-            acl_dset = db.getAclDataset(d111_uuid, create=True)
-            self.assertTrue(acl_dset.name.endswith(d111_uuid))
-            self.assertEqual(len(acl_dset.dtype), 7)
-            self.assertEqual(len(acl_dset.shape), 1)
-            self.assertEqual(acl_dset.shape[0], 0)
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-    def testSetAcl(self):
-        filepath = getFile("tall.h5", "setacl.h5")
-        user1 = 123
-        user2 = 456
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111_uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-            # add read/write acl for user1
-            acl_user1 = db.getAcl(d111_uuid, user1)
-
-            self.assertEqual(acl_user1["userid"], 0)
-            acl_user1["userid"] = user1
-            acl_user1["readACL"] = 0
-            acl_user1["updateACL"] = 0
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-            db.setAcl(d111_uuid, acl_user1)
-            acl = db.getAcl(d111_uuid, user1)
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 1)
-
-            # add read-only acl for user2
-            acl_user2 = db.getAcl(d111_uuid, user2)
-            self.assertEqual(acl_user2["userid"], 0)
-            acl_user2["userid"] = user2
-            acl_user2["create"] = 0
-            acl_user2["read"] = 1
-            acl_user2["update"] = 0
-            acl_user2["delete"] = 0
-            acl_user2["readACL"] = 0
-            acl_user2["updateACL"] = 0
-            db.setAcl(d111_uuid, acl_user2)
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 2)
-
-            # fetch and verify acls
-            acl = db.getAcl(d111_uuid, user1)
-            self.assertEqual(acl["userid"], user1)
-            self.assertEqual(acl["create"], 1)
-            self.assertEqual(acl["read"], 1)
-            self.assertEqual(acl["update"], 1)
-            self.assertEqual(acl["delete"], 1)
-            self.assertEqual(acl["readACL"], 0)
-            self.assertEqual(acl["updateACL"], 0)
-
-            acl = db.getAcl(d111_uuid, user2)
-            self.assertEqual(acl["userid"], user2)
-            self.assertEqual(acl["create"], 0)
-            self.assertEqual(acl["read"], 1)
-            self.assertEqual(acl["update"], 0)
-            self.assertEqual(acl["delete"], 0)
-            self.assertEqual(acl["readACL"], 0)
-            self.assertEqual(acl["updateACL"], 0)
-
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 2)
-
-            # get acl data_list
-            acls = db.getAcls(d111_uuid)
-            self.assertEqual(len(acls), 2)
-
-    def testRootAcl(self):
-        filepath = getFile("tall.h5", "rootacl.h5")
-        user1 = 123
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            d111_uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-            # add read/write acl for user1 at root
-            acl_root = db.getAcl(root_uuid, 0)
-            self.assertEqual(acl_root["userid"], 0)
-            acl_root["create"] = 0
-            acl_root["read"] = 1
-            acl_root["update"] = 0
-            acl_root["delete"] = 0
-            acl_root["readACL"] = 0
-            acl_root["updateACL"] = 0
-            num_acls = db.getNumAcls(root_uuid)
-            self.assertEqual(num_acls, 0)
-
-            db.setAcl(root_uuid, acl_root)
-            num_acls = db.getNumAcls(root_uuid)
-            self.assertEqual(num_acls, 1)
-
-            acl = db.getAcl(d111_uuid, user1)
-            num_acls = db.getNumAcls(d111_uuid)  # this will fetch the root acl
-            self.assertEqual(num_acls, 0)
-            self.assertEqual(acl["userid"], 0)
-            self.assertEqual(acl["create"], 0)
-            self.assertEqual(acl["read"], 1)
-            self.assertEqual(acl["update"], 0)
-            self.assertEqual(acl["delete"], 0)
-            self.assertEqual(acl["readACL"], 0)
-            self.assertEqual(acl["updateACL"], 0)
-
-    def testGetEvalStr(self):
-        queries = {
-            "date == 23": "rows['date'] == 23",
-            "wind == b'W 5'": "rows['wind'] == b'W 5'",
-            "temp > 61": "rows['temp'] > 61",
-            "(date >=22) & (date <= 24)": "(rows['date'] >=22) & (rows['date'] <= 24)",
-            "(date == 21) & (temp > 70)": "(rows['date'] == 21) & (rows['temp'] > 70)",
-            "(wind == b'E 7') | (wind == b'S 7')": "(rows['wind'] == b'E 7') | (rows['wind'] == b'S 7')",
-        }
-
-        fields = ["date", "wind", "temp"]
-        filepath = getFile("empty.h5", "getevalstring.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            for query in queries.keys():
-                eval_str = db._getEvalStr(query, fields)
-                self.assertEqual(eval_str, queries[query])
-
-    def testBadQuery(self):
-        queries = (
-            "foobar",  # no variable used
-            "wind = b'abc",  # non-closed literal
-            "(wind = b'N') & (temp = 32",  # missing paren
-            "foobar > 42",  # invalid field name
-            "import subprocess; subprocess.call(['ls', '/'])",
-        )  # injection attack
-
-        fields = ("date", "wind", "temp")
-        filepath = getFile("empty.h5", "badquery.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            for query in queries:
-                try:
-                    eval_str = db._getEvalStr(query, fields)
-                    self.log.error(f"got eval_str: {eval_str}")
-                    self.assertTrue(False)  # shouldn't get here
-                except IOError:
-                    pass  # ok
-
-    def testInjectionBlock(self):
-        queries = (
-            "import subprocess; subprocess.call(['ls', '/'])",
-        )  # injection attack
-
-        fields = ("import", "subprocess", "call")
-        filepath = getFile("empty.h5", "injectionblock.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            for query in queries:
-                try:
-                    eval_str = db._getEvalStr(query, fields)
-                    self.log.error(f"got eval_str: {eval_str}")
-                    self.assertTrue(False)  # shouldn't get here
-                except IOError:
-                    pass  # ok
 
+            # create an attribute using the committed type
+            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], list(attr_value))
+            attr_shape = attr["shape"]
+            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+            
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertTrue(isinstance(value, np.ndarray))
+   
 
 if __name__ == "__main__":
     # setup test files

From 2f546b999e1f1e18491e69cea1327f47c8d645f3 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 18 Feb 2025 10:14:18 -0800
Subject: [PATCH 008/129] first pass at h5py reader

---
 src/h5json/h5py_reader.py     | 273 ++++++++++++++++++++++++++++++++++
 src/h5json/h5reader.py        |  61 ++++++++
 src/h5json/hdf5db.py          |  49 +++---
 test/unit/h5py_reader_test.py | 126 ++++++++++++++++
 test/unit/hdf5db_test.py      |   4 -
 5 files changed, 488 insertions(+), 25 deletions(-)
 create mode 100644 src/h5json/h5py_reader.py
 create mode 100644 src/h5json/h5reader.py
 create mode 100644 test/unit/h5py_reader_test.py

diff --git a/src/h5json/h5py_reader.py b/src/h5json/h5py_reader.py
new file mode 100644
index 00000000..fc9bb07b
--- /dev/null
+++ b/src/h5json/h5py_reader.py
@@ -0,0 +1,273 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import logging
+
+import h5py
+import numpy as np
+
+from .objid import createObjId
+from .hdf5dtype import getTypeItem
+from .array_util import bytesArrayToList
+from .h5reader import H5Reader
+
+
+class H5pyReader(H5Reader):
+    """
+    This class can be used by HDF5DB to read content from an HDF5 file (using h5py) 
+    """
+
+    def visit(self, path, obj):
+        name = obj.__class__.__name__
+        self.log.info(f"visit: {path} name: {name}")
+        
+        obj_id = createObjId(obj_type=name, root_id=self._root_id)  # create uuid
+
+        self._id_map[obj_id] = obj        
+        
+        addr = h5py.h5o.get_info(obj.id).addr
+        self._addr_map[addr] = obj_id
+
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+        self._id_map = {}
+        self._addr_map = {}
+        self._filepath = filepath
+        f = h5py.File(self._filepath)
+        self._f = f
+        self._root_id = createObjId(obj_type="groups")
+        self._id_map[self._root_id] = f
+        addr = h5py.h5o.get_info(f.id).addr
+        self._addr_map[addr] = self._root_id
+        f.visititems(self.visit)
+
+    def close(self):
+        if self._f:
+            self._f.close()
+            self._f = None
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+    
+    def getAttribute(self, obj_id, name, include_data=True):
+        """ Return JSON for the given attribute """
+
+        obj = self._id_map[obj_id]
+
+        if name not in obj.attrs:
+            msg = f"Attribute: [{name}] not found in object: {obj.name}"
+            self.log.info(msg)
+            return None
+
+        # get the attribute!
+        attrObj = h5py.h5a.open(obj.id, np.bytes_(name))
+
+        item = {}
+
+        # check if the dataset is using a committed type
+        typeid = attrObj.get_type()
+        type_item = None
+        if h5py.h5t.TypeID.committed(typeid):
+            type_uuid = None
+            addr = h5py.h5o.get_info(typeid).addr
+            type_uuid = self.getObjIdByAddress(addr)
+            committedType = self.getCommittedTypeItemByUuid(type_uuid)
+            type_item = committedType["type"].copy()
+            type_item["id"] = type_uuid
+        else:
+            type_item = getTypeItem(attrObj.dtype)
+        item["type"] = type_item
+
+        shape_item = {}
+        if attrObj.shape is None or attrObj.get_storage_size() == 0:
+            # If storage size is 0, assume this is a null space obj
+            # See: h5py issue https://github.com/h5py/h5py/issues/279
+            shape_item["class"] = "H5S_NULL"
+        else:
+            if attrObj.shape:
+                shape_item["class"] = "H5S_SIMPLE"
+                shape_item["dims"] = attrObj.shape
+            else:
+                shape_item["class"] = "H5S_SCALAR"
+
+        item["shape"] = shape_item
+        if shape_item["class"] == "H5S_NULL":
+            include_data = False
+        elif isinstance(type_item, dict) and type_item["class"] in ("H5T_OPAQUE"):
+            # TBD - don't include data for OPAQUE until JSON serialization
+            # issues are addressed
+            include_data = False
+        else:
+            pass  # use include_data parameter
+
+        if include_data:
+            try:
+                data = obj.attrs[name] 
+            except TypeError:
+                self.log.warning("type error reading attribute")
+
+        if include_data and data is not None:
+            item["value"] = bytesArrayToList(data)
+             
+        # timestamps will be added by getAttributeItem()
+        return item
+
+    def getAttributes(self, obj_id, include_data=True):
+        h5obj = self._id_map[obj_id]
+        self.log.info(f"getAttributes: {obj_id} include_data={include_data}")
+        items = {}  # with python 3.7+, this will maintain the attribute order we got from h5py
+        attrs = h5obj.attrs
+        for name in attrs:
+            item = self.getAttribute(obj_id, name, include_data=include_data)
+            items[name] = item
+
+        return items
+    
+    def _getLink(self, parent, link_name):
+        if link_name not in parent:
+            return None
+
+        item = {"title": link_name}
+        # get the link object, one of HardLink, SoftLink, or ExternalLink
+        try:
+            linkObj = parent.get(link_name, None, False, True)
+            linkClass = linkObj.__class__.__name__
+        except TypeError:
+            # UDLink? set class as 'user'
+            linkClass = "UDLink"  # user defined links
+            item["class"] = "H5L_TYPE_USER_DEFINED"
+        if linkClass == "SoftLink":
+            item["class"] = "H5L_TYPE_SOFT"
+            item["h5path"] = linkObj.path
+        elif linkClass == "ExternalLink":
+            item["class"] = "H5L_TYPE_EXTERNAL"
+            item["h5path"] = linkObj.path
+            item["file"] = linkObj.filename
+        elif linkClass == "HardLink":
+            # Hardlink doesn't have any properties itself, just get the linked
+            # object
+            obj = parent[link_name]
+            addr = h5py.h5o.get_info(obj.id).addr
+            item["class"] = "H5L_TYPE_HARD"
+            if addr not in self._addr_map:
+                self.log.error(f"expected to find addr for link {link_name} in addr_map")
+                item["id"] = None
+            else:
+                item["id"] = self._addr_map[addr]
+             
+        return item
+
+    def _getLinks(self, grp):
+        items = {}  # with python 3.7+, this will maintain the link order we got from h5py
+        for link_name in grp:
+            item = self._getLink(grp, link_name)
+            items[link_name] = item
+        return items
+
+    def _getGroup(self, grp, include_links=True):
+        self.log.info("_getGroup alias: [{grp.name}]")
+
+        item = {"alias": grp.name}
+
+        if include_links:
+            links = self._getLinks(grp)
+            item["links"] = links
+        return item
+    
+    def _getDatatype(self, ctype, include_attrs=True):
+        self.log.info("getDatatype alias: ]{ctype.name}")
+        item = {"alias": ctype.name}
+        item["type"] = getTypeItem(ctype.dtype)
+
+        return item
+
+    
+    def _getDataset(self, dset):     
+        self.log.info("getDataset alias: [{dset.name}]")
+
+        item = {"alias": dset.name}
+
+        typeid = dset.id.get_type()
+        if h5py.h5t.TypeID.committed(typeid):
+            type_uuid = None
+            addr = h5py.h5o.get_info(typeid).addr
+            type_uuid = self.getObjIdByAddress(addr)
+            committedType = self.getObjectByid(type_uuid)
+            typeItem = committedType["type"]
+            typeItem["id"] = type_uuid
+        else:
+            typeItem = getTypeItem(dset.dtype)
+        item["type"] = typeItem
+        
+        shapeItem = {}
+        if dset.shape is None:
+            # new with h5py 2.6, null space datasets will return None for shape
+            shapeItem["class"] = "H5S_NULL"
+        elif len(dset.shape) == 0:
+            shapeItem["class"] = "H5S_SCALAR"
+        else:
+            shapeItem["class"] = "H5S_SIMPLE"
+            shapeItem["dims"] = list(dset.shape)
+            maxshape = []
+            include_maxdims = False
+            for i in range(len(dset.shape)):
+                extent = 0
+                if len(dset.maxshape) > i:
+                    extent = dset.maxshape[i]
+                    if extent is None:
+                        extent = 0
+                    if extent > dset.shape[i] or extent == 0:
+                        include_maxdims = True
+                maxshape.append(extent)
+            if include_maxdims:
+                shapeItem["maxdims"] = maxshape
+        item["shape"] = shapeItem
+        
+        return item
+    
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+        if obj_id not in self._id_map:
+            raise KeyError(f"{obj_id} not found")
+        h5obj = self._id_map[obj_id]
+        if isinstance(h5obj, h5py.Group):
+            obj_json = self._getGroup(h5obj, include_links=include_links)
+        elif isinstance(h5obj, h5py.Dataset):
+            obj_json = self._getDataset(h5obj)
+        elif isinstance(h5obj, h5py.Datatype):
+            obj_json = self._getDataType(h5obj)
+        else:
+            raise TypeError(f"unexpected object type: {type(h5obj)}")
+        
+        if include_attrs:
+            attributes = self.getAttributes(obj_id)
+            obj_json["attributes"] = attributes
+
+        return obj_json
+
+
+    def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+        pass
+
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
new file mode 100644
index 00000000..6f504105
--- /dev/null
+++ b/src/h5json/h5reader.py
@@ -0,0 +1,61 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+from abc import ABC, abstractmethod
+
+
+
+
+class H5Reader(ABC):
+    """
+    This abstract class defines properties and methods that the Hdf5db class uses for reading from an HDF5 
+    compatible storage medium.  
+    """
+
+
+    def __init__(
+        self,
+        filepath
+    ):
+        self._filepath = filepath
+       
+    @abstractmethod
+    def get_root_id(self):
+        """ Return root id """
+        pass
+
+    @abstractmethod 
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+        pass
+  
+    @abstractmethod
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        pass
+
+    @abstractmethod
+    def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+        pass
+
+    @abstractmethod
+    def close(self):
+        """ close any open handles to the storage """
+        pass
+
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index e7ea8d9c..2dfd9374 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -17,11 +17,12 @@
 from .dset_util import make_new_dset, resize_dataset
 from .objid import createObjId, getCollectionForId
 from .apiversion import _apiver
+from .h5reader import H5Reader
 
 
 class Hdf5db:
     """
-    This class is used to manage UUID lookup tables for primary HDF objects (Groups, Datasets,
+    This class is used to manage id lookup tables for primary HDF objects (Groups, Datasets,
     and Datatypes).  By default all data is held in-memory.  Initialize with h5_reader to read from
     an HDF5 compatible storage pool, and or, h5_writer to write to an HDF5 compatible storage pool.
     """
@@ -34,7 +35,7 @@ def getVersionInfo():
 
     def __init__(
         self,
-        h5_reader = None,
+        h5_reader: H5Reader = None,
         h5_writer = None,
         app_logger = None,
     ):
@@ -49,18 +50,28 @@ def __init__(
         self._writer = h5_writer
     
         if self._reader:
-            root_id = self._reader.get_objid("/")
-            kwargs = {"include_attrs": True, "include_links": True}
-            group_json = self._reader.get_obj(root_id, **kwargs)
+            root_id = self._reader.get_root_id()
+            group_json = self._reader.getObjectById(root_id)
         else:
+            root_id = createObjId(obj_type="groups")
             # create a root group
             group_json = {"links": {}, "attributes": {}, "cpl": {}}
             group_json["created"] = time.time()
-            root_id = createObjId(obj_type="groups")
-            self._db[root_id] = group_json
         
+        self._db[root_id] = group_json
         self._root_id = root_id
            
+    def close(self):
+        """ close reader and writer handles """
+        self.log.info("Hdf5db __close")
+        if self._writer:
+            self._writer.flush()
+            self._writer.close()
+        if self._reader:
+            self._reader.close()
+        self._root_id = None
+        self._db = {}
+
     def __enter__(self):
         """ called on package init """
         self.log.info("Hdf5db __enter")
@@ -69,18 +80,15 @@ def __enter__(self):
     def __exit__(self, type, value, traceback):
         """ called on package exit """
         self.log.info("Hdf5db __exit")
-        if self._writer:
-            self._writer.flush()
-            self._writer.close()
+        self.close()
          
 
     def getObjectById(self, obj_id):
-        """ return objecct with given id """
+        """ return object with given id """
         if obj_id not in self._db:
             if self._reader:
                 # load the obj from the reader
-                kwargs = {"include_attrs": True, "include_links": True}
-                obj_json = self._reader.get_obj(obj_id, **kwargs)
+                obj_json = self._reader.getObjectById(obj_id)
                 self._db[obj_id] = obj_json
             else:
                 raise KeyError(f"obj_id: {obj_id} not found")
@@ -152,7 +160,7 @@ def getObjectIdByPath(self, h5path, parent_id=None):
     
     def getObjectByPath(self, path):
         """ Get Object JSON at given path """
-        obj_id = self.getObjectDByPath(path)
+        obj_id = self.getObjectIDByPath(path)
         obj_json = self.getObjectById(obj_id)
         return obj_json    
 
@@ -166,7 +174,6 @@ def getDtype(self, obj_id):
             raise TypeError(f"{obj_id} does not have a datatype")
         type_json = obj_json["type"]
         
-        # TBD: what about datasets using a committed type?
         dtype = createDataType(type_json)
         return dtype
  
@@ -253,7 +260,8 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
             if ctype_id not in self._db:
                 raise KeyError(f"ctype: {ctype_id} not found")
             ctype_json = self.getObjectById(ctype_id)
-            type_json = ctype_json["type"]
+            type_json = ctype_json["type"].copy()
+            type_json["id"] = ctype_id
             dtype = createDataType(type_json)
 
         # First, make sure we have a NumPy array.   
@@ -352,11 +360,6 @@ def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
         #TBD
       
 
-    """
-    createDataset - creates new dataset given shape and datatype
-    Returns obj_id
-    """
-
     def createDataset(
         self,
         shape=None,
@@ -369,6 +372,10 @@ def createDataset(
         fillvalue=None,
         cpl=None,
     ):
+        """
+        createDataset - creates new dataset given shape and datatype
+        Returns obj_id
+        """
         
         kwds = {}
         if chunks:
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
new file mode 100644
index 00000000..420909ca
--- /dev/null
+++ b/test/unit/h5py_reader_test.py
@@ -0,0 +1,126 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import os
+import time
+import errno
+import os.path as op
+import stat
+import logging
+import shutil
+from h5json import Hdf5db
+from h5json.h5py_reader import H5pyReader
+
+
+def getFile(name, tgt, ro=False):
+    src = "data/hdf5/" + name
+    logging.info("copying file to this directory: " + src)
+
+    filepath = "./out/" + tgt
+
+    if op.isfile(filepath):
+        # make sure it's writable, before we copy over it
+        os.chmod(filepath, stat.S_IWRITE | stat.S_IREAD)
+    shutil.copyfile(src, filepath)
+    if ro:
+        logging.info("make read-only")
+        os.chmod(filepath, stat.S_IREAD)
+    return filepath
+
+
+def removeFile(name):
+    try:
+        os.stat(name)
+    except OSError:
+        return
+        # file does not exist
+    os.remove(name)
+
+
+class H5pyReaderTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5pyReaderTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.INFO)
+        handler = logging.FileHandler("./hdf5dbtest.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+
+    def testSimple(self):
+        filepath = getFile("tall.h5", "tall.h5", ro=True)
+        kwargs = {"app_logger": self.log}
+        with Hdf5db(h5_reader=H5pyReader(filepath, **kwargs), **kwargs) as db:
+            root_id = db.getObjectIdByPath("/")
+            root_json = db.getObjectById(root_id)
+
+            root_attrs = root_json["attributes"]
+            self.assertEqual(len(root_attrs), 2)
+            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+            root_links = root_json["links"]
+            self.assertEqual(len(root_links), 2)
+            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+            g1_link = root_links["g1"]
+            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+            g1_id = g1_link["id"]
+            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            dset_json = db.getObjectById(dset111_id)
+            dset_type = dset_json["type"]
+            self.assertEqual(dset_type["class"], "H5T_INTEGER")
+            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 2)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+            dset_shape = dset_json["shape"]
+            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(dset_shape["dims"], [10,10])
+
+            # try adding an attribute
+            db.createAttribute(dset111_id, "attr3", value=42)
+            dset_json = db.getObjectById(dset111_id)
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 3)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+            attr3_json = dset_attrs["attr3"]
+            attr3_shape = attr3_json["shape"]
+            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+            attr3_type = attr3_json["type"]
+            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+            attr3_value = attr3_json["value"]
+            self.assertEqual(attr3_value, 42)
+
+            db.close()
+
+            
+
+          
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
+
+
+
+
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index bee33014..2c2812dc 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -18,10 +18,6 @@
 from h5json.hdf5dtype import special_dtype, Reference
 
 
-UUID_LEN = 36  # length for uuid strings
-
-
-
 class Hdf5dbTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(Hdf5dbTest, self).__init__(*args, **kwargs)

From 4b9cb682988cbd55d9bb5ea775718b22e02755d2 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sat, 22 Feb 2025 20:03:56 -0800
Subject: [PATCH 009/129] added h5json_writer

---
 src/h5json/dset_util.py         |   2 +-
 src/h5json/h5json_writer.py     | 256 +++++++++++++++++++++++++
 src/h5json/h5py_reader.py       |  15 +-
 src/h5json/h5reader.py          |   9 +-
 src/h5json/h5writer.py          |  59 ++++++
 src/h5json/hdf5db.py            |  36 +++-
 src/h5json/objid.py             |  49 +++--
 test/unit/h5json_writer_test.py | 323 ++++++++++++++++++++++++++++++++
 test/unit/objid_test.py         |   3 +-
 9 files changed, 719 insertions(+), 33 deletions(-)
 create mode 100644 src/h5json/h5json_writer.py
 create mode 100644 src/h5json/h5writer.py
 create mode 100644 test/unit/h5json_writer_test.py

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 75854212..c5da3514 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -94,7 +94,7 @@ def make_new_dset(
     
 
     # TBD - other properties
-    dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl}
+    dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl, "attributes": {}}
     dset_json["created"] = time.time()
     dset_json["modified"] = None
 
diff --git a/src/h5json/h5json_writer.py b/src/h5json/h5json_writer.py
new file mode 100644
index 00000000..8add66bb
--- /dev/null
+++ b/src/h5json/h5json_writer.py
@@ -0,0 +1,256 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import json
+
+from .h5writer import H5Writer
+from .objid import stripId, getCollectionForId
+
+class H5JsonWriter(H5Writer):
+    """
+    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5 
+    compatible storage medium.  
+    """
+
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        super().__init__(filepath, append=append, app_logger=app_logger)
+        self.alias_db = {}
+        self.json = {}
+        self._no_data = no_data
+        self._root_uuid = None
+       
+    def flush(self):
+        """ Write dirty items """
+        # json writer doesn't support incremental updates, so we'll wait
+        # for close to write out database
+        self.log.info("flush")
+  
+    def close(self):
+        """ close storage handle """
+        self.dumpFile()
+
+     
+    def _setAlias(self, obj_id, id_set, h5path):
+        """ add the given h5path to the object's alias list
+            If the object is a group, recurse through each hard link """
+        obj_json = self.db.getObjectById(obj_id)
+        alias_list = self.alias_db[obj_id]
+        if h5path in alias_list:
+            return  # nothing to do
+        alias_list.append(h5path)
+        if getCollectionForId(obj_id) != "groups":
+            return  # done
+        id_set.add(obj_id)  # keep track of objects we've visited to avoid loops
+        links = obj_json["links"]
+        if h5path[-1] != '/':
+            h5path += '/'
+
+        for link_name in links:
+            link_json = links[link_name]
+            if link_json["class"] == "H5L_TYPE_HARD":
+                tgt_id = link_json["id"]
+                if tgt_id in id_set:
+                    self.log.info(f"_setAlias - circular loop found")
+                else:
+                    self._setAlias(tgt_id, id_set, h5path+link_name)
+        id_set.remove(obj_id)
+
+    def getAliasList(self):
+        """ update the alias list for each object """
+        # clear exiting aliases
+        obj_ids = self.db.getCollection()
+        for obj_id in obj_ids:
+            self.alias_db[obj_id] = []
+
+        self._setAlias(self._root_uuid, set(), "/")
+
+
+    def dumpAttribute(self, obj_id, attr_name):
+        self.log.info(f"dumpAttribute: [{attr_name}]")
+        item = self.db.getAttribute(obj_id, attr_name)
+        response = {"name": attr_name}
+        response["type"] = item["type"]
+        response["shape"] = item["shape"]
+        if True:  #not self.options.D:
+            if "value" not in item:
+                self.log.warning("no value key in attribute: " + attr_name)
+            else:
+                # dump values unless header -D was passed
+                response["value"] = item["value"]  
+        return response
+
+    def dumpAttributes(self, obj_id):
+        attrs = self.db.getAttributes(obj_id)
+        self.log.info(f"dumpAttributes: {obj_id}")
+        items = []
+        for attr_name in attrs:
+            item = self.dumpAttribute(obj_id, attr_name)
+            items.append(item)
+
+        return items
+
+    def dumpLink(self, obj_id, name):
+        item = self.db.getLink(obj_id, name)
+        response = {"class": item["class"]}
+        if "id" in item:
+            tgt_id = item["id"]
+            response["collection"] = getCollectionForId(tgt_id)
+            response["id"] = stripId(tgt_id)
+
+        for key in item:
+            if key in ("id", "created", "modified"):
+                continue
+            response[key] = item[key]
+        response["title"] = name
+        return response
+
+    def dumpLinks(self, obj_id):
+        links = self.db.getLinks(obj_id)
+        items = []
+        for link_name in links:
+            item = self.dumpLink(obj_id, link_name)
+            items.append(item)
+        return items
+
+    def dumpGroup(self, obj_id):
+        item = self.db.getObjectById(obj_id)
+        response = {}
+        alias = self.alias_db[obj_id]
+        response["alias"] = alias
+         
+        if "cpl" in item:
+            item["creationProperties"] = item["cpl"]
+        attributes = self.dumpAttributes(obj_id)
+        if attributes:
+            response["attributes"] = attributes
+        links = self.dumpLinks(obj_id)
+        if links:
+            response["links"] = links
+        return response
+
+    def dumpGroups(self):
+        groups = {}
+        item = self.dumpGroup(self._root_uuid)
+        root_uuid = stripId(self._root_uuid)
+        groups[root_uuid] = item
+        obj_ids = self.db.getCollection("groups")
+        for obj_id in obj_ids:
+            if obj_id == self._root_uuid:
+                continue
+            item = self.dumpGroup(obj_id)
+            obj_uuid = stripId(obj_id)
+            groups[obj_uuid] = item
+
+        self.json["groups"] = groups
+
+    def dumpDataset(self, obj_id):
+        response = {}
+        self.log.info("dumpDataset: " + obj_id)
+        item = self.db.getObjectById(obj_id)
+        if "alias" in item:
+            alias = item["alias"]
+            if alias:
+                self.log.info(f"dumpDataset alias: [{alias[0]}]")
+            response["alias"] = item["alias"]
+
+        response["type"] = item["type"]
+        shapeItem = item["shape"]
+        shape_rsp = {}
+        num_elements = 1
+        shape_rsp["class"] = shapeItem["class"]
+        if "dims" in shapeItem:
+            shape_rsp["dims"] = shapeItem["dims"]
+            for dim in shapeItem["dims"]:
+                num_elements *= dim
+        if "maxdims" in shapeItem:
+            maxdims = []
+            for dim in shapeItem["maxdims"]:
+                if dim == 0:
+                    maxdims.append("H5S_UNLIMITED")
+                else:
+                    maxdims.append(dim)
+            shape_rsp["maxdims"] = maxdims
+        response["shape"] = shape_rsp
+
+        if "cpl" in item:
+            response["creationProperties"] = item["cpl"]
+
+        attributes = self.dumpAttributes(obj_id)
+        if attributes:
+            response["attributes"] = attributes
+
+        if not self._no_data:
+            if num_elements > 0:
+                value = self.db.getDatasetValues(obj_id)
+                response["value"] = value  # dump values unless header flag was passed
+            else:
+                response["value"] = []  # empty list
+        return response
+
+    def dumpDatasets(self):
+        obj_ids = self.db.getCollection("datasets")
+        if obj_ids:
+            datasets = {}
+            for obj_id in obj_ids:
+                item = self.dumpDataset(obj_id)
+                datasets[obj_id] = item
+
+            self.json["datasets"] = datasets
+
+    def dumpDatatype(self, obj_id):
+        response = {}
+        item = self.db.getObjectById(obj_id)
+        response["alias"] = item["alias"]
+        response["type"] = item["type"]
+        if "cpl" in item:
+            response["creationProperties"] = item["cpl"]
+        attributes = self.dumpAttributes(obj_id)
+        if attributes:
+            response["attributes"] = attributes
+        return response
+
+    def dumpDatatypes(self):
+        obj_ids = self.db.getCollection("datatypes")
+        if obj_ids:
+            datatypes = {}
+            for obj_id in obj_ids:
+                item = self.dumpDatatype(obj_id)
+                datatypes[obj_id] = item
+
+            self.json["datatypes"] = datatypes
+
+
+    def dumpFile(self):
+        self._root_uuid = self.db.getObjectIdByPath("/")
+
+        db_version_info = self.db.getVersionInfo()
+
+        self.json["apiVersion"] = db_version_info["hdf5-json-version"]
+        self.json["root"] = stripId(self._root_uuid)
+        self.getAliasList()  # create alias_db with obj_id to alias list dict
+        self.dumpGroups()
+
+        self.dumpDatasets()
+
+        self.dumpDatatypes()
+
+        print(json.dumps(self.json, sort_keys=True, indent=4))
+
+
+
diff --git a/src/h5json/h5py_reader.py b/src/h5json/h5py_reader.py
index fc9bb07b..238c48e6 100644
--- a/src/h5json/h5py_reader.py
+++ b/src/h5json/h5py_reader.py
@@ -9,8 +9,6 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import logging
-
 import h5py
 import numpy as np
 
@@ -42,13 +40,16 @@ def __init__(
         filepath,
         app_logger=None
     ):
+        self._id_map = {}
+        self._addr_map = {}
+        """
         if app_logger:
             self.log = app_logger
         else:
             self.log = logging.getLogger()
-        self._id_map = {}
-        self._addr_map = {}
         self._filepath = filepath
+        """
+        super().__init__(filepath, app_logger=app_logger)
         f = h5py.File(self._filepath)
         self._f = f
         self._root_id = createObjId(obj_type="groups")
@@ -182,7 +183,7 @@ def _getLinks(self, grp):
         return items
 
     def _getGroup(self, grp, include_links=True):
-        self.log.info("_getGroup alias: [{grp.name}]")
+        self.log.info(f"_getGroup alias: [{grp.name}]")
 
         item = {"alias": grp.name}
 
@@ -192,7 +193,7 @@ def _getGroup(self, grp, include_links=True):
         return item
     
     def _getDatatype(self, ctype, include_attrs=True):
-        self.log.info("getDatatype alias: ]{ctype.name}")
+        self.log.info(f"getDatatype alias: ]{ctype.name}")
         item = {"alias": ctype.name}
         item["type"] = getTypeItem(ctype.dtype)
 
@@ -200,7 +201,7 @@ def _getDatatype(self, ctype, include_attrs=True):
 
     
     def _getDataset(self, dset):     
-        self.log.info("getDataset alias: [{dset.name}]")
+        self.log.info(f"getDataset alias: [{dset.name}]")
 
         item = {"alias": dset.name}
 
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index 6f504105..6a37a07a 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -11,7 +11,7 @@
 ##############################################################################
 from abc import ABC, abstractmethod
 
-
+import logging
 
 
 class H5Reader(ABC):
@@ -23,9 +23,14 @@ class H5Reader(ABC):
 
     def __init__(
         self,
-        filepath
+        filepath,
+        app_logger=None
     ):
         self._filepath = filepath
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
        
     @abstractmethod
     def get_root_id(self):
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
new file mode 100644
index 00000000..3aa77bb9
--- /dev/null
+++ b/src/h5json/h5writer.py
@@ -0,0 +1,59 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+from abc import ABC, abstractmethod
+import weakref
+import logging
+
+
+class H5Writer(ABC):
+    """
+    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5 
+    compatible storage medium.  
+    """
+
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        app_logger=None
+    ):
+        self._filepath = filepath
+        self._append = append
+        self._filepath = filepath
+        self._db_ref = None
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+    
+    def set_db(self, db):
+        #TBD - use weak ref?
+        self._db_ref = weakref.ref(db)
+
+    @property
+    def db(self):
+        if not self._db_ref:
+            raise ValueError("db not available")
+        return self._db_ref()
+
+    @abstractmethod
+    def flush(self):
+        """ Write dirty items """
+        pass
+  
+    @abstractmethod
+    def close(self):
+        """ close storage handle """
+        pass
+
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 2dfd9374..283b31fa 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -18,6 +18,7 @@
 from .objid import createObjId, getCollectionForId
 from .apiversion import _apiver
 from .h5reader import H5Reader
+from .h5writer import H5Writer
 
 
 class Hdf5db:
@@ -57,15 +58,23 @@ def __init__(
             # create a root group
             group_json = {"links": {}, "attributes": {}, "cpl": {}}
             group_json["created"] = time.time()
+
+        if self._writer:
+            self._writer.set_db(self)
         
         self._db[root_id] = group_json
         self._root_id = root_id
+
+    def flush(self):
+        """ write out any changes """
+        if self._writer:
+            self._writer.flush()
            
     def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
-        if self._writer:
-            self._writer.flush()
+        self.flush()
+        if self._writer:                         
             self._writer.close()
         if self._reader:
             self._reader.close()
@@ -228,6 +237,17 @@ def getAttribute(self, obj_id, name, includeData=True):
         
         return attr_json
     
+    def getAttributes(self, obj_id):
+        """
+        Get attributes given an object id and name
+        returns: JSON object
+        """
+
+        obj_json = self.getObjectById(obj_id)
+        attrs = obj_json["attributes"]
+         
+        return attrs
+    
     def getAttributeValue(self, obj_id, name):
         """ Return NDArray of the given attribute value """
         attr_json = self.getAttribute(obj_id, name)
@@ -424,7 +444,8 @@ def getLinks(self, grp_id):
         grp_json = self.getObjectById(grp_id)
         if "links" not in grp_json:
             raise KeyError(f"No links - {grp_id} not a group?")
-        return grp_json["links"]
+        links = grp_json["links"]
+        return links
       
     def getLink(self, grp_id, name):
         """ Get the given link """
@@ -493,11 +514,18 @@ def createGroup(self, cpl=None):
             group_json["cpl"] = cpl
         else:
             group_json["cpl"] = {}
-        group_json["created"] = time.time
+        group_json["created"] = time.time()
         group_json["modified"] = None
         self._db[grp_id] = group_json
         return grp_id
+   
 
+    def getCollection(self, col_type=None):
+        obj_ids = []
+        for obj_id in self._db:
+            if not col_type or getCollectionForId(obj_id) == col_type:
+                obj_ids.append(obj_id)
+        return obj_ids
 
     def __len__(self):
         # return the number of objects
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index 8c62a752..e36e8a22 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -113,13 +113,42 @@ def hexRot(ch):
     return format((int(ch, base=16) + 8) % 16, "x")
 
 
+def getCollectionForId(obj_id):
+    """return groups/datasets/datatypes based on id"""
+    if not isinstance(obj_id, str):
+        raise ValueError("invalid object id")
+    
+    collection = None
+    if obj_id.startswith("g-"):
+        collection = "groups"
+    elif obj_id.startswith("d-"):
+        collection = "datasets"
+    elif obj_id.startswith("t-"):
+        collection = "datatypes"
+    else:
+        raise ValueError(f"{obj_id} not a collection id")
+    return collection
+
+def stripId(obj_id):
+    """ return just the base id without any prefix (e.g. 'g-') """
+    if len(obj_id) == UUID_LEN:
+        return obj_id  # just return as is
+    if len(obj_id) == UUID_LEN + 2:
+        return obj_id[2:]
+    else:
+        raise ValueError("unexpected obj_id: {obj_id}")
+
+
 def isRootObjId(id):
     """returns true if this is a root id (only for v2 schema)"""
     if not isSchema2Id(id):
         raise ValueError("isRootObjId can only be used with v2 ids")
     validateUuid(id)  # will throw ValueError exception if not a objid
-    if id[0] != "g":
-        return False  # not a group
+    try:
+        if getCollectionForId(id) != "groups":
+            return False  # not a group
+    except ValueError:
+        return False
     token = getIdHexChars(id)
     # root ids will have last 16 chars rotated version of the first 16
     is_root = True
@@ -358,22 +387,6 @@ def isS3ObjKey(s3key):
     return valid
 
 
-def getCollectionForId(obj_id):
-    """return groups/datasets/datatypes based on id"""
-    if not isinstance(obj_id, str):
-        raise ValueError("invalid object id")
-    collection = None
-    if obj_id.startswith("g-"):
-        collection = "groups"
-    elif obj_id.startswith("d-"):
-        collection = "datasets"
-    elif obj_id.startswith("t-"):
-        collection = "datatypes"
-    else:
-        raise ValueError("not a collection id")
-    return collection
-
-
 def validateUuid(id, obj_class=None):
     """ verify the UUID is well-formed
         schema can be:
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
new file mode 100644
index 00000000..710ffe16
--- /dev/null
+++ b/test/unit/h5json_writer_test.py
@@ -0,0 +1,323 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import time
+import logging
+import numpy as np
+from h5json import Hdf5db
+from h5json.h5json_writer import H5JsonWriter
+from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId
+from h5json.hdf5dtype import special_dtype, Reference
+
+
+class H5JsonWriterTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5JsonWriterTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.DEBUG)
+        # create logger
+
+        handler = logging.FileHandler("./hdf5dbtest.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+        # self.log.propagate = False  # prevent log out going to stdout
+        self.log.info("init!")
+
+
+    def testGroup(self):
+    
+        with Hdf5db(h5_writer=H5JsonWriter("/tmp/foo.json", no_data=True), app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "attr1", value=[1,2,3,4])
+            db.createAttribute(root_id, "attr2", 42)
+            g1_id = db.createGroup()
+            db.createHardLink(root_id, "g1", g1_id)
+            g2_id = db.createGroup()
+            db.createHardLink(root_id, "g2", g2_id)
+
+            g1_1_id = db.createGroup()
+            db.createHardLink(g1_id, "g1.1", g1_1_id)
+            dset_111_id = db.createDataset(shape=(10,10), dtype=np.int32)
+            db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+            db.createSoftLink(g2_id, "slink", "somewhere")
+            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+            db.flush()
+            
+
+
+
+    def testNullSpaceAttribute(self):
+
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in item)
+            shape_item = item["shape"]
+            self.assertTrue("class" in shape_item)
+            self.assertEqual(shape_item["class"], "H5S_NULL")
+            self.assertTrue(item["created"] > time.time() - 1.0)
+            self.assertEqual(item["modified"], None)
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(value, None)
+
+    def testScalarAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dims = ()
+            value = 42
+            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+            self.assertEqual(item["value"], 42)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+            shape = item["shape"]
+            self.assertEqual(shape["class"], "H5S_SCALAR")
+
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            
+
+    def testFixedStringAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            value = "Hello, world!"
+            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["length"], 13)
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+            ret_value = db.getAttributeValue(root_id, "A1")
+       
+
+    def testVlenAsciiAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+ 
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=bytes)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+
+    def testVlenUtf8Attribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+ 
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=str)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+
+ 
+
+    def testIntAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            value = [2, 3, 5, 7, 11]
+            db.createAttribute(root_id, "A1", value, dtype=np.int16)
+            item = db.getAttribute(root_id, "A1")
+            self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(item_shape["dims"], [5,])
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+
+    def testCreateReferenceAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            db.createHardLink(root_id, "DS1", dset_id)
+
+            dt = special_dtype(ref=Reference)
+
+            ds1_ref = "datasets/" + dset_id
+            value = [ds1_ref,]
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            item = db.getAttribute(root_id, "A1")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in attr)
+            
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+            self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+            attr_value = item["value"]
+            self.assertEqual(len(attr_value), 1)
+            self.assertEqual(attr_value[0], ds1_ref)
+
+    def testCreateVlenReferenceAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            db.createHardLink(root_id, "DS1", dset_id)
+            grp_id = db.createGroup()
+            db.createHardLink(root_id, "G1", grp_id)
+
+            dt_base = special_dtype(ref=Reference)
+            dt = special_dtype(vlen=dt_base)
+             
+            ds1_ref = "datasets/" + dset_id
+            grp_ref = "groups/" + grp_id
+            ref_arr = np.zeros((2,), dtype=dt_base)
+            ref_arr[0] = ds1_ref
+            ref_arr[1] = grp_ref
+            vlen_arr = np.zeros((), dtype=dt)
+            vlen_arr[()] = ref_arr
+             
+            db.createAttribute(root_id, "A1", vlen_arr)
+            item = db.getAttribute(root_id, "A1")
+
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_VLEN")
+            self.assertEqual(item_type["size"], "H5T_VARIABLE")
+            base_type = item_type["base"]
+            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+            
+
+    def testCommittedType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dt = np.dtype("S15")
+             
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+
+            item_type = item["type"]
+
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item_type["length"], 15)
+
+            # create an attribute using the committed type
+            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], "hello world!")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_STRING")
+            self.assertEqual(attr_type["length"], 15)
+            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+
+
+    def testCommittedCompoundType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dt_str = special_dtype(vlen=str)
+            fields = []
+            fields.append(("field_1", np.dtype(">i8")))
+            fields.append(("field_2", ">f8"))
+            fields.append(("field_3", np.dtype("S15")))
+            fields.append(("field_4", dt_str))
+            dt = np.dtype(fields)
+
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            self.assertEqual(item["modified"], None)
+
+            item_type = item["type"]
+
+            self.assertEqual(item_type["class"], "H5T_COMPOUND")
+            fields = item_type["fields"]
+            self.assertEqual(len(fields), 4)
+
+            # create an attribute using the committed type
+            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], list(attr_value))
+            attr_shape = attr["shape"]
+            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+            
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertTrue(isinstance(value, np.ndarray))
+   
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
index af4ac21e..1357c184 100755
--- a/test/unit/objid_test.py
+++ b/test/unit/objid_test.py
@@ -12,7 +12,7 @@
 import unittest
 
 from h5json.objid import isRootObjId, isValidUuid, validateUuid
-from h5json.objid import createObjId, getCollectionForId
+from h5json.objid import createObjId, getCollectionForId, stripId
 from h5json.objid import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id
 
 
@@ -134,6 +134,7 @@ def testGetCollection(self):
         self.assertEqual(getCollectionForId(group_id), "groups")
         self.assertEqual(getCollectionForId(dataset_id), "datasets")
         self.assertEqual(getCollectionForId(ctype_id), "datatypes")
+        self.assertEqual(stripId(group_id), "314d61b8-9954-11e6-a733-3c15c2da029e")
         try:
             getCollectionForId(bad_id)
             self.assertTrue(False)

From bad401207b724db71e6420fbebfc0ac14c6841bc Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sat, 22 Feb 2025 20:22:54 -0800
Subject: [PATCH 010/129] create reader and writer packages

---
 pyproject.toml                           | 3 ++-
 src/h5json/hdf5db.py                     | 6 +++---
 src/h5json/reader/__init__.py            | 0
 src/h5json/{ => reader}/h5py_reader.py   | 8 ++++----
 src/h5json/{ => reader}/h5reader.py      | 0
 src/h5json/writer/__init__.py            | 0
 src/h5json/{ => writer}/h5json_writer.py | 3 +--
 src/h5json/{ => writer}/h5writer.py      | 0
 test/unit/h5json_writer_test.py          | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)
 create mode 100644 src/h5json/reader/__init__.py
 rename src/h5json/{ => reader}/h5py_reader.py (98%)
 rename src/h5json/{ => reader}/h5reader.py (100%)
 create mode 100644 src/h5json/writer/__init__.py
 rename src/h5json/{ => writer}/h5json_writer.py (99%)
 rename src/h5json/{ => writer}/h5writer.py (100%)

diff --git a/pyproject.toml b/pyproject.toml
index 5ddb024f..4ea50247 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,6 @@ dependencies = [
     "numpy >= 2.0; python_version>='3.9'",
     "jsonschema >=4.4.0",
     "tomli; python_version<'3.11'",
-    "numpy >=1.20,<2.0.0; python_version=='3.8'",
 ]
 
 dynamic = ["version"]
@@ -53,6 +52,8 @@ build-backend = "setuptools.build_meta"
 package-dir = { "" = "src" }
 packages = [
     "h5json",
+    "h5json.reader",
+    "h5json.writer",
     "h5json.h5tojson",
     "h5json.jsontoh5",
     "h5json.schema",
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 283b31fa..39de3b60 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -17,8 +17,8 @@
 from .dset_util import make_new_dset, resize_dataset
 from .objid import createObjId, getCollectionForId
 from .apiversion import _apiver
-from .h5reader import H5Reader
-from .h5writer import H5Writer
+from .reader.h5reader import H5Reader
+from .writer.h5writer import H5Writer
 
 
 class Hdf5db:
@@ -37,7 +37,7 @@ def getVersionInfo():
     def __init__(
         self,
         h5_reader: H5Reader = None,
-        h5_writer = None,
+        h5_writer: H5Writer = None,
         app_logger = None,
     ):
         if app_logger:
diff --git a/src/h5json/reader/__init__.py b/src/h5json/reader/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/h5json/h5py_reader.py b/src/h5json/reader/h5py_reader.py
similarity index 98%
rename from src/h5json/h5py_reader.py
rename to src/h5json/reader/h5py_reader.py
index 238c48e6..dc9220ae 100644
--- a/src/h5json/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -12,10 +12,10 @@
 import h5py
 import numpy as np
 
-from .objid import createObjId
-from .hdf5dtype import getTypeItem
-from .array_util import bytesArrayToList
-from .h5reader import H5Reader
+from ..objid import createObjId
+from ..hdf5dtype import getTypeItem
+from ..array_util import bytesArrayToList
+from ..h5reader import H5Reader
 
 
 class H5pyReader(H5Reader):
diff --git a/src/h5json/h5reader.py b/src/h5json/reader/h5reader.py
similarity index 100%
rename from src/h5json/h5reader.py
rename to src/h5json/reader/h5reader.py
diff --git a/src/h5json/writer/__init__.py b/src/h5json/writer/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/h5json/h5json_writer.py b/src/h5json/writer/h5json_writer.py
similarity index 99%
rename from src/h5json/h5json_writer.py
rename to src/h5json/writer/h5json_writer.py
index 8add66bb..81f9b4f9 100644
--- a/src/h5json/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -13,7 +13,7 @@
 import json
 
 from .h5writer import H5Writer
-from .objid import stripId, getCollectionForId
+from ..objid import stripId, getCollectionForId
 
 class H5JsonWriter(H5Writer):
     """
@@ -21,7 +21,6 @@ class H5JsonWriter(H5Writer):
     compatible storage medium.  
     """
 
-
     def __init__(
         self,
         filepath,
diff --git a/src/h5json/h5writer.py b/src/h5json/writer/h5writer.py
similarity index 100%
rename from src/h5json/h5writer.py
rename to src/h5json/writer/h5writer.py
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index 710ffe16..47ff3b1e 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -14,7 +14,7 @@
 import logging
 import numpy as np
 from h5json import Hdf5db
-from h5json.h5json_writer import H5JsonWriter
+from h5json.writer.h5json_writer import H5JsonWriter
 from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId
 from h5json.hdf5dtype import special_dtype, Reference
 

From c5c28a42e9ff8e1957ad56e0805d590f84676400 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 25 Feb 2025 22:31:05 -0800
Subject: [PATCH 011/129] basic dataset read/write methods added

---
 src/h5json/dset_util.py            |   4 +-
 src/h5json/hdf5db.py               | 296 +++++++---
 src/h5json/reader/h5py_reader.py   |  15 +-
 src/h5json/reader/h5reader.py      |   2 +-
 src/h5json/selections.py           | 834 +++++++++++++++++++++++++++++
 src/h5json/writer/h5json_writer.py |  24 +-
 test/unit/h5json_writer_test.py    |  18 +-
 test/unit/hdf5db_test.py           |  74 ++-
 8 files changed, 1159 insertions(+), 108 deletions(-)
 create mode 100644 src/h5json/selections.py

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index c5da3514..7a3a7aa3 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -107,7 +107,9 @@ def resize_dataset(dset_json, shape):
         raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
     if len(shape_class["dims"]) != len(shape):
         raise ValueError("Resize shape parameter doesn't match dataset's rank")
-    # TBD: validate shape
+    if shape_json["dims"] == list(shape):
+        # no change, just return
+        return
     shape_json["dims"] = list(shape)
     dset_json["modified"] = time.time()
         
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 39de3b60..991e7561 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -16,6 +16,7 @@
 from .array_util import jsonToArray, bytesArrayToList
 from .dset_util import make_new_dset, resize_dataset
 from .objid import createObjId, getCollectionForId
+from . import selections
 from .apiversion import _apiver
 from .reader.h5reader import H5Reader
 from .writer.h5writer import H5Writer
@@ -49,6 +50,9 @@ def __init__(
 
         self._reader = h5_reader
         self._writer = h5_writer
+        
+        self._new_objects = set()  # set of obj_id's
+        self._dirty_objects = set()  # set of obj_id's
     
         if self._reader:
             root_id = self._reader.get_root_id()
@@ -65,19 +69,70 @@ def __init__(
         self._db[root_id] = group_json
         self._root_id = root_id
 
+    @property
+    def db(self):
+        """ return object db dictionary """
+        return self._db
+    
+    @property
+    def reader(self):
+        """ return reader instance """
+        return self._reader
+    
+    @property
+    def writer(self):
+        """ return writer instance """
+        return self._writer
+    
+    @property
+    def root_id(self):
+        """ return root uuid """
+        return self._root_id
+    
+    def is_new(self, obj_id):
+        """ return true if this is a new object (has not been persisted) """
+        return obj_id in self._new_objects
+    
+    def is_dirty(self, obj_id):
+        """ return true if this object has been modified """
+        if self.is_new(obj_id):
+            return True
+        return obj_id in self._dirty_objects
+    
+    def make_dirty(self, obj_id):
+        """ Mark the object as dirty and update the lastModified timestamp """
+        if self.is_new(obj_id):
+            # object hasn't been initially written yet, just return
+            return
+        if obj_id not in self.db:
+            self.log.error("make dirty called on deleted object")
+            raise KeyError(f"obj_id: {obj_id} not found")
+        if self.db[obj_id] is None:
+            # object deleted, just return
+            return
+        obj_json = self.db[obj_id]
+        now = time.time()
+        obj_json["lastModified"] = now
+        self._dirty_objects.add(obj_id)
+
+
     def flush(self):
         """ write out any changes """
-        if self._writer:
-            self._writer.flush()
+        if not self.writer:
+            return  # nothing to do
+        if self.writer.flush():
+            # reset new and dirty sets
+            self._new_objects = set()
+            self._dirty_objects = set()
            
     def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
         self.flush()
-        if self._writer:                         
-            self._writer.close()
-        if self._reader:
-            self._reader.close()
+        if self.writer:                         
+            self.writer.close()
+        if self.reader:
+            self.reader.close()
         self._root_id = None
         self._db = {}
 
@@ -94,14 +149,14 @@ def __exit__(self, type, value, traceback):
 
     def getObjectById(self, obj_id):
         """ return object with given id """
-        if obj_id not in self._db:
-            if self._reader:
+        if obj_id not in self.db:
+            if self.reader:
                 # load the obj from the reader
-                obj_json = self._reader.getObjectById(obj_id)
-                self._db[obj_id] = obj_json
+                obj_json = self.reader.getObjectById(obj_id)
+                self.db[obj_id] = obj_json
             else:
                 raise KeyError(f"obj_id: {obj_id} not found")
-        obj_json = self._db[obj_id]
+        obj_json = self.db[obj_id]
 
         return obj_json
 
@@ -110,10 +165,10 @@ def getObjectIdByPath(self, h5path, parent_id=None):
         otherwise the root_id """
 
         if h5path == "/":
-            return self._root_id  # just return root id
+            return self.root_id  # just return root id
 
         if parent_id is None:
-            parent_id = self._root_id
+            parent_id = self.root_id
         self.log.debug(f"getObjectIdDByPath(h5path: {h5path} parent_id: {parent_id}")
         
         obj_json = self.getObjectById(parent_id)
@@ -175,9 +230,9 @@ def getObjectByPath(self, path):
 
     def getDtype(self, obj_id):
         """ Return numpy data type for given object id """
-        if obj_id not in self._db:
+        if obj_id not in self.db:
             raise KeyError(f"{obj_id} not found")
-        obj_json = self._db[obj_id]
+        obj_json = self.db[obj_id]
         if "type" not in obj_json:
             # group id?
             raise TypeError(f"{obj_id} does not have a datatype")
@@ -196,7 +251,7 @@ def createCommittedType(self, datatype, cpl=None):
         if cpl is None:
             cpl = {}
          
-        ctype_id = createObjId(obj_type="datatypes", root_id=self._root_id)
+        ctype_id = createObjId(obj_type="datatypes", root_id=self.root_id)
         if isinstance(datatype, np.dtype):
             dt = datatype
         else:
@@ -207,7 +262,8 @@ def createCommittedType(self, datatype, cpl=None):
         ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl}
         ctype_json["created"] = time.time()
         ctype_json["modified"] = None
-        self._db[ctype_id] = ctype_json
+        self.db[ctype_id] = ctype_json
+        self._new_objects.add(ctype_id)
         return ctype_id
   
 
@@ -224,15 +280,19 @@ def getAttribute(self, obj_id, name, includeData=True):
             msg = f"Attribute: [{name }] not found in object: {obj_id}"
             self.log.info(msg)
             return None
+        if attrs[name] == None:
+            msg = f"Attribute: [{name}] has been deleted"
+            self.log.info(None)
+            return None
         
         attr_json = attrs[name]
 
         if includeData and "value" not in attr_json:
             # Reader may not have pre-loaded large attributes
             # fetch it now
-            if not self._reader:
+            if not self.reader:
                 raise RuntimeError(f"Expected to find value for attribute {name} of {obj_id}")
-            attr_json = self._reader.get_attribute(obj_id, name)
+            attr_json = self.reader.get_attribute(obj_id, name)
             attr_json["value"] = attr_json  # this will update the _db
         
         return attr_json
@@ -245,8 +305,12 @@ def getAttributes(self, obj_id):
 
         obj_json = self.getObjectById(obj_id)
         attrs = obj_json["attributes"]
+        names = []
+        for name in attrs:
+            if attrs[name] != None:
+                names.append(name)
          
-        return attrs
+        return names
     
     def getAttributeValue(self, obj_id, name):
         """ Return NDArray of the given attribute value """
@@ -277,7 +341,7 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
             ctype_id = dtype[len("datatypes/"):]
             if getCollectionForId(ctype_id) != "datatypes":
                 raise TypeError(f"unexpected dtype value for createAttribute: {dtype}")
-            if ctype_id not in self._db:
+            if ctype_id not in self.db:
                 raise KeyError(f"ctype: {ctype_id} not found")
             ctype_json = self.getObjectById(ctype_id)
             type_json = ctype_json["type"].copy()
@@ -345,21 +409,21 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         obj_json = self.getObjectById(obj_id)
         attrs_json = obj_json["attributes"]
         if name in attrs_json:
-                # replace, update modified timestamp
+            # replace, keep, created timestamp
             created = attrs_json["created"]
-            modified = time.time()
         else:
             created = time.time()
-            modified = None
         type_json = getTypeItem(dtype)
         # finally put it all together...
         attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
         attr_json["created"] = created
-        attr_json["modified"] = modified
 
         # slot into the obj_json["attrs"]
         attrs_json[name] = attr_json
 
+        # mark object as dirty
+        self.make_dirty(obj_id)
+
 
     def deleteAttribute(self, obj_id, name):
         """ delete the given attribute """
@@ -367,18 +431,93 @@ def deleteAttribute(self, obj_id, name):
         attrs_json = obj_json["attributes"]
         if name not in attrs_json:
             raise KeyError(f"attribute [{name}] not found in {obj_id}")
-        del attrs_json[name]
+        attrs_json[name] = None  # mark key for deletion
+        
+        self.make_dirty(obj_id)
 
 
-    def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
+    def getDatasetValues(self, dset_id, sel):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
         number of elements as the rank of the dataset.
         """
-        self.log.info(f"getDatasetValues obj_id: {obj_id}, slices: {slices} format: {format}")
-        #TBD
-      
+        self.log.info(f"getDatasetValues dset_id: {dset_id}, sel: {sel}")
+        dset_json = self.getObjectById(dset_id)
+        shape_json = dset_json["shape"]
+        if not isinstance(sel, selections.Selection):
+            raise TypeError("Expected Selection class")
+       
+        if shape_json["class"] == "H5S_NULL":
+            return None
+
+        if shape_json["class"] == "H5S_SCALAR":
+            if sel.select_type != sel.H5S_SELECT_ALL:
+                # TBD: support other selection types
+                raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
+            if sel.shape != ():
+                raise ValueError("Selection shape does not match dataset shape")
+        else:
+            dims = tuple(shape_json["dims"])
+            if sel.shape != dims:
+                raise ValueError("Selection shape does not match dataset shape")
+        rank = len(dims)  
+            
+        dtype = self.getDtype(dset_id)
+        if self.reader:
+            arr = self.reader.getDatasetValues(dset_id, sel)
+        else:
+            # TBD: Initialize with fill value if non-zero
+            arr = np.zeros(sel.shape, dtype=dtype)
+
+        if "updates" in dset_json:
+            # apply any non-flushed changes that intersect the current selection
+            updates = dset_json["updates"]
+            for (update_sel, update_val) in updates:
+                sel_inter = selections.intersect(sel, update_sel)
+                if sel_inter.nselect == 0:
+                    continue
+                # update portion of arr, that intersects update_val
+                slices = []
+                for dim in range(rank):
+                    start = sel_inter.start[dim] - sel.start[dim]
+                    stop = start + sel_inter.count[dim]
+                    slices.append(slice(start, stop, 1))
+                slices = tuple(slices)
+                arr[slices] = update_val
+
+        return arr
+    
+    def setDatasetValues(self, dset_id, sel, arr):
+        """
+        Write the given ndarray to the dataset using the selection
+        """
+        dset_json = self.getObjectById(dset_id)
+        shape_json = dset_json["shape"]
+        if not isinstance(sel, selections.Selection):
+            raise TypeError("Expected Selection class")
+        if sel.select_type not in (selections.H5S_SELECT_HYPERSLABS, selections.H5S_SELECT_ALL):
+            # TBD: support other selection types
+            raise ValueError("Only hyperslab selections are currently supported")
+        if not isinstance(arr, np.ndarray):
+            raise TypeError("Expected ndarray for data value")
+        if shape_json["class"] == "H5S_NULL":
+            raise ValueError("writing to null space dataset not supported")
+        if shape_json["class"] == "H5S_SCALAR":
+            if sel.shape != ():
+                raise ValueError("Selection shape does not match dataset shape")
+            if len(arr.shape) > 0:
+                raise TypeError("Expected scalar ndarray for scalar dataset")
+        else:
+            dims = tuple(shape_json["dims"])
+            if sel.shape != dims:
+                raise ValueError("Selection shape does not match dataset shape")
+        if "updates" not in dset_json or sel.select_type == selections.H5S_SELECT_ALL:
+            # for select all, throw out any existing updates since this will overwrite them
+            dset_json["updates"] = []
+        updates = dset_json["updates"]
+        updates.append((sel, arr.copy()))
+        self.make_dirty(dset_id)
 
     def createDataset(
         self,
@@ -414,8 +553,9 @@ def createDataset(
             kwds["cpl"] = cpl
         dset_json = make_new_dset(shape=shape, dtype=dtype, **kwds)
  
-        dset_id = createObjId("datasets", root_id=self._root_id)   
-        self._db[dset_id] = dset_json 
+        dset_id = createObjId("datasets", root_id=self.root_id)   
+        self.db[dset_id] = dset_json 
+        self._new_objects.add(dset_id)
         return dset_id
 
 
@@ -426,18 +566,25 @@ def resizeDataset(self, dset_id, shape):
         self.log.info(f"resizeDataset {dset_id}, {shape}")
         
         dset_json = self.getObjectById(dset_id)  # will throw exception if not found
-        resize_dataset(dset_json, shape)
+        if resize_dataset(dset_json, shape):
+            self._dirty_objects.add(dset_id)
          
 
     def deleteObject(self, obj_id):
         """ Delete the given object """
         self.log.info(f"deleteObject: {obj_id}")
-        if obj_id not in self._db:
+        if obj_id not in self.db:
             raise KeyError(f"Object {obj_id} not found for deletion")
-        if obj_id == self._root_id:
+        if obj_id == self.root_id:
             raise KeyError("Root group cannot be deleted")
-        del self._db[obj_id]
-        # TBD: add to pending deleted items
+        self.db[obj_id] = None
+        
+        if obj_id in self._new_objects:
+            self._new_objects.remove(obj_id)
+
+        if obj_id in self._dirty_objects:
+            self._dirty_objects.remove(obj_id)
+
         
     def getLinks(self, grp_id):
         """ Get the links for the given group """
@@ -445,100 +592,113 @@ def getLinks(self, grp_id):
         if "links" not in grp_json:
             raise KeyError(f"No links - {grp_id} not a group?")
         links = grp_json["links"]
-        return links
+        names = []
+        for name in links:
+            if links[name] != None:
+                names.append(name)
+        return names
       
     def getLink(self, grp_id, name):
         """ Get the given link """
         
-        links = self.getLinks(grp_id)
+        obj_json = self.getObjectById(grp_id)
+        links = obj_json["links"]
         if name not in links:
-            raise KeyError(f"Link [{name}] not found in {grp_id}")
+            self.log.info(f"Link [{name}] not found in {grp_id}")
+            return None
+        if links[name] == None:
+            self.log.info(f"Link {name} in {grp_id} has been deleted")
+            return None
+
         return links[name]
     
+    def _addLink(self, grp_id, name, link_json):
+        obj_json = self.getObjectById(grp_id)
+        links = obj_json["links"]
+        links[name] = link_json
+        self.make_dirty(grp_id)
+    
     def createHardLink(self, grp_id, name, tgt_id):
         """ Create a new hardlink """
-        links = self.getLinks(grp_id)
-        if name in links:
-            self.deleteLink(grp_id, name)
         link_json = {"class": "H5L_TYPE_HARD", "id": tgt_id}
         link_json["created"] = time.time()
-        links[name] = link_json
+        self._addLink(grp_id, name, link_json)
 
     def createSoftLink(self, grp_id, name, h5path):
         """ Create a soft link """
-        links = self.getLinks(grp_id)
-        if name in links:
-            self.deleteLink(grp_id, name)
         link_json = {"class": "H5L_TYPE_SOFT", "h5path": h5path}
         link_json["created"] = time.time()
-        links[name] = link_json
+        self._addLink(grp_id, name, link_json)
 
     def createCustomLink(self, grp_id, name, link_json):
         """ create a custom link """
-        links = self.getLinks(grp_id)
-        if name in links:
-            self.deleteLink(grp_id, name)
         if link_json.get("class") != "H5L_TYPE_USER_DEFINED":
             link_json["class"] = "H5L_TYPE_USER_DEFINED"
         link_json["created"] = time.time()
-        links[name] = link_json
-
+        self._addLink(grp_id, name, link_json)
 
     def createExternalLink(self, grp_id, name, h5path, filepath):
         """ Create a external link link """
-        links = self.getLinks(grp_id)
-        if name in links:
-            self.deleteLink(grp_id, name)
         link_json = {"class": "H5L_TYPE_EXTERNAL", "h5path": h5path, "file": filepath}
         link_json["created"] = time.time()
-        links[name] = link_json
+        self._addLink(grp_id, name, link_json)
  
     def deleteLink(self, grp_id, name):
         """ Delete the given link """
         grp_json = self.getObjectById(grp_id)
         if "links" not in grp_json:
             raise KeyError(f"No links - {grp_id} not a group?")
-        links = self.getLinks(grp_id)
+        links = grp_json["links"]
         if name not in links:
             raise KeyError(f"Link [{name}] not found in {grp_id}")
-        del links[name]
-        grp_json["modified"] = time.time()
+        links[name] = None  # mark for deletion
+        self.make_dirty(grp_id)
  
 
     def createGroup(self, cpl=None):
         """ Create a new group """
 
-        grp_id = createObjId("groups", root_id=self._root_id)
+        grp_id = createObjId("groups", root_id=self.root_id)
         group_json = {"attributes": {}, "links": {}}
         if cpl:
             group_json["cpl"] = cpl
         else:
             group_json["cpl"] = {}
         group_json["created"] = time.time()
-        group_json["modified"] = None
-        self._db[grp_id] = group_json
+        self.db[grp_id] = group_json
+        self._new_objects.add(grp_id)
         return grp_id
    
 
     def getCollection(self, col_type=None):
         obj_ids = []
-        for obj_id in self._db:
+        for obj_id in self.db:
+            if self.db[obj_id] == None:
+                # skip deleted objects
+                continue
             if not col_type or getCollectionForId(obj_id) == col_type:
                 obj_ids.append(obj_id)
         return obj_ids
 
     def __len__(self):
         # return the number of objects
-        return len(self._db)
-
+        count = 0
+        for obj_id in self.db:
+            # skip deleted objects
+            if self.db[obj_id] != None:
+                count += 1
+        return count
 
     def __iter__(self):
         """ Iterate over object ids """
 
-        for obj_id in self._db:
+        for obj_id in self.db:
+            if self.db[obj_id] == None:
+                # skip deleted objects
+                continue
             yield obj_id
 
 
     def __contains__(self, obj_id):
         """ Test if a obj id  exists """
-        return obj_id in self._db
+        return obj_id in self.db and self.db[obj_id] != None
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index dc9220ae..4e7c9b55 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -42,13 +42,10 @@ def __init__(
     ):
         self._id_map = {}
         self._addr_map = {}
-        """
         if app_logger:
             self.log = app_logger
         else:
             self.log = logging.getLogger()
-        self._filepath = filepath
-        """
         super().__init__(filepath, app_logger=app_logger)
         f = h5py.File(self._filepath)
         self._f = f
@@ -264,11 +261,19 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         return obj_json
 
 
-    def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
+    def getDatasetValues(self, dset_id, selection):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
         number of elements as the rank of the dataset.
         """
-        pass
+        dset = self._id_map[dset_id]
+        self.log.info(f"getDatasetValues: {dset_id}")
+        if dset.shape is None:
+            # TBD: return something like h5py.Empty in this case?
+            return None
+        arr = dset[selection]
+        return arr
+
+       
 
diff --git a/src/h5json/reader/h5reader.py b/src/h5json/reader/h5reader.py
index 6a37a07a..69a45d07 100644
--- a/src/h5json/reader/h5reader.py
+++ b/src/h5json/reader/h5reader.py
@@ -51,7 +51,7 @@ def getAttribute(self, obj_id, name, includeData=True):
         pass
 
     @abstractmethod
-    def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"):
+    def getDatasetValues(self, obj_id, selection):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
new file mode 100644
index 00000000..4d700d94
--- /dev/null
+++ b/src/h5json/selections.py
@@ -0,0 +1,834 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+# We use __getitem__ side effects, which pylint doesn't like.
+# pylint: disable=pointless-statement
+
+"""
+    High-level access to HDF5 dataspace selections
+"""
+
+from __future__ import absolute_import
+
+import numpy as np
+
+H5S_SEL_POINTS = 0
+H5S_SELECT_SET = 1
+H5S_SELECT_APPEND = 2
+H5S_SELECT_PREPEND = 3
+H5S_SELECT_OR = 4
+H5S_SELECT_NONE = 5
+H5S_SELECT_ALL = 6
+H5S_SELECT_HYPERSLABS = 7
+H5S_SELECT_NOTB = 8
+H5S_SELLECT_FANCY = 9
+
+
+def select(obj, args):
+    """ High-level routine to generate a selection from arbitrary arguments
+    to __getitem__.  The arguments should be the following:
+
+    obj
+        Datatset object
+
+    args
+        Either a single argument or a tuple of arguments.  See below for
+        supported classes of argument.
+
+    Argument classes:
+
+    Single Selection instance
+        Returns the argument.
+
+    numpy.ndarray
+        Must be a boolean mask.  Returns a PointSelection instance.
+
+    RegionReference
+        Returns a Selection instance.
+
+    Indices, slices, ellipses only
+        Returns a SimpleSelection instance
+
+    Indices, slices, ellipses, lists or boolean index arrays
+        Returns a FancySelection instance.
+    """
+    if not isinstance(args, tuple):
+        args = (args,)
+
+    if hasattr(obj, "shape") and obj.shape == ():
+        # scalar object
+        sel = ScalarSelection(obj.shape, args)
+        return sel
+
+    # "Special" indexing objects
+    if len(args) == 1:
+
+        arg = args[0]
+
+        if isinstance(arg, Selection):
+            if arg.shape != obj.shape:
+                raise TypeError("Mismatched selection shape")
+            return arg
+
+        elif isinstance(arg, np.ndarray) or isinstance(arg, list):
+            sel = PointSelection(obj.shape)
+            sel[arg]
+            return sel
+        """
+        #todo - RegionReference
+        elif isinstance(arg, h5r.RegionReference):
+            sid = h5r.get_region(arg, dsid)
+            if shape != sid.shape:
+                raise TypeError("Reference shape does not match dataset shape")
+
+            return Selection(shape, spaceid=sid)
+        """
+
+    for a in args:
+        use_fancy = False
+        if isinstance(a, np.ndarray):
+            use_fancy = True
+        elif a is []:
+            use_fancy = True
+        elif not isinstance(a, slice) and a is not Ellipsis:
+            try:
+                int(a)
+            except Exception:
+                use_fancy = True
+        if use_fancy and hasattr(obj, "shape"):
+            sel = FancySelection(obj.shape)
+            sel[args]
+            return sel
+    if hasattr(obj, "shape"):
+        sel = SimpleSelection(obj.shape)
+    else:
+        sel = SimpleSelection(obj)
+    sel[args]
+    return sel
+
+def intersect(s1, s2):
+    """ Return the intersection of two selections """
+    # TBD: this is currently only working for simple selections with stride 1
+    valid_select_types = (H5S_SELECT_HYPERSLABS, H5S_SELECT_ALL)
+    if not isinstance(s1, Selection):
+        raise TypeError("Expected selection type for first arg")
+    if not isinstance(s2, Selection):
+        raise TypeError("Expected selection type for second arg")
+    if s1.select_type not in valid_select_types:
+        raise TypeError("Expected hyperslab selection for first arg")
+    if s2.select_type not in valid_select_types:
+        raise TypeError("Expected hyperslab selection for second arg")
+    if s1.shape != s2.shape:
+        raise ValueError("selections have incompatible shapes")
+    
+    slices = []
+    rank = len(s1.shape)
+    for dim in range(rank):
+        start = max(s1.start[dim], s2.start[dim])
+        stop = min(s1.start[dim] + s1.count[dim], s2.start[dim] + s2.count[dim])
+        msg = "stepped slices not currently supported"
+        if s1.step[dim] > 1:
+            raise ValueError(msg)
+        if s2.step[dim] > 1:
+            raise ValueError("stepped slices not currently supported")
+        if start > stop:
+            stop = start
+        slices.append(slice(start, stop, 1))
+    slices = tuple(slices)
+
+    return select(s1.shape, slices)
+
+
+class Selection(object):
+
+    """
+        Base class for HDF5 dataspace selections.  Subclasses support the
+        "selection protocol", which means they have at least the following
+        members:
+
+        __init__(shape)   => Create a new selection on "shape"-tuple
+        __getitem__(args) => Perform a selection with the range specified.
+                             What args are allowed depends on the
+                             particular subclass in use.
+
+        id (read-only) =>      h5py.h5s.SpaceID instance
+        shape (read-only) =>   The shape of the dataspace.
+        mshape  (read-only) => The shape of the selection region.
+                               Not guaranteed to fit within "shape", although
+                               the total number of points is less than
+                               product(shape).
+        nselect (read-only) => Number of selected points.  Always equal to
+                               product(mshape).
+
+        broadcast(target_shape) => Return an iterable which yields dataspaces
+                                   for read, based on target_shape.
+
+        The base class represents "unshaped" selections (1-D).
+    """
+
+    def __init__(self, shape, *args, **kwds):
+        """ Create a selection.   """
+
+        shape = tuple(shape)
+        self._shape = shape
+
+        self._select_type = H5S_SELECT_ALL
+
+    @property
+    def select_type(self):
+        """ SpaceID instance """
+        return self._select_type
+
+    @property
+    def shape(self):
+        """ Shape of whole dataspace """
+        return self._shape
+
+    @property
+    def nselect(self):
+        """ Number of elements currently selected """
+
+        return self.getSelectNpoints()
+
+    @property
+    def mshape(self):
+        """ Shape of selection (always 1-D for this class) """
+        return (self.nselect,)
+
+    def getSelectNpoints(self):
+        npoints = None
+        if self._select_type == H5S_SELECT_NONE:
+            npoints = 0
+        elif self._select_type == H5S_SELECT_ALL:
+            dims = self._shape
+            npoints = 1
+            for nextent in dims:
+                npoints *= nextent
+        else:
+            raise IOError("Unsupported select type")
+        return npoints
+
+    def broadcast(self, target_shape):
+        """ Get an iterable for broadcasting """
+        if np.product(target_shape) != self.nselect:
+            raise TypeError("Broadcasting is not supported for point-wise selections")
+        yield self._id
+
+    def __getitem__(self, args):
+        raise NotImplementedError("This class does not support indexing")
+
+    def __repr__(self):
+        return f"Selection(shape:{self._shape})"
+
+
+class PointSelection(Selection):
+
+    """
+        Represents a point-wise selection.  You can supply sequences of
+        points to the three methods append(), prepend() and set(), or a
+        single boolean array to __getitem__.
+    """
+    def __init__(self, shape, *args, **kwds):
+        """ Create a Point selection.   """
+        Selection.__init__(self, shape, *args, **kwds)
+        self._points = []
+
+    @property
+    def points(self):
+        """ selection points """
+        return self._points
+
+    def getSelectNpoints(self):
+        npoints = None
+        if self._select_type == H5S_SELECT_NONE:
+            npoints = 0
+        elif self._select_type == H5S_SELECT_ALL:
+            dims = self._shape
+            npoints = 1
+            for nextent in dims:
+                npoints *= nextent
+        elif self._select_type == H5S_SEL_POINTS:
+            dims = self._shape
+            rank = len(dims)
+            if len(self._points) == rank and not type(self._points[0]) in (list, tuple, np.ndarray):
+                npoints = 1
+            else:
+                npoints = len(self._points)
+        else:
+            raise IOError("Unsupported select type")
+        return npoints
+
+    def _perform_selection(self, points, op):
+        """ Internal method which actually performs the selection """
+        if isinstance(points, np.ndarray) or True:
+            points = np.asarray(points, order='C', dtype='u8')
+            if len(points.shape) == 1:
+                # points.shape = (1,points.shape[0])
+                pass
+
+        if self._select_type != H5S_SEL_POINTS:
+            op = H5S_SELECT_SET
+        self._select_type = H5S_SEL_POINTS
+
+        if op == H5S_SELECT_SET:
+            self._points = points
+        elif op == H5S_SELECT_APPEND:
+            self._points.extent(points)
+        elif op == H5S_SELECT_PREPEND:
+            tmp = self._points
+            self._points = points
+            self._points.extend(tmp)
+        else:
+            raise ValueError("Unsupported operation")
+
+    # def _perform_list_selection(points, H5S_SELECT_SET):
+
+    def __getitem__(self, arg):
+        """ Perform point-wise selection from a NumPy boolean array """
+        if isinstance(arg, list):
+            points = arg
+        else:
+            if not (isinstance(arg, np.ndarray) and arg.dtype.kind == 'b'):
+                raise TypeError("PointSelection __getitem__ only works with bool arrays")
+            if not arg.shape == self._shape:
+                raise TypeError("Boolean indexing array has incompatible shape")
+
+            points = np.transpose(arg.nonzero())
+        self.set(points)
+        return self
+
+    def append(self, points):
+        """ Add the sequence of points to the end of the current selection """
+        self._perform_selection(points, H5S_SELECT_APPEND)
+
+    def prepend(self, points):
+        """ Add the sequence of points to the beginning of the current selection """
+        self._perform_selection(points, H5S_SELECT_PREPEND)
+
+    def set(self, points):
+        """ Replace the current selection with the given sequence of points"""
+        """
+        if isinstance(points, list):
+            # selection with list of points
+            self._perform_list_selection(points, H5S_SELECT_SET)
+
+        else:
+            # selection with boolean ndarray
+        """
+        self._perform_selection(points, H5S_SELECT_SET)
+
+    def __repr__(self):
+        return f"PointSelection(shape:{self._shape}, {len(self._points)} points)"
+
+
+class SimpleSelection(Selection):
+
+    """ A single "rectangular" (regular) selection composed of only slices
+        and integer arguments.  Can participate in broadcasting.
+    """
+
+    @property
+    def mshape(self):
+        """ Shape of current selection """
+        return self._mshape
+
+    @property
+    def start(self):
+        return self._sel[0]
+
+    @property
+    def count(self):
+        return self._sel[1]
+
+    @property
+    def step(self):
+        return self._sel[2]
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        rank = len(self._shape)
+        self._sel = ((0,) * rank, self._shape, (1,) * rank, (False,) * rank)
+        self._mshape = self._shape
+        self._select_type = H5S_SELECT_ALL
+
+    def __getitem__(self, args):
+
+        if not isinstance(args, tuple):
+            args = (args,)
+
+        if self._shape == ():
+            if len(args) > 0 and args[0] not in (Ellipsis, ()):
+                raise TypeError("Invalid index for scalar dataset (only ..., () allowed)")
+            self._select_type = H5S_SELECT_ALL
+            return self
+
+        start, count, step, scalar = _handle_simple(self._shape, args)
+        self._sel = (start, count, step, scalar)
+
+        # self._id.select_hyperslab(start, count, step)
+        self._select_type = H5S_SELECT_HYPERSLABS
+
+        self._mshape = tuple(x for x, y in zip(count, scalar) if not y)
+
+        return self
+
+    def getSelectNpoints(self):
+        """Return number of elements in current selection
+        """
+        npoints = None
+        if self._select_type == H5S_SELECT_NONE:
+            npoints = 0
+        elif self._select_type == H5S_SELECT_ALL:
+            dims = self._shape
+            npoints = 1
+            for nextent in dims:
+                npoints *= nextent
+        elif self._select_type == H5S_SELECT_HYPERSLABS:
+            dims = self._shape
+            npoints = 1
+            rank = len(dims)
+            for i in range(rank):
+                npoints *= self.count[i]
+        else:
+            raise IOError("Unsupported select type")
+        return npoints
+
+    def getQueryParam(self):
+        """ Get select param for use with HDF Rest API"""
+        param = ''
+        rank = len(self._shape)
+        if rank == 0:
+            return None
+
+        param += "["
+        for i in range(rank):
+            start = self.start[i]
+            stop = start + (self.count[i] * self.step[i])
+            if stop > self._shape[i]:
+                stop = self._shape[i]
+            dim_sel = str(start) + ':' + str(stop)
+            if self.step[i] != 1:
+                dim_sel += ':' + str(self.step[i])
+            if i != rank - 1:
+                dim_sel += ','
+            param += dim_sel
+        param += ']'
+        return param
+
+    def broadcast(self, target_shape):
+        """ Return an iterator over target dataspaces for broadcasting.
+
+        Follows the standard NumPy broadcasting rules against the current
+        selection shape (self._mshape).
+        """
+        if self._shape == ():
+            if np.product(target_shape) != 1:
+                raise TypeError(f"Can't broadcast {target_shape} to scalar")
+            self._id.select_all()
+            yield self._id
+            return
+
+        start, count, step, scalar = self._sel
+
+        rank = len(count)
+        target = list(target_shape)
+
+        tshape = []
+        for idx in range(1, rank + 1):
+            if len(target) == 0 or scalar[-idx]:     # Skip scalar axes
+                tshape.append(1)
+            else:
+                t = target.pop()
+                if t == 1 or count[-idx] == t:
+                    tshape.append(t)
+                else:
+                    raise TypeError(f"Can't broadcast {target_shape} -> {count}")
+        tshape.reverse()
+        tshape = tuple(tshape)
+
+        chunks = tuple(x // y for x, y in zip(count, tshape))
+        nchunks = int(np.product(chunks))
+
+        if nchunks == 1:
+            yield self._id
+        else:
+            sid = self._id.copy()
+            sid.select_hyperslab((0,) * rank, tshape, step)
+            for idx in range(nchunks):
+                offset = tuple(x * y * z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
+                sid.offset_simple(offset)
+                yield sid
+
+    def __repr__(self):
+        s = f"SimpleSelection(shape:{self._shape}, start: {self._sel[0]},"
+        s += f" count: {self._sel[1]}, step: {self._sel[2]}"
+        return s
+
+
+class FancySelection(Selection):
+
+    """
+        Implements advanced NumPy-style selection operations in addition to
+        the standard slice-and-int behavior.
+
+        Indexing arguments may be ints, slices, lists of indicies, or
+        per-axis (1D) boolean arrays.
+
+        Broadcasting is not supported for these selections.
+    """
+
+    @property
+    def slices(self):
+        return self._slices
+
+    @property
+    def mshape(self):
+        """ Shape of current selection """
+        return self._mshape
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        self._slices = []
+
+    def __getitem__(self, args):
+
+        if not isinstance(args, tuple):
+            args = (args,)
+
+        args = _expand_ellipsis(args, len(self._shape))
+        select_type = H5S_SELECT_HYPERSLABS  # will adjust if we have a coord
+
+        # Create list of slices and/or coordinates
+        slices = []
+        mshape = []
+        num_coordinates = None
+        for idx, arg in enumerate(args):
+            length = self._shape[idx]
+            if isinstance(arg, slice):
+                _, count, _ = _translate_slice(arg, length)  # raise exception for invalid slice
+                if arg.start is None:
+                    start = 0
+                else:
+                    start = arg.start
+                if arg.stop is None:
+                    stop = length
+                else:
+                    stop = arg.stop
+                if arg.step is None:
+                    step = 1
+                else:
+                    step = arg.step
+                slices.append(slice(start, stop, step))
+                mshape.append(count)
+
+            elif hasattr(arg, 'dtype') and arg.dtype == np.dtype('bool'):
+                if len(arg.shape) != 1:
+                    raise TypeError("Boolean indexing arrays must be 1-D")
+                arg = arg.nonzero()[0]
+                try:
+                    slices.append(list(arg))
+                except TypeError:
+                    pass
+                else:
+                    if sorted(arg) != list(arg):
+                        raise TypeError("Indexing elements must be in increasing order")
+                mshape.append(len(arg))
+                select_type = H5S_SELLECT_FANCY
+            elif isinstance(arg, list) or hasattr(arg, 'dtype'):
+                # coordinate selection
+                slices.append(arg)
+                for x in arg:
+                    if x < 0 or x >= length:
+                        raise IndexError(f"Index ({arg}) out of range (0-{length - 1})")
+                if num_coordinates is None:
+                    num_coordinates = len(arg)
+                elif num_coordinates == len(arg):
+                    # second set of coordinates doesn't effect mshape
+                    continue
+                else:
+                    # this shouldn't happen since HSDS would have thrown an error
+                    raise ValueError("coordinate num element missmatch")
+                mshape.append(len(arg))
+                select_type = H5S_SELLECT_FANCY
+            elif isinstance(arg, int):
+                if arg < 0 or arg >= length:
+                    raise IndexError(f"Index ({arg}) out of range (0-{length - 1})")
+                slices.append(arg)
+            elif isinstance(arg, type(Ellipsis)):
+                slices.append(slice(0, length, 1))
+            else:
+                raise TypeError(f"Unexpected arg type: {arg} - {type(arg)}")
+        self._slices = slices
+        self._select_type = select_type
+        self._mshape = tuple(mshape)
+
+    def getSelectNpoints(self):
+        """Return number of elements in current selection
+        """
+        npoints = 1
+        for idx, s in enumerate(self._slices):
+            if isinstance(s, slice):
+                length = self._shape[idx]
+                _, count, _ = _translate_slice(s, length)
+            elif isinstance(s, list):
+                count = len(s)
+            else:
+                # scalar selection
+                count = 1
+            npoints *= count
+
+        return npoints
+
+    def getQueryParam(self):
+        """ Get select param for use with HDF Rest API"""
+        query = []
+        query.append('[')
+        rank = len(self._slices)
+        for dim, s in enumerate(self._slices):
+            if isinstance(s, slice):
+                if s.start is None and s.stop is None:
+                    query.append(':')
+                elif s.stop is None:
+                    query.append(f"{s.start}:")
+                else:
+                    query.append(f"{s.start}:{s.stop}")
+                if s.step and s.step != 1:
+                    query.append(f":{s.step}")
+            elif isinstance(s, list) or hasattr(s, 'dtype'):
+                query.append('[')
+                for idx, n in enumerate(s):
+                    query.append(str(n))
+                    if idx + 1 < len(s):
+                        query.append(',')
+                query.append(']')
+            else:
+                # scalar selection
+                query.append(str(s))
+            if dim + 1 < rank:
+                query.append(',')
+        query.append(']')
+        return "".join(query)
+
+    def broadcast(self, target_shape):
+        raise TypeError("Broadcasting is not supported for complex selections")
+
+    def __repr__(self):
+        return f"FancySelection(shape:{self._shape}, slices: {self._slices})"
+
+
+def _expand_ellipsis(args, rank):
+    """ Expand ellipsis objects and fill in missing axes.
+    """
+    n_el = sum(1 for arg in args if arg is Ellipsis)
+    if n_el > 1:
+        raise ValueError("Only one ellipsis may be used.")
+    elif n_el == 0 and len(args) != rank:
+        args = args + (Ellipsis,)
+
+    final_args = []
+    n_args = len(args)
+    for arg in args:
+
+        if arg is Ellipsis:
+            final_args.extend((slice(None, None, None),) * (rank - n_args + 1))
+        else:
+            final_args.append(arg)
+
+    if len(final_args) > rank:
+        raise TypeError("Argument sequence too long")
+
+    return final_args
+
+
+def _handle_simple(shape, args):
+    """ Process a "simple" selection tuple, containing only slices and
+        integer objects.  Return is a 4-tuple with tuples for start,
+        count, step, and a flag which tells if the axis is a "scalar"
+        selection (indexed by an integer).
+
+        If "args" is shorter than "shape", the remaining axes are fully
+        selected.
+    """
+    args = _expand_ellipsis(args, len(shape))
+
+    start = []
+    count = []
+    step = []
+    scalar = []
+
+    for arg, length in zip(args, shape):
+        if isinstance(arg, slice):
+            x, y, z = _translate_slice(arg, length)
+            s = False
+        else:
+            try:
+                x, y, z = _translate_int(int(arg), length)
+                s = True
+            except TypeError:
+                raise TypeError(f'Illegal index "{arg}" (must be a slice or number)')
+        start.append(x)
+        count.append(y)
+        step.append(z)
+        scalar.append(s)
+
+    return tuple(start), tuple(count), tuple(step), tuple(scalar)
+
+
+def _translate_int(exp, length):
+    """ Given an integer index, return a 3-tuple
+        (start, count, step)
+        for hyperslab selection
+    """
+    if exp < 0:
+        exp = length + exp
+
+    if not 0 <= exp < length:
+        raise IndexError(f"Index ({exp}) out of range (0-{length - 1})")
+
+    return exp, 1, 1
+
+
+def _translate_slice(exp, length):
+    """ Given a slice object, return a 3-tuple
+        (start, count, step)
+        for use with the hyperslab selection routines
+    """
+    start, stop, step = exp.indices(length)
+    # Now if step > 0, then start and stop are in [0, length];
+    # if step < 0, they are in [-1, length - 1] (Python 2.6b2 and later;
+    # Python issue 3004).
+
+    if step < 1:
+        raise ValueError("Step must be >= 1 (got %d)" % step)
+    if stop < start:
+        stop = start
+
+    count = 1 + (stop - start - 1) // step
+
+    return start, count, step
+
+
+def guess_shape(sid):
+    """ Given a dataspace, try to deduce the shape of the selection.
+
+    Returns one of:
+        * A tuple with the selection shape, same length as the dataspace
+        * A 1D selection shape for point-based and multiple-hyperslab selections
+        * None, for unselected scalars and for NULL dataspaces
+    """
+
+    sel_class = sid.get_simple_extent_type()    # Dataspace class
+    sel_type = sid.get_select_type()            # Flavor of selection in use
+
+    if sel_class == 'H5S_NULL':
+        # NULL dataspaces don't support selections
+        return None
+
+    elif sel_class == 'H5S_SCALAR':
+        # NumPy has no way of expressing empty 0-rank selections, so we use None
+        if sel_type == H5S_SELECT_NONE:
+            return None
+        if sel_type == H5S_SELECT_ALL:
+            return tuple()
+
+    elif sel_class != 'H5S_SIMPLE':
+        raise TypeError(f"Unrecognized dataspace class {sel_class}")
+
+    # We have a "simple" (rank >= 1) dataspace
+
+    N = sid.get_select_npoints()
+    rank = len(sid.shape)
+
+    if sel_type == H5S_SELECT_NONE:
+        return (0,) * rank
+
+    elif sel_type == H5S_SELECT_ALL:
+        return sid.shape
+
+    elif sel_type == H5S_SEL_POINTS:
+        # Like NumPy, point-based selections yield 1D arrays regardless of
+        # the dataspace rank
+        return (N,)
+
+    elif sel_type != H5S_SELECT_HYPERSLABS:
+        raise TypeError(f"Unrecognized selection method {sel_type}")
+
+    # We have a hyperslab-based selection
+
+    if N == 0:
+        return (0,) * rank
+
+    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())
+
+    # Shape of full selection box
+    boxshape = topcorner - bottomcorner + np.ones((rank,))
+
+    def get_n_axis(sid, axis):
+        """ Determine the number of elements selected along a particular axis.
+
+        To do this, we "mask off" the axis by making a hyperslab selection
+        which leaves only the first point along the axis.  For a 2D dataset
+        with selection box shape (X, Y), for axis 1, this would leave a
+        selection of shape (X, 1).  We count the number of points N_leftover
+        remaining in the selection and compute the axis selection length by
+        N_axis = N/N_leftover.
+        """
+
+        if (boxshape[axis]) == 1:
+            return 1
+
+        start = bottomcorner.copy()
+        start[axis] += 1
+        count = boxshape.copy()
+        count[axis] -= 1
+
+        # Throw away all points along this axis
+        masked_sid = sid.copy()
+        masked_sid.select_hyperslab(tuple(start), tuple(count), op=H5S_SELECT_NOTB)
+
+        N_leftover = masked_sid.get_select_npoints()
+
+        return N // N_leftover
+
+    shape = tuple(get_n_axis(sid, x) for x in range(rank))
+
+    if np.product(shape) != N:
+        # This means multiple hyperslab selections are in effect,
+        # so we fall back to a 1D shape
+        return (N,)
+
+    return shape
+
+
+class ScalarSelection(Selection):
+
+    """
+        Implements slicing for scalar datasets.
+    """
+
+    @property
+    def mshape(self):
+        return self._mshape
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        arg = None
+        if len(args) > 0:
+            arg = args[0]
+        if arg == ():
+            self._mshape = None
+            self._select_type = H5S_SELECT_ALL
+        elif arg == (Ellipsis,):
+            self._mshape = ()
+            self._select_type = H5S_SELECT_ALL
+        else:
+            raise ValueError("Illegal slicing argument for scalar dataspace")
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index 81f9b4f9..fb2c8a73 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -14,6 +14,8 @@
 
 from .h5writer import H5Writer
 from ..objid import stripId, getCollectionForId
+from ..array_util import bytesArrayToList
+from .. import selections
 
 class H5JsonWriter(H5Writer):
     """
@@ -39,6 +41,7 @@ def flush(self):
         # json writer doesn't support incremental updates, so we'll wait
         # for close to write out database
         self.log.info("flush")
+        return False
   
     def close(self):
         """ close storage handle """
@@ -86,7 +89,7 @@ def dumpAttribute(self, obj_id, attr_name):
         response = {"name": attr_name}
         response["type"] = item["type"]
         response["shape"] = item["shape"]
-        if True:  #not self.options.D:
+        if True:
             if "value" not in item:
                 self.log.warning("no value key in attribute: " + attr_name)
             else:
@@ -173,10 +176,18 @@ def dumpDataset(self, obj_id):
         shape_rsp = {}
         num_elements = 1
         shape_rsp["class"] = shapeItem["class"]
-        if "dims" in shapeItem:
+        if shapeItem["class"] == "H5S_NULL":
+            dims = None
+            num_elements = 0
+        elif shapeItem["class"] == "H5S_SCALAR":
+            dims = ()
+            num_elements = 1
+        else:
             shape_rsp["dims"] = shapeItem["dims"]
-            for dim in shapeItem["dims"]:
-                num_elements *= dim
+            dims = tuple(shapeItem["dims"])
+            for extent in dims:
+                num_elements *= extent
+
         if "maxdims" in shapeItem:
             maxdims = []
             for dim in shapeItem["maxdims"]:
@@ -196,8 +207,9 @@ def dumpDataset(self, obj_id):
 
         if not self._no_data:
             if num_elements > 0:
-                value = self.db.getDatasetValues(obj_id)
-                response["value"] = value  # dump values unless header flag was passed
+                sel_all = selections.select(dims, ...)
+                arr = self.db.getDatasetValues(obj_id, sel_all)
+                response["value"] = bytesArrayToList(arr)  # dump values unless header flag was passed
             else:
                 response["value"] = []  # empty list
         return response
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index 47ff3b1e..df69f029 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -17,6 +17,7 @@
 from h5json.writer.h5json_writer import H5JsonWriter
 from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId
 from h5json.hdf5dtype import special_dtype, Reference
+from h5json import selections
 
 
 class H5JsonWriterTest(unittest.TestCase):
@@ -45,7 +46,7 @@ def __init__(self, *args, **kwargs):
 
     def testGroup(self):
     
-        with Hdf5db(h5_writer=H5JsonWriter("/tmp/foo.json", no_data=True), app_logger=self.log) as db:
+        with Hdf5db(h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False), app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "attr1", value=[1,2,3,4])
             db.createAttribute(root_id, "attr2", 42)
@@ -57,6 +58,12 @@ def testGroup(self):
             g1_1_id = db.createGroup()
             db.createHardLink(g1_id, "g1.1", g1_1_id)
             dset_111_id = db.createDataset(shape=(10,10), dtype=np.int32)
+            arr = np.zeros((10, 10), dtype=np.int32)
+            for i in range(10):
+                for j in range(10):
+                    arr[i, j] = i * j
+            sel_all = selections.select((10, 10), ...)
+            db.setDatasetValues(dset_111_id, sel_all, arr)
             db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
             db.createSoftLink(g2_id, "slink", "somewhere")
             db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
@@ -77,7 +84,6 @@ def testNullSpaceAttribute(self):
             self.assertTrue("class" in shape_item)
             self.assertEqual(shape_item["class"], "H5S_NULL")
             self.assertTrue(item["created"] > time.time() - 1.0)
-            self.assertEqual(item["modified"], None)
             value = db.getAttributeValue(root_id, "A1")
             self.assertEqual(value, None)
 
@@ -98,7 +104,6 @@ def testScalarAttribute(self):
             self.assertEqual(item["value"], 42)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
             shape = item["shape"]
             self.assertEqual(shape["class"], "H5S_SCALAR")
 
@@ -122,7 +127,6 @@ def testFixedStringAttribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
             ret_value = db.getAttributeValue(root_id, "A1")
        
 
@@ -147,7 +151,6 @@ def testVlenAsciiAttribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
 
     def testVlenUtf8Attribute(self):
         with Hdf5db(app_logger=self.log) as db:
@@ -170,8 +173,6 @@ def testVlenUtf8Attribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
-
  
 
     def testIntAttribute(self):
@@ -183,7 +184,6 @@ def testIntAttribute(self):
             self.assertEqual(item["value"], [2, 3, 5, 7, 11])
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
             item_shape = item["shape"]
             self.assertEqual(item_shape["class"], "H5S_SIMPLE")
             self.assertEqual(item_shape["dims"], [5,])
@@ -257,7 +257,6 @@ def testCommittedType(self):
             item = db.getObjectById(ctype_id)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
 
             item_type = item["type"]
 
@@ -294,7 +293,6 @@ def testCommittedCompoundType(self):
             item = db.getObjectById(ctype_id)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
 
             item_type = item["type"]
 
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 2c2812dc..8931dd9c 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -14,6 +14,7 @@
 import logging
 import numpy as np
 from h5json import Hdf5db
+from h5json import selections
 from h5json.objid import isRootObjId, isValidUuid, isSchema2Id
 from h5json.hdf5dtype import special_dtype, Reference
 
@@ -43,7 +44,6 @@ def __init__(self, *args, **kwargs):
 
 
     def testGroup(self):
-    
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
             self.assertTrue(isSchema2Id(root_id))
@@ -120,15 +120,11 @@ def testGroup(self):
             except KeyError:
                 pass  # expected
 
-            try:
-                db.getLink(g2_id, "not_a_link")
-                self.assertTrue(False)
-            except KeyError:
-                pass  # expected
+            ret = db.getLink(g2_id, "not_a_link")
+            self.assertTrue(ret is None)
 
 
     def testNullSpaceAttribute(self):
-
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
@@ -138,7 +134,6 @@ def testNullSpaceAttribute(self):
             self.assertTrue("class" in shape_item)
             self.assertEqual(shape_item["class"], "H5S_NULL")
             self.assertTrue(item["created"] > time.time() - 1.0)
-            self.assertEqual(item["modified"], None)
             value = db.getAttributeValue(root_id, "A1")
             self.assertEqual(value, None)
 
@@ -159,7 +154,6 @@ def testScalarAttribute(self):
             self.assertEqual(item["value"], 42)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
             shape = item["shape"]
             self.assertEqual(shape["class"], "H5S_SCALAR")
 
@@ -183,8 +177,8 @@ def testFixedStringAttribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
             ret_value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(ret_value, value.encode("ascii"))
        
 
     def testVlenAsciiAttribute(self):
@@ -208,7 +202,6 @@ def testVlenAsciiAttribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
 
     def testVlenUtf8Attribute(self):
         with Hdf5db(app_logger=self.log) as db:
@@ -231,8 +224,6 @@ def testVlenUtf8Attribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
-
  
 
     def testIntAttribute(self):
@@ -244,7 +235,6 @@ def testIntAttribute(self):
             self.assertEqual(item["value"], [2, 3, 5, 7, 11])
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
             item_shape = item["shape"]
             self.assertEqual(item_shape["class"], "H5S_SIMPLE")
             self.assertEqual(item_shape["dims"], [5,])
@@ -318,7 +308,6 @@ def testCommittedType(self):
             item = db.getObjectById(ctype_id)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
 
             item_type = item["type"]
 
@@ -337,7 +326,6 @@ def testCommittedType(self):
             self.assertEqual(attr_type["length"], 15)
             self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
 
-
     def testCommittedCompoundType(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
@@ -355,7 +343,6 @@ def testCommittedCompoundType(self):
             item = db.getObjectById(ctype_id)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            self.assertEqual(item["modified"], None)
 
             item_type = item["type"]
 
@@ -376,6 +363,59 @@ def testCommittedCompoundType(self):
             
             value = db.getAttributeValue(root_id, "A1")
             self.assertTrue(isinstance(value, np.ndarray))
+
+    def testSimpleDataset(self):
+        with Hdf5db(app_logger=self.log) as db:
+            nrows = 8
+            ncols = 10
+            shape = (nrows, ncols)
+            dtype = np.int32
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape, dtype=dtype)
+            db.createHardLink(root_id, "dset", dset_id)
+            db.createAttribute(dset_id, "a1", "Hello, world")
+            sel_all = selections.select(shape, ...)
+            arr = db.getDatasetValues(dset_id, sel_all)
+            self.assertEqual(arr.dtype, dtype)
+            self.assertEqual(arr.shape, shape)
+            self.assertEqual(arr.min(), 0)
+            self.assertEqual(arr.max(), 0)
+            row = np.zeros((ncols,), dtype=dtype)
+            for i in range(nrows):
+                row[:] = list(range(i*10, (i + 1)*10))
+                row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
+                db.setDatasetValues(dset_id, row_sel, row)
+            arr = db.getDatasetValues(dset_id, sel_all)
+            for i in range(nrows):
+                row = np.array(list(range(i*10, (i + 1)*10)), dtype=dtype)
+                np.testing.assert_array_equal(arr[i, :],  row)
+            
+
+    def testScalarDataset(self):
+        dtype = np.int32
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset((), dtype=dtype)
+            db.createHardLink(root_id, "dset", dset_id)
+            db.createAttribute(dset_id, "a1", "Hello, world")
+            sel_all = selections.select((), ...)
+            arr = db.getDatasetValues(dset_id, sel_all)
+            self.assertEqual(arr.dtype, dtype)
+            self.assertEqual(arr.shape, ())
+            self.assertEqual(arr[()], 0)
+            db.setDatasetValues(dset_id, sel_all, np.array(42, dtype=dtype))
+            arr = db.getDatasetValues(dset_id, sel_all)
+            self.assertEqual(arr.dtype, dtype)
+            self.assertEqual(arr.shape, ())
+            self.assertEqual(arr.min(), 42)
+            self.assertEqual(arr.max(), 42)
+
+            
+
+
+
+
+
    
 
 if __name__ == "__main__":

From c0a6cc369de5268d9a2c504690c1618fe6c9c0e2 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 26 Feb 2025 13:39:31 -0800
Subject: [PATCH 012/129] update h5tojson script

---
 src/h5json/dset_util.py            |  42 ------
 src/h5json/h5tojson/h5tojson.py    | 215 ++---------------------------
 src/h5json/hdf5db.py               | 120 ++++++++--------
 src/h5json/reader/h5py_reader.py   | 161 ++++++++++++++++++++-
 src/h5json/writer/h5json_writer.py |  42 ++++--
 5 files changed, 252 insertions(+), 328 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 7a3a7aa3..c89f141f 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -58,48 +58,6 @@
 _H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
 """
 
-def make_new_dset(
-        shape=None,
-        dtype=None,
-        chunks=None,
-        compression=None,
-        shuffle=None,
-        maxshape=None,
-        compression_opts=None,
-        fillvalue=None,
-        cpl=None
-    ):
-
-    type_json = getTypeItem(dtype)
-    if shape == "H5S_NULL":
-        shape_json = {"class": "H5S_NULL"}
-    else:
-        shape_json = {"class": "H5S_SIMPLE"}
-        shape_json["dims"] = list(shape)
-
-    if maxshape:
-        shape_json["maxshape"] = maxshape
-    if cpl is None:
-        cpl = {}
-    if chunks:
-        cpl["chunks"] = chunks
-    if compression:
-        cpl["compression"] = compression
-    if shuffle:
-        cpl["shuffle"] = shuffle
-    if compression_opts: 
-        cpl["compression_opts"] = compression_opts
-    if fillvalue:
-        cpl["fillvalue"] = fillvalue
-    
-
-    # TBD - other properties
-    dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl, "attributes": {}}
-    dset_json["created"] = time.time()
-    dset_json["modified"] = None
-
-    return dset_json
-
 def resize_dataset(dset_json, shape):
     shape_json = dset_json["shape"]
     shape_class = shape_json["class"]
diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/h5tojson/h5tojson.py
index 89a65bdd..44a7a88c 100755
--- a/src/h5json/h5tojson/h5tojson.py
+++ b/src/h5json/h5tojson/h5tojson.py
@@ -10,216 +10,29 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import sys
-import json
 import argparse
 import os.path as op
-import tempfile
 import logging
 import logging.handlers
-from h5json import Hdf5db
-from h5json import hdf5dtype
-
-
-class DumpJson:
-    """
-    DumpJson - return json representation of all objects within the given file
-    """
-
-    def __init__(self, db, app_logger=None, options=None):
-        self.options = options
-        self.db = db
-        if app_logger:
-            self.log = app_logger
-        else:
-            self.log = logging.getLogger()
-        self.json = {}
-
-    def dumpAttribute(self, col_name, uuid, attr_name):
-        self.log.info("dumpAttribute: [" + attr_name + "]")
-        item = self.db.getAttributeItem(col_name, uuid, attr_name)
-        response = {"name": attr_name}
-        typeItem = item["type"]
-        response["type"] = hdf5dtype.getTypeResponse(typeItem)
-        response["shape"] = item["shape"]
-        if not self.options.D:
-            if "value" not in item:
-                self.log.warning("no value key in attribute: " + attr_name)
-            else:
-                response["value"] = item[
-                    "value"
-                ]  # dump values unless header -D was passed
-        return response
-
-    def dumpAttributes(self, col_name, uuid):
-        attr_list = self.db.getAttributeItems(col_name, uuid)
-        self.log.info("dumpAttributes: " + uuid)
-        items = []
-        for attr in attr_list:
-            item = self.dumpAttribute(col_name, uuid, attr["name"])
-            items.append(item)
-
-        return items
-
-    def dumpLink(self, uuid, name):
-        item = self.db.getLinkItemByUuid(uuid, name)
-        for key in ("ctime", "mtime", "href"):
-            if key in item:
-                del item[key]
-        return item
-
-    def dumpLinks(self, uuid):
-        link_list = self.db.getLinkItems(uuid)
-        items = []
-        for link in link_list:
-            item = self.dumpLink(uuid, link["title"])
-            items.append(item)
-        return items
-
-    def dumpGroup(self, uuid):
-        item = self.db.getGroupItemByUuid(uuid)
-        if "alias" in item:
-            alias = item["alias"]
-            if alias:
-                self.log.info("dumpGroup alias: [" + alias[0] + "]")
-        for key in ("ctime", "mtime", "linkCount", "attributeCount", "id"):
-            if key in item:
-                del item[key]
-        attributes = self.dumpAttributes("groups", uuid)
-        if attributes:
-            item["attributes"] = attributes
-        links = self.dumpLinks(uuid)
-        if links:
-            item["links"] = links
-        return item
-
-    def dumpGroups(self):
-        groups = {}
-        item = self.dumpGroup(self.root_uuid)
-        groups[self.root_uuid] = item
-        uuids = self.db.getCollection("groups")
-        for uuid in uuids:
-            item = self.dumpGroup(uuid)
-            groups[uuid] = item
-
-        self.json["groups"] = groups
-
-    def dumpDataset(self, uuid):
-        response = {}
-        self.log.info("dumpDataset: " + uuid)
-        item = self.db.getDatasetItemByUuid(uuid)
-        if "alias" in item:
-            alias = item["alias"]
-            if alias:
-                self.log.info("dumpDataset alias: [" + alias[0] + "]")
-            response["alias"] = item["alias"]
-
-        typeItem = item["type"]
-        response["type"] = hdf5dtype.getTypeResponse(typeItem)
-        shapeItem = item["shape"]
-        shape_rsp = {}
-        num_elements = 1
-        shape_rsp["class"] = shapeItem["class"]
-        if "dims" in shapeItem:
-            shape_rsp["dims"] = shapeItem["dims"]
-            for dim in shapeItem["dims"]:
-                num_elements *= dim
-        if "maxdims" in shapeItem:
-            maxdims = []
-            for dim in shapeItem["maxdims"]:
-                if dim == 0:
-                    maxdims.append("H5S_UNLIMITED")
-                else:
-                    maxdims.append(dim)
-            shape_rsp["maxdims"] = maxdims
-        response["shape"] = shape_rsp
-
-        if "creationProperties" in item:
-            response["creationProperties"] = item["creationProperties"]
-
-        attributes = self.dumpAttributes("datasets", uuid)
-        if attributes:
-            response["attributes"] = attributes
-
-        if not (self.options.D or self.options.d):
-            if num_elements > 0:
-                value = self.db.getDatasetValuesByUuid(uuid)
-                response["value"] = value  # dump values unless header flag was passed
-            else:
-                response["value"] = []  # empty list
-        return response
-
-    def dumpDatasets(self):
-        uuids = self.db.getCollection("datasets")
-        if uuids:
-            datasets = {}
-            for uuid in uuids:
-                item = self.dumpDataset(uuid)
-                datasets[uuid] = item
-
-            self.json["datasets"] = datasets
-
-    def dumpDatatype(self, uuid):
-        response = {}
-        item = self.db.getCommittedTypeItemByUuid(uuid)
-        response["alias"] = item["alias"]
-        typeItem = item["type"]
-        response["type"] = hdf5dtype.getTypeResponse(typeItem)
-        attributes = self.dumpAttributes("datatypes", uuid)
-        if attributes:
-            response["attributes"] = attributes
-        return response
-
-    def dumpDatatypes(self):
-        uuids = self.db.getCollection("datatypes")
-        if uuids:
-            datatypes = {}
-            for uuid in uuids:
-                item = self.dumpDatatype(uuid)
-                datatypes[uuid] = item
-
-            self.json["datatypes"] = datatypes
-
-    def dumpFile(self):
-
-        self.root_uuid = self.db.getUUIDByPath("/")
-
-        db_version_info = self.db.getVersionInfo()
-
-        self.json["apiVersion"] = db_version_info["hdf5-json-version"]
-        self.json["root"] = self.root_uuid
-
-        self.dumpGroups()
-
-        self.dumpDatasets()
-
-        self.dumpDatatypes()
-
-        print(json.dumps(self.json, sort_keys=True, indent=4))
-
-
-def getTempFileName():
-    """
-    Generate a temporary filename to avoid problems with trying to create a dbfile
-    in a read-only directory.  (See: https://github.com/HDFGroup/h5serv/issues/37)
-    """
-    f = tempfile.NamedTemporaryFile(delete=False)
-    f.close()
-    return f.name
 
+from h5json import Hdf5db
+from h5json.writer.h5json_writer import H5JsonWriter
+from h5json.reader.h5py_reader import H5pyReader
+ 
 
 def main():
     parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-D|-d] <hdf5_file>")
-    parser.add_argument("-D", action="store_true", help="surpress all data output")
+    parser.add_argument("-D", action="store_true", help="suppress all data output")
     parser.add_argument(
         "-d",
         action="store_true",
-        help="surpress data output for" + " datasets (but not attribute values)",
+        help="suppress data output for" + " datasets (but not attribute values)",
     )
     parser.add_argument("filename", nargs="+", help="HDF5 to be converted to json")
     args = parser.parse_args()
 
     # create logger
-    log = logging.getLogger("h5serv")
+    log = logging.getLogger("h5tojson")
     # log.setLevel(logging.WARN)
     log.setLevel(logging.INFO)
     # add log handler
@@ -230,16 +43,14 @@ def main():
 
     filename = args.filename[0]
     if not op.isfile(filename):
-        sys.exit("Cannot find file: %s" % filename)
-
-    log.info("h5tojson " + filename)
+        sys.exit(f"Cannot find file: {filename}")
 
-    dbFilename = getTempFileName()
-    log.info("Using dbFile: " + dbFilename)
-    with Hdf5db(filename, dbFilePath=dbFilename, readonly=True, app_logger=log) as db:
-        dumper = DumpJson(db, app_logger=log, options=args)
-        dumper.dumpFile()
+    log.info(f"h5tojson {filename}")
 
+    kwargs = {"app_logger": log}
+    
+    with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False, **kwargs), **kwargs) as db:
+        pass
 
 if __name__ == "__main__":
     main()
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 991e7561..714059a6 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -14,7 +14,7 @@
 import logging
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
 from .array_util import jsonToArray, bytesArrayToList
-from .dset_util import make_new_dset, resize_dataset
+from .dset_util import resize_dataset
 from .objid import createObjId, getCollectionForId
 from . import selections
 from .apiversion import _apiver
@@ -242,31 +242,6 @@ def getDtype(self, obj_id):
         return dtype
  
  
-    def createCommittedType(self, datatype, cpl=None):
-        """
-        createCommittedType - creates new named datatype
-        Returns item
-        """
-        self.log.info("createCommittedType")
-        if cpl is None:
-            cpl = {}
-         
-        ctype_id = createObjId(obj_type="datatypes", root_id=self.root_id)
-        if isinstance(datatype, np.dtype):
-            dt = datatype
-        else:
-            dt = createDataType(datatype)
-
-        type_json = getTypeItem(dt)  # get canonical json description of datatype
-
-        ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl}
-        ctype_json["created"] = time.time()
-        ctype_json["modified"] = None
-        self.db[ctype_id] = ctype_json
-        self._new_objects.add(ctype_id)
-        return ctype_id
-  
-
     def getAttribute(self, obj_id, name, includeData=True):
         """
         Get attribute given an object id and name
@@ -519,45 +494,6 @@ def setDatasetValues(self, dset_id, sel, arr):
         updates.append((sel, arr.copy()))
         self.make_dirty(dset_id)
 
-    def createDataset(
-        self,
-        shape=None,
-        dtype=None,
-        chunks=None,
-        compression=None,
-        shuffle=None,
-        maxshape=None,
-        compression_opts=None,
-        fillvalue=None,
-        cpl=None,
-    ):
-        """
-        createDataset - creates new dataset given shape and datatype
-        Returns obj_id
-        """
-        
-        kwds = {}
-        if chunks:
-            kwds["chunks"] = chunks
-        if compression:
-            kwds["compression"] = compression
-        if shuffle:
-            kwds["shuffle"] = shuffle
-        if compression_opts:
-            kwds["compression_opts"] = compression_opts
-        if maxshape:
-            kwds["maxshape"] = maxshape
-        if fillvalue:
-            kwds["fillvalue"] = fillvalue
-        if cpl:
-            kwds["cpl"] = cpl
-        dset_json = make_new_dset(shape=shape, dtype=dtype, **kwds)
- 
-        dset_id = createObjId("datasets", root_id=self.root_id)   
-        self.db[dset_id] = dset_json 
-        self._new_objects.add(dset_id)
-        return dset_id
-
 
     def resizeDataset(self, dset_id, shape):
         """
@@ -668,7 +604,59 @@ def createGroup(self, cpl=None):
         self.db[grp_id] = group_json
         self._new_objects.add(grp_id)
         return grp_id
-   
+    
+
+    def createCommittedType(self, datatype, cpl=None):
+        """
+        createCommittedType - creates new named datatype
+        Returns item
+        """
+        self.log.info("createCommittedType")
+        if cpl is None:
+            cpl = {}
+         
+        ctype_id = createObjId(obj_type="datatypes", root_id=self.root_id)
+        if isinstance(datatype, np.dtype):
+            dt = datatype
+        else:
+            dt = createDataType(datatype)
+
+        type_json = getTypeItem(dt)  # get canonical json description of datatype
+
+        ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl}
+        ctype_json["created"] = time.time()
+        self.db[ctype_id] = ctype_json
+        self._new_objects.add(ctype_id)
+        return ctype_id
+  
+    
+    def createDataset(
+        self,
+        shape=None,
+        dtype=None,
+        cpl=None,
+    ):
+        """
+        createDataset - creates new dataset given shape and datatype
+        Returns obj_id
+        """
+        type_json = getTypeItem(dtype)
+        if shape == "H5S_NULL":
+            shape_json = {"class": "H5S_NULL"}
+        else:
+            shape_json = {"class": "H5S_SIMPLE"}
+            shape_json["dims"] = list(shape)
+
+        dset_json = {"shape": shape_json, "type": type_json, "attributes": {}}
+        if cpl:
+            dset_json["cpl"] = cpl
+        else:
+            dset_json["cpl"] = {}
+ 
+        dset_id = createObjId("datasets", root_id=self.root_id)   
+        self.db[dset_id] = dset_json 
+        self._new_objects.add(dset_id)
+        return dset_id
 
     def getCollection(self, col_type=None):
         obj_ids = []
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index 4e7c9b55..040d0ae4 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -11,12 +11,56 @@
 ##############################################################################
 import h5py
 import numpy as np
+import logging
 
 from ..objid import createObjId
 from ..hdf5dtype import getTypeItem
 from ..array_util import bytesArrayToList
+from .. import selections
 from ..h5reader import H5Reader
 
+_HDF_FILTERS = {
+    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
+    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
+    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
+    4: {
+        "class": "H5Z_FILTER_SZIP",
+        "alias": "szip",
+        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
+    },
+    5: {"class": "H5Z_FILTER_NBIT"},
+    6: {
+        "class": "H5Z_FILTER_SCALEOFFSET",
+        "alias": "scaleoffset",
+        "options": ["scaleType", "scaleOffset"],
+    },
+    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
+}
+
+_HDF_FILTER_OPTION_ENUMS = {
+    "coding": {
+        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
+        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
+    },
+    "scaleType": {
+        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
+        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
+        h5py.h5z.SO_INT: "H5Z_SO_INT",
+    },
+}
+
+# h5py supported filters
+_H5PY_FILTERS = {
+    "gzip": 1,
+    "shuffle": 2,
+    "fletcher32": 3,
+    "szip": 4,
+    "scaleoffset": 6,
+    "lzf": 32000,
+}
+
+_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
+
 
 class H5pyReader(H5Reader):
     """
@@ -196,6 +240,97 @@ def _getDatatype(self, ctype, include_attrs=True):
 
         return item
 
+
+    def _getHDF5DatasetCreationProperties(self, dset, type_class):
+        """ Get dataset creation properties maintained by HDF5 library """
+
+        #
+        # Fill in creation properties
+        #
+        creationProps = {}
+        plist = h5py.h5d.DatasetID.get_create_plist(dset.id)
+
+        # alloc time
+        nAllocTime = plist.get_alloc_time()
+        if nAllocTime == h5py.h5d.ALLOC_TIME_DEFAULT:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_DEFAULT"
+        elif nAllocTime == h5py.h5d.ALLOC_TIME_LATE:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_LATE"
+        elif nAllocTime == h5py.h5d.ALLOC_TIME_EARLY:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_EARLY"
+        elif nAllocTime == h5py.h5d.ALLOC_TIME_INCR:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_INCR"
+        else:
+            self.log.warning(f"Unknown alloc time value: {nAllocTime}")
+
+        # fill time
+        nFillTime = plist.get_fill_time()
+        if nFillTime == h5py.h5d.FILL_TIME_ALLOC:
+            creationProps["fillTime"] = "H5D_FILL_TIME_ALLOC"
+        elif nFillTime == h5py.h5d.FILL_TIME_NEVER:
+            creationProps["fillTime"] = "H5D_FILL_TIME_NEVER"
+        elif nFillTime == h5py.h5d.FILL_TIME_IFSET:
+            creationProps["fillTime"] = "H5D_FILL_TIME_IFSET"
+        else:
+            self.log.warning(f"unknown fill time value: {nFillTime}")
+
+        if type_class == "H5T_OPAQUE":
+            # TBD: store opaque fill value as a hex string
+            self.log.warning("Opaque fill value not supported")
+        else:
+            if plist.fill_value_defined() == h5py.h5d.FILL_VALUE_USER_DEFINED:
+                creationProps["fillValue"] = bytesArrayToList(dset.fillvalue)
+
+        # layout
+        nLayout = plist.get_layout()
+        if nLayout == h5py.h5d.COMPACT:
+            creationProps["layout"] = {"class": "H5D_COMPACT"}
+        elif nLayout == h5py.h5d.CONTIGUOUS:
+            creationProps["layout"] = {"class": "H5D_CONTIGUOUS"}
+        elif nLayout == h5py.h5d.CHUNKED:
+            creationProps["layout"] = {"class": "H5D_CHUNKED", "dims": dset.chunks}
+        else:
+            self.log.warning(f"Unknown layout value: {nLayout}")
+
+        num_filters = plist.get_nfilters()
+        filter_props = []
+        if num_filters:
+            for n in range(num_filters):
+                filter_info = plist.get_filter(n)
+                opt_values = filter_info[2]
+                filter_prop = {}
+                filter_id = filter_info[0]
+                filter_prop["id"] = filter_id
+                if filter_info[3]:
+                    filter_prop["name"] = self.bytesArrayToList(filter_info[3])
+                if filter_id in _HDF_FILTERS:
+                    hdf_filter = _HDF_FILTERS[filter_id]
+                    filter_prop["class"] = hdf_filter["class"]
+                    if "options" in hdf_filter:
+                        filter_opts = hdf_filter["options"]
+                        for i in range(len(filter_opts)):
+                            if len(opt_values) <= i:
+                                break  # end of option values
+                            opt_value = opt_values[i]
+                            opt_value_enum = None
+                            option_name = filter_opts[i]
+                            if option_name in _HDF_FILTER_OPTION_ENUMS:
+                                option_enums = _HDF_FILTER_OPTION_ENUMS[option_name]
+                                if opt_value in option_enums:
+                                    opt_value_enum = option_enums[opt_value]
+                            if opt_value_enum:
+                                filter_prop[option_name] = opt_value_enum
+                            else:
+                                filter_prop[option_name] = opt_value
+                else:
+                    # custom filter
+                    filter_prop["class"] = "H5Z_FILTER_USER"
+                    if opt_values:
+                        filter_prop["parameters"] = opt_values
+                filter_props.append(filter_prop)
+            creationProps["filters"] = filter_props
+
+        return creationProps
     
     def _getDataset(self, dset):     
         self.log.info(f"getDataset alias: [{dset.name}]")
@@ -207,7 +342,7 @@ def _getDataset(self, dset):
             type_uuid = None
             addr = h5py.h5o.get_info(typeid).addr
             type_uuid = self.getObjIdByAddress(addr)
-            committedType = self.getObjectByid(type_uuid)
+            committedType = self.getObjectById(type_uuid)
             typeItem = committedType["type"]
             typeItem["id"] = type_uuid
         else:
@@ -237,7 +372,10 @@ def _getDataset(self, dset):
             if include_maxdims:
                 shapeItem["maxdims"] = maxshape
         item["shape"] = shapeItem
-        
+
+        item["cpl"] = self._getHDF5DatasetCreationProperties(dset, typeItem["class"])
+
+
         return item
     
     def getObjectById(self, obj_id, include_attrs=True, include_links=True):
@@ -261,7 +399,7 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         return obj_json
 
 
-    def getDatasetValues(self, dset_id, selection):
+    def getDatasetValues(self, dset_id, sel):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
@@ -272,7 +410,22 @@ def getDatasetValues(self, dset_id, selection):
         if dset.shape is None:
             # TBD: return something like h5py.Empty in this case?
             return None
-        arr = dset[selection]
+        if sel.select_type == selections.H5S_SELECT_ALL:
+            arr = dset[...]
+        elif sel.select_type == selections.H5S_SELECT_HYPERSLABS:
+            rank = len(dset.shape)
+
+            slices = []
+            for dim in range(rank):
+                start = sel.start[dim]
+                stop = start + sel.count[dim]
+                step = sel.step[dim]
+                slices.append(slice(start, stop, step))
+            slices = tuple(slices)
+            arr = dset[slices]
+        else:
+            raise TypeError("selection type not supported")
+        
         return arr
 
        
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index fb2c8a73..4ca75cbe 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -47,12 +47,28 @@ def close(self):
         """ close storage handle """
         self.dumpFile()
 
+    def getAliasList(self, obj_id):
+        """ return list of alias """
+        if obj_id not in self.alias_db:
+            self.alias_db[obj_id] = []
+        return self.alias_db[obj_id]
+         
+    
+    def updateAliasList(self):
+        """ update the alias list for each object """
+        # clear exiting aliases
+        obj_ids = self.db.getCollection()
+        for obj_id in obj_ids:
+            self.alias_db[obj_id] = []
+
+        self._setAlias(self._root_uuid, set(), "/")
+
      
     def _setAlias(self, obj_id, id_set, h5path):
         """ add the given h5path to the object's alias list
             If the object is a group, recurse through each hard link """
         obj_json = self.db.getObjectById(obj_id)
-        alias_list = self.alias_db[obj_id]
+        alias_list = self.getAliasList(obj_id)
         if h5path in alias_list:
             return  # nothing to do
         alias_list.append(h5path)
@@ -73,15 +89,6 @@ def _setAlias(self, obj_id, id_set, h5path):
                     self._setAlias(tgt_id, id_set, h5path+link_name)
         id_set.remove(obj_id)
 
-    def getAliasList(self):
-        """ update the alias list for each object """
-        # clear exiting aliases
-        obj_ids = self.db.getCollection()
-        for obj_id in obj_ids:
-            self.alias_db[obj_id] = []
-
-        self._setAlias(self._root_uuid, set(), "/")
-
 
     def dumpAttribute(self, obj_id, attr_name):
         self.log.info(f"dumpAttribute: [{attr_name}]")
@@ -133,7 +140,8 @@ def dumpLinks(self, obj_id):
     def dumpGroup(self, obj_id):
         item = self.db.getObjectById(obj_id)
         response = {}
-        alias = self.alias_db[obj_id]
+
+        alias = self.getAliasList(obj_id)
         response["alias"] = alias
          
         if "cpl" in item:
@@ -220,7 +228,8 @@ def dumpDatasets(self):
             datasets = {}
             for obj_id in obj_ids:
                 item = self.dumpDataset(obj_id)
-                datasets[obj_id] = item
+                obj_uuid = stripId(obj_id)
+                datasets[obj_uuid] = item
 
             self.json["datasets"] = datasets
 
@@ -242,7 +251,8 @@ def dumpDatatypes(self):
             datatypes = {}
             for obj_id in obj_ids:
                 item = self.dumpDatatype(obj_id)
-                datatypes[obj_id] = item
+                obj_uuid = stripId(obj_id)
+                datatypes[obj_uuid] = item
 
             self.json["datatypes"] = datatypes
 
@@ -254,13 +264,17 @@ def dumpFile(self):
 
         self.json["apiVersion"] = db_version_info["hdf5-json-version"]
         self.json["root"] = stripId(self._root_uuid)
-        self.getAliasList()  # create alias_db with obj_id to alias list dict
+
+        self.updateAliasList()  # create alias_db with obj_id to alias list dict
+
         self.dumpGroups()
 
         self.dumpDatasets()
 
         self.dumpDatatypes()
 
+
+
         print(json.dumps(self.json, sort_keys=True, indent=4))
 
 

From 48d43e4968b1df20ffde35e1d165b84669312753 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 26 Feb 2025 18:19:58 -0800
Subject: [PATCH 013/129] added h5json read

---
 src/h5json/reader/h5json_reader.py | 186 +++++++++++++++++++++++++++++
 src/h5json/reader/h5py_reader.py   |  19 +--
 src/h5json/reader/h5reader.py      |   2 +-
 src/h5json/selections.py           |  12 ++
 src/h5json/writer/h5json_writer.py |   2 -
 test/unit/h5json_reader_test.py    | 121 +++++++++++++++++++
 test/unit/h5py_reader_test.py      |   9 +-
 7 files changed, 327 insertions(+), 24 deletions(-)
 create mode 100644 src/h5json/reader/h5json_reader.py
 create mode 100644 test/unit/h5json_reader_test.py

diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/reader/h5json_reader.py
new file mode 100644
index 00000000..44d178a5
--- /dev/null
+++ b/src/h5json/reader/h5json_reader.py
@@ -0,0 +1,186 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import json
+import logging
+
+from ..objid import getCollectionForId, stripId
+
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray
+from .. import selections
+from ..h5reader import H5Reader
+  
+
+class H5JsonReader(H5Reader):
+    """
+    This class can be used by HDF5DB to read content from an hdf5-json file
+    """
+
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+        super().__init__(filepath, app_logger=app_logger)
+
+        with open(filepath) as f:
+            text = f.read()
+
+        # parse the json file
+        h5json = json.loads(text)
+
+        self._h5json = h5json
+
+        if "root" not in h5json:
+            raise Exception("no root key in input file")
+        self._root_id = "g-" + h5json["root"]
+
+    def close(self):
+        pass
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+    
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+        collection = getCollectionForId(obj_id)
+        if collection not in self._h5json:
+            self.log.warning(f"getObjectBId - collection: {collection} not found")
+            return None
+        json_objs = self._h5json[collection]
+        obj_uuid = stripId(obj_id)
+        if obj_uuid not in json_objs:
+            self.log.warning(f"getObjectById - {obj_id} not found")
+            return None
+        json_obj = json_objs[obj_uuid]
+
+        resp = {}
+        # selectively copy from the db dict
+        for k in json_obj:
+            for k in ("shape", "type", "cpl", "dcpl"):
+                if k in json_obj:
+                    resp[k] = json_obj[k]
+        if include_attrs and "attributes" in json_obj:
+            attrs = {}
+            attr_list = json_obj["attributes"]
+            for item in attr_list:
+                if "name" not in item:
+                    self.log.warning(f"expected to find name key for {obj_id} attributes")
+                    continue
+                name = item["name"]
+                attr = {}
+                for k in ("type", "shape", "value"):
+                    attr[k] = item[k]
+                attrs[name] = attr
+            resp["attributes"] = attrs
+
+        if include_links and "links" in json_obj:
+            links = {}
+            link_list = json_obj["links"]
+            for item in link_list:
+                if "title" not in item:
+                    self.log.warning(f"expected to find title key for {obj_id} links")
+                    continue
+                title = item["title"]
+                link = {}
+                for k in ("class", "file", "h5path"):
+                    if k in item:
+                        link[k] = item[k]
+                if "collection" in item:
+                    collection = item["collection"]
+                    if "id" not in item:
+                        self.log.warning(f"expected to find id key for {obj_id} link item")
+                        continue
+                    obj_uuid = item["id"]
+                    if collection == "groups":
+                        obj_id = "g-" + obj_uuid
+                    elif collection == "datasets":
+                        obj_id = "d-" + obj_uuid
+                    elif collection == "datatypes":
+                        obj_id = "t-" + obj_uuid
+                    else:
+                        self.log.warning(f"unexpected collection type: {collection}")
+                        continue
+                    item["id"] = obj_id
+                links[title] = item
+            resp["links"] = links
+
+        return resp
+
+  
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        self.log.debug(f"getAttribute({obj_id}), [{name}], include_data={includeData})")
+        json_obj = self.getObjectById(obj_id)
+        if json_obj is None:
+            return None
+        if "attributes" not in json_obj:
+            self.log.warning(f"obj: {obj_id} has no attributes collection")
+            return None
+        attributes = json_obj["attributes"]
+        if name not in attributes:
+            self.log.info(f"attr: [{name}] of {obj_id} not found")
+            return None
+        return attributes[name]
+        
+
+    def getDatasetValues(self, obj_id, sel=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        self.log.debug(f"getDatasetValues({obj_id}), sel={sel}")
+        json_obj = self.getObjectById(obj_id)
+        if json_obj is None:
+            return None
+        if "value" not in json_obj:
+            self.log.warning("value key not found for {obj_id}")
+            return None
+        json_value = json_obj["value"]
+        shape_json = json_obj["shape"]
+        if shape_json["class"] == "H5S_NULL":
+            self.log.warning("getDatasetValues called for null space object: {obj_id}")
+            return None
+        elif shape_json["class"] == "H5S_SCALAR":
+            dims = ()
+        else:
+            dims = shape_json["dims"]
+
+        type_item = json_obj["type"]
+        dtype = createDataType(type_item)
+        arr = jsonToArray(dims, dtype, json_value)
+        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+            pass  # just return the entire array
+        elif isinstance(sel, selections.SimpleSelection):
+            arr = arr[sel.slices]
+        else:
+            raise NotImplementedError("selection type not supported")
+        
+        return arr
+            
+
+
+        
+  
+       
+
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index 040d0ae4..6d06e6c1 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -399,7 +399,7 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         return obj_json
 
 
-    def getDatasetValues(self, dset_id, sel):
+    def getDatasetValues(self, dset_id, sel=None):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
@@ -410,21 +410,12 @@ def getDatasetValues(self, dset_id, sel):
         if dset.shape is None:
             # TBD: return something like h5py.Empty in this case?
             return None
-        if sel.select_type == selections.H5S_SELECT_ALL:
+        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
             arr = dset[...]
-        elif sel.select_type == selections.H5S_SELECT_HYPERSLABS:
-            rank = len(dset.shape)
-
-            slices = []
-            for dim in range(rank):
-                start = sel.start[dim]
-                stop = start + sel.count[dim]
-                step = sel.step[dim]
-                slices.append(slice(start, stop, step))
-            slices = tuple(slices)
-            arr = dset[slices]
+        elif isinstance(sel, selections.SimpleSelection):
+            arr = dset[sel.slices]
         else:
-            raise TypeError("selection type not supported")
+            raise NotImplementedError("selection type not supported")
         
         return arr
 
diff --git a/src/h5json/reader/h5reader.py b/src/h5json/reader/h5reader.py
index 69a45d07..3923bb15 100644
--- a/src/h5json/reader/h5reader.py
+++ b/src/h5json/reader/h5reader.py
@@ -51,7 +51,7 @@ def getAttribute(self, obj_id, name, includeData=True):
         pass
 
     @abstractmethod
-    def getDatasetValues(self, obj_id, selection):
+    def getDatasetValues(self, obj_id, sel=None):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 4d700d94..ef296d70 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -468,6 +468,18 @@ def broadcast(self, target_shape):
                 sid.offset_simple(offset)
                 yield sid
 
+    @property
+    def slices(self):
+        """ return tuple of slices for this selection """
+        rank = len(self.shape)
+        slices = []
+        for dim in range(rank):
+            start = self.start[dim]
+            stop = start + self.count[dim]
+            step = self.step[dim]
+            slices.append(slice(start, stop, step))
+        return tuple(slices)
+
     def __repr__(self):
         s = f"SimpleSelection(shape:{self._shape}, start: {self._sel[0]},"
         s += f" count: {self._sel[1]}, step: {self._sel[2]}"
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index 4ca75cbe..85dd8e38 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -273,8 +273,6 @@ def dumpFile(self):
 
         self.dumpDatatypes()
 
-
-
         print(json.dumps(self.json, sort_keys=True, indent=4))
 
 
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
new file mode 100644
index 00000000..effa0e58
--- /dev/null
+++ b/test/unit/h5json_reader_test.py
@@ -0,0 +1,121 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import os
+import os.path as op
+import stat
+import logging
+import shutil
+from h5json import Hdf5db
+from h5json.reader.h5json_reader import H5JsonReader
+
+
+def getFile(name, tgt, ro=False):
+    src = "data/json/" + name
+    logging.info("copying file to this directory: " + src)
+
+    filepath = "./out/" + tgt
+
+    if op.isfile(filepath):
+        # make sure it's writable, before we copy over it
+        os.chmod(filepath, stat.S_IWRITE | stat.S_IREAD)
+    shutil.copyfile(src, filepath)
+    if ro:
+        logging.info("make read-only")
+        os.chmod(filepath, stat.S_IREAD)
+    return filepath
+
+
+def removeFile(name):
+    try:
+        os.stat(name)
+    except OSError:
+        return
+        # file does not exist
+    os.remove(name)
+
+
+class H5pyReaderTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5pyReaderTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.INFO)
+        handler = logging.FileHandler("./h5json_reader_test.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+
+    def testSimple(self):
+        filepath = getFile("tall.json", "tall.json", ro=True)
+        kwargs = {"app_logger": self.log}
+        with Hdf5db(h5_reader=H5JsonReader(filepath, **kwargs), **kwargs) as db:
+            root_id = db.getObjectIdByPath("/")
+            root_json = db.getObjectById(root_id)
+            print("root_json:", root_json)
+
+            root_attrs = root_json["attributes"]
+            self.assertEqual(len(root_attrs), 2)
+            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+            root_links = root_json["links"]
+            self.assertEqual(len(root_links), 2)
+            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+            g1_link = root_links["g1"]
+            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+            g1_id = g1_link["id"]
+            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            dset_json = db.getObjectById(dset111_id)
+            dset_type = dset_json["type"]
+            self.assertEqual(dset_type["class"], "H5T_INTEGER")
+            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 2)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+            dset_shape = dset_json["shape"]
+            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(dset_shape["dims"], [10,10])
+
+            # try adding an attribute
+            db.createAttribute(dset111_id, "attr3", value=42)
+            dset_json = db.getObjectById(dset111_id)
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 3)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+            attr3_json = dset_attrs["attr3"]
+            attr3_shape = attr3_json["shape"]
+            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+            attr3_type = attr3_json["type"]
+            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+            attr3_value = attr3_json["value"]
+            self.assertEqual(attr3_value, 42)
+
+            db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
+
+
+
+
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index 420909ca..c612adc6 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -11,14 +11,13 @@
 ##############################################################################
 import unittest
 import os
-import time
-import errno
+
 import os.path as op
 import stat
 import logging
 import shutil
 from h5json import Hdf5db
-from h5json.h5py_reader import H5pyReader
+from h5json.reader.h5py_reader import H5pyReader
 
 
 def getFile(name, tgt, ro=False):
@@ -111,10 +110,6 @@ def testSimple(self):
 
             db.close()
 
-            
-
-          
-
 
 if __name__ == "__main__":
     # setup test files

From 06b5a6fe0e5ee1b390cdb13f584e136f2d012e88 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 27 Feb 2025 00:21:42 -0800
Subject: [PATCH 014/129] added h5py writer

---
 src/h5json/h5tojson/h5tojson.py    |  26 +--
 src/h5json/hdf5db.py               |   2 +-
 src/h5json/jsontoh5/jsontoh5.py    | 277 +++----------------------
 src/h5json/reader/h5py_reader.py   |   8 +-
 src/h5json/writer/h5json_writer.py |   3 +-
 src/h5json/writer/h5py_writer.py   | 186 +++++++++++++++++
 src/h5json/writer/h5writer.py      |   2 +
 test/unit/h5py_writer_test.py      | 321 +++++++++++++++++++++++++++++
 8 files changed, 559 insertions(+), 266 deletions(-)
 create mode 100644 src/h5json/writer/h5py_writer.py
 create mode 100644 test/unit/h5py_writer_test.py

diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/h5tojson/h5tojson.py
index 44a7a88c..48a4b83b 100755
--- a/src/h5json/h5tojson/h5tojson.py
+++ b/src/h5json/h5tojson/h5tojson.py
@@ -10,7 +10,6 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import sys
-import argparse
 import os.path as op
 import logging
 import logging.handlers
@@ -21,16 +20,18 @@
  
 
 def main():
-    parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-D|-d] <hdf5_file>")
-    parser.add_argument("-D", action="store_true", help="suppress all data output")
-    parser.add_argument(
-        "-d",
-        action="store_true",
-        help="suppress data output for" + " datasets (but not attribute values)",
-    )
-    parser.add_argument("filename", nargs="+", help="HDF5 to be converted to json")
-    args = parser.parse_args()
-
+    if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
+        print(f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file>")
+        sys.exit(0)
+
+    no_data = False
+    filename = None
+    for i in range(1, len(sys.argv)):
+        if sys.argv[i] == "--nodata":
+            no_data = True
+        else:
+            filename = sys.argv[i]
+        
     # create logger
     log = logging.getLogger("h5tojson")
     # log.setLevel(logging.WARN)
@@ -41,7 +42,6 @@ def main():
     # add handler to logger
     log.addHandler(handler)
 
-    filename = args.filename[0]
     if not op.isfile(filename):
         sys.exit(f"Cannot find file: {filename}")
 
@@ -49,7 +49,7 @@ def main():
 
     kwargs = {"app_logger": log}
     
-    with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False, **kwargs), **kwargs) as db:
+    with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter(None, no_data=no_data, **kwargs), **kwargs) as db:
         pass
 
 if __name__ == "__main__":
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 714059a6..e1194264 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -224,7 +224,7 @@ def getObjectIdByPath(self, h5path, parent_id=None):
     
     def getObjectByPath(self, path):
         """ Get Object JSON at given path """
-        obj_id = self.getObjectIDByPath(path)
+        obj_id = self.getObjectIdByPath(path)
         obj_json = self.getObjectById(obj_id)
         return obj_json    
 
diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/jsontoh5/jsontoh5.py
index c12d037a..bd1455e8 100755
--- a/src/h5json/jsontoh5/jsontoh5.py
+++ b/src/h5json/jsontoh5/jsontoh5.py
@@ -9,238 +9,35 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import json
-import argparse
-import h5py
+import sys
+import os.path as op
 import logging
 import logging.handlers
 
 from h5json import Hdf5db
+from h5json.writer.h5py_writer import H5pyWriter
+from h5json.reader.h5json_reader import H5JsonReader
 
-
-"""
-Writeh5 - return json representation of all objects within the given file
-    h5writer = Writeh5(db, h5json)
-        h5writer.writeFile()
-"""
-
-
-class Writeh5:
-    def __init__(self, db, json, options=None):
-        self.options = options
-        self.db = db
-        self.json = json
-        self.root_uuid = None
-
-    #
-    # Create a hard, soft, or external link
-    #
-    def createLink(self, link_obj, parent_uuid):
-        title = link_obj["title"]
-        link_class = link_obj["class"]
-        if link_class == "H5L_TYPE_HARD":
-            child_uuid = link_obj["id"]
-            self.db.linkObject(parent_uuid, child_uuid, title)
-        elif link_class == "H5L_TYPE_SOFT":
-            h5path = link_obj["h5path"]
-            self.db.createSoftLink(parent_uuid, h5path, title)
-        elif link_class == "H5L_TYPE_EXTERNAL":
-            h5path = link_obj["h5path"]
-            link_file = link_obj["file"]
-            self.db.createExternalLink(parent_uuid, link_file, h5path, title)
-        else:
-            print("Unable to create link with class:", link_class)
-
-    #
-    # Create HDF5 dataset object and write data values
-    #
-    def createDataset(self, uuid, body):
-        datatype = body["type"]
-        if isinstance(datatype, str) and datatype.startswith("datatypes/"):
-            # committed datatype, just pass in the UUID part
-            datatype = datatype[len("datatypes/") :]
-        dims = ()  # if no space in body, default to scalar
-        max_shape = None
-        creation_props = None
-        if "creationProperties" in body:
-            creation_props = body["creationProperties"]
-        if "shape" in body:
-            shape = body["shape"]
-            if shape["class"] == "H5S_SIMPLE":
-                dims = shape["dims"]
-                if isinstance(dims, int):
-                    # convert int to array
-                    dim1 = shape
-                    dims = [dim1]
-                if "maxdims" in shape:
-                    max_shape = shape["maxdims"]
-                    if isinstance(max_shape, int):
-                        # convert to array
-                        dim1 = max_shape
-                        max_shape = [dim1]
-                    # convert H5S_UNLIMITED's to None's
-                    for i in range(len(max_shape)):
-                        if max_shape[i] == "H5S_UNLIMITED":
-                            max_shape[i] = None
-            elif shape["class"] == "H5S_NULL":
-                dims = None
-
-        self.db.createDataset(
-            datatype,
-            dims,
-            max_shape=max_shape,
-            creation_props=creation_props,
-            obj_uuid=uuid,
-        )
-
-        if "value" in body:
-            data = body["value"]
-            if data:
-                data = self.db.toRef(len(dims), datatype, data)
-                self.db.setDatasetValuesByUuid(uuid, data)
-
-    def createAttribute(self, attr_json, col_name, uuid):
-        attr_name = attr_json["name"]
-        datatype = attr_json["type"]
-        if isinstance(datatype, str) and datatype.startswith("datatypes/"):
-            # committed datatype, just pass in the UUID part
-            datatype = datatype[len("datatypes/") :]
-
-        attr_value = None
-        if "value" in attr_json:
-            attr_value = attr_json["value"]
-        dims = None
-        if "shape" in attr_json:
-            shape = attr_json["shape"]
-            if shape["class"] == "H5S_SIMPLE":
-                dims = shape["dims"]
-                if isinstance(dims, int):
-                    # convert int to array
-                    dim1 = shape
-                    dims = [dim1]
-            elif shape["class"] == "H5S_SCALAR":
-                dims = ()  # empty tuple for scalar
-        self.db.createAttribute(col_name, uuid, attr_name, dims, datatype, attr_value)
-
-    #
-    # create committed datatype HDF5 object
-    #
-    def createDatatype(self, uuid, body):
-        datatype = body["type"]
-        self.db.createCommittedType(datatype, obj_uuid=uuid)
-
-    #
-    # Create HDF5 group object  (links and attributes will be added later)
-    #
-    def createGroup(self, uuid, body):
-        if uuid != self.root_uuid:
-            self.db.createGroup(obj_uuid=uuid)
-
-    #
-    # Create all the HDF5 objects defined in the JSON file
-    #
-    def createObjects(self):
-        # create datatypes
-        if "datatypes" in self.json:
-            datatypes = self.json["datatypes"]
-            for uuid in datatypes:
-                json_obj = datatypes[uuid]
-                self.createDatatype(uuid, json_obj)
-        # create groups
-        if "groups" in self.json:
-            groups = self.json["groups"]
-            for uuid in groups:
-                json_obj = groups[uuid]
-                self.createGroup(uuid, json_obj)
-        # create datasets
-        if "datasets" in self.json:
-            datasets = self.json["datasets"]
-            for uuid in datasets:
-                json_obj = datasets[uuid]
-                self.createDataset(uuid, json_obj)
-
-    #
-    # Create all the attributes for HDF5 objects defined in the JSON file
-    # Note: this needs to be done after createObjects since an attribute
-    # may use a committed datatype
-    #
-    def createAttributes(self):
-        dimension_list_attrs = []  # track dimension list attributes
-        # create datatype attributes
-        if "datatypes" in self.json:
-            datatypes = self.json["datatypes"]
-            for uuid in datatypes:
-                body = datatypes[uuid]
-                if "attributes" in body:
-                    attributes = body["attributes"]
-                    for attribute in attributes:
-                        self.createAttribute(attribute, "datatypes", uuid)
-        # create group attributes
-        if "groups" in self.json:
-            groups = self.json["groups"]
-            for uuid in groups:
-                body = groups[uuid]
-                if "attributes" in body:
-                    attributes = body["attributes"]
-                    for attribute in attributes:
-                        self.createAttribute(attribute, "groups", uuid)
-        # create datasets
-        if "datasets" in self.json:
-            datasets = self.json["datasets"]
-            for uuid in datasets:
-                body = datasets[uuid]
-                if "attributes" in body:
-                    attributes = body["attributes"]
-                    for attribute in attributes:
-                        if attribute["name"] == "DIMENSION_LIST":
-                            # defer dimension list creation until after we've created all other
-                            # attributes (otherwsie attach_scale may fail)
-                            dimension_list_attrs.append(
-                                {"attribute": attribute, "uuid": uuid}
-                            )
-                        else:
-                            self.createAttribute(attribute, "datasets", uuid)
-
-        # finally, do dimension_list attributes
-        for item in dimension_list_attrs:
-            attribute = item["attribute"]
-            uuid = item["uuid"]
-            self.createAttribute(attribute, "datasets", uuid)
-
-    #
-    # Link all the objects
-    # Note: this will "de-anonymous-ize" objects defined in the HDF5 file
-    #   Any non-linked objects will be deleted when the __db__ group is deleted
-    #
-    def createLinks(self):
-        if "groups" in self.json:
-            groups = self.json["groups"]
-            for uuid in groups:
-                json_obj = groups[uuid]
-                if "links" in json_obj:
-                    links = json_obj["links"]
-                    for link in links:
-                        self.createLink(link, uuid)
-
-    def writeFile(self):
-
-        self.root_uuid = self.json["root"]
-
-        self.createObjects()  # create datasets, groups, committed datatypes
-        self.createAttributes()  # create attributes for objects
-        self.createLinks()  # link it all together
-
+    
 
 def main():
-    parser = argparse.ArgumentParser(usage="%(prog)s [-h] <json_file> <h5_file>")
-    parser.add_argument(
-        "in_filename", nargs="+", help="JSon file to be converted to h5"
-    )
-    parser.add_argument("out_filename", nargs="+", help="name of HDF5 output file")
-    args = parser.parse_args()
-
+    if len(sys.argv) < 3 or sys.argv[1] in ("-h", "--help"):
+        print(f"usage: {sys.argv[0]} [-h] [--nodata] <json_file> <h5_file>")
+        sys.exit(0)
+
+    no_data = False
+    json_filename = None
+    hdf5_filename = None
+    for i in range(1, len(sys.argv)):
+        if sys.argv[i] == "--nodata":
+            no_data = True
+        elif not json_filename:
+            json_filename = sys.argv[i]
+        else:
+            hdf5_filename = sys.argv[i]
+ 
     # create logger
-    log = logging.getLogger("h5serv")
+    log = logging.getLogger("h5json")
     # log.setLevel(logging.WARN)
     log.setLevel(logging.INFO)
     # add log handler
@@ -249,34 +46,16 @@ def main():
     # add handler to logger
     log.addHandler(handler)
 
-    text = open(args.in_filename[0]).read()
-
-    # parse the json file
-    h5json = json.loads(text)
-
-    if "root" not in h5json:
-        raise Exception("no root key in input file")
-    root_uuid = h5json["root"]
-
-    filename = args.out_filename[0]
-
-    # create the file, will raise IOError if there's a problem
-    Hdf5db.createHDF5File(filename)
+    if not op.isfile(json_filename):
+        sys.exit(f"Cannot find file: {json_filename}")
 
-    with Hdf5db(
-        filename, root_uuid=root_uuid, update_timestamps=False, app_logger=log
-    ) as db:
-        h5writer = Writeh5(db, h5json)
-        h5writer.writeFile()
+    log.info(f"jsontoh5 {json_filename} to {hdf5_filename}")
 
-    # open with h5py and remove the _db_ group
-    # Note: this will delete any anonymous (un-linked) objects
-    f = h5py.File(filename, "a")
-    if "__db__" in f:
-        del f["__db__"]
-    f.close()
+    kwargs = {"app_logger": log}
+    
+    with Hdf5db(h5_reader=H5JsonReader(json_filename, **kwargs), h5_writer=H5pyWriter(hdf5_filename, no_data=no_data, **kwargs), **kwargs) as db:
+        pass
 
-    print("done!")
 
 
 if __name__ == "__main__":
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index 6d06e6c1..57f0f3a0 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -108,6 +108,12 @@ def get_root_id(self):
         """ Return root id """
         return self._root_id
     
+    def getObjIdByAddress(self, addr):
+        if addr in self._addr_map:
+            return self._addr_map[addr]
+        else:
+            return None
+    
     def getAttribute(self, obj_id, name, include_data=True):
         """ Return JSON for the given attribute """
 
@@ -130,7 +136,7 @@ def getAttribute(self, obj_id, name, include_data=True):
             type_uuid = None
             addr = h5py.h5o.get_info(typeid).addr
             type_uuid = self.getObjIdByAddress(addr)
-            committedType = self.getCommittedTypeItemByUuid(type_uuid)
+            committedType = self._id_map[type_uuid]
             type_item = committedType["type"].copy()
             type_item["id"] = type_uuid
         else:
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index 85dd8e38..8c5ce6af 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -30,10 +30,9 @@ def __init__(
         no_data=False,
         app_logger=None
     ):
-        super().__init__(filepath, append=append, app_logger=app_logger)
+        super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
         self.alias_db = {}
         self.json = {}
-        self._no_data = no_data
         self._root_uuid = None
        
     def flush(self):
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
new file mode 100644
index 00000000..571dc37f
--- /dev/null
+++ b/src/h5json/writer/h5py_writer.py
@@ -0,0 +1,186 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import h5py
+
+from ..objid import getCollectionForId
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray
+
+from .h5writer import H5Writer
+
+
+
+class H5pyWriter(H5Writer):
+    """
+    This class saves state from the Hdf5Db class into an HDF5 file.  
+    """
+
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
+
+        if append:
+            self._mode = "a"
+        else:
+            self._mode = "w"
+
+        self._f = None
+        self._id_map = {}
+
+    def _createGroup(self, parent, grp_json, name=None):
+        """ create the group and any links it contains """
+        grp = parent.create_group(name)
+        if "links" in grp_json:
+            grp_links = grp_json["links"]
+            self._createLinks(grp, grp_links)
+        
+
+    def _createDataset(self, parent, dset_json, name=None):
+        """ create a dataset object """
+
+        type_item = dset_json["type"]
+        dtype = createDataType(type_item)
+        kwds = {"dtype": dtype}
+        shape_json = dset_json["shape"]
+        if shape_json["class"] == "H5S_NULL":
+            # skip the shape keyword to create a null space dataset
+            pass
+        elif shape_json["class"] == "H5S_SCALAR":
+            kwds["shape"] = ()
+        else:
+            kwds["shape"] = shape_json["dims"]
+        parent.create_dataset(name, **kwds)
+
+
+    def _createDatatype(self, parent, ctype_json, name=None):
+        """ create a datatype object """
+
+        type_item = ctype_json["type"]
+        dtype = createDataType(type_item)
+        parent[name] = dtype
+
+
+    def _createLinks(self, parent, links_json):
+        """ create links in the given group """
+        for title in links_json:
+            if title in parent:
+                # TBD: this will do the wrong thing if the link tgt has changed
+                continue
+            link_json = links_json[title]
+            link_class = link_json["class"]
+            if link_class == "H5L_TYPE_SOFT":
+                h5path = link_json["h5path"]
+                parent[title] = h5py.SoftLink(h5path)
+            elif link_class == "H5L_TYPE_EXTERNAL":
+                h5path = link_json["h5path"]
+                filename = link_json["file"]
+                parent[title] = h5py.ExternalLink(filename, h5path)
+            elif link_class == "H5L_TYPE_USER_DEFINED":
+                self.log.warning("unable to create user-defined link: {title}")
+            elif link_class == "H5L_TYPE_HARD":
+                tgt_id = link_json["id"]
+                if tgt_id in self._id_map:
+                    tgt_path = self._id_map[tgt_id]
+                    tgt_obj = parent[tgt_path]
+                    parent[title] = tgt_obj
+                else:
+                    obj_json = self.db.getObjectById(tgt_id)
+                    parent_path = parent.name
+                    if parent_path[-1] != '/':
+                        parent_path += '/'
+                    self._id_map[tgt_id] = parent_path + title
+                    collection = getCollectionForId(tgt_id)
+                    kwds = {"name": title}
+                    if collection == "groups":
+                        tgt_obj = self._createGroup(parent, obj_json, **kwds)
+                    elif collection == "datasets":
+                        tgt_obj = self._createDataset(parent, obj_json, **kwds)
+                    elif collection == "datatypes":
+                        tgt_obj = self._createDatatype(parent, obj_json, **kwds)
+                    else:
+                        self.log.warning(f"unexpected collection: {collection}")
+                        tgt_obj = None
+                    if tgt_obj:
+                        parent[title] = tgt_obj
+            else:
+                self.log.warning(f"unexpected link class: {link_class}")
+
+    def createAttribute(self, obj, name, attr_json):
+        """ add the given attribute to obj """
+
+        dtype = createDataType(attr_json["type"])
+        shape_json = attr_json["shape"]
+        shape_class = shape_json["class"]
+        if shape_class == "H5S_NULL":
+            dims = None
+        elif shape_class == "H5S_SCALAR":
+            dims = ()
+        else:
+            dims = tuple(shape_json["dims"])
+
+        if dims is None:
+            obj.attrs[name] = h5py.Empty(dtype)
+        else:
+            json_value = attr_json["value"]
+            arr = jsonToArray(dims, dtype, json_value)
+            obj.attrs[name] = arr
+
+
+    def createAttributes(self, obj, obj_json):
+        """ create attributes """
+
+        if "attributes" not in obj_json:
+            # no attributes
+            return
+        
+        attrs = obj_json["attributes"]
+        for name in attrs:
+            attr_json = attrs[name]
+            self.createAttribute(obj, name, attr_json)
+
+
+    def visitAttributes(self, path, obj):
+        name = obj.__class__.__name__
+        self.log.info(f"visit: {path} name: {name}")
+
+        obj_json = self.db.getObjectByPath(path)
+        self.createAttributes(obj, obj_json)
+
+    def flush(self):
+        """ Write dirty items """
+        if not self.db:
+            # no db set yet
+            return
+        
+        root_id = self.db.root_id
+        self._id_map[root_id] = "/"
+        with h5py.File(self._filepath, mode=self._mode) as f:
+            root_json = self.db.getObjectById(root_id)
+            if "links" in root_json:
+                root_links = root_json["links"]
+                self._createLinks(f, root_links)
+            # update attributes
+            self.createAttributes(f, root_json)
+            f.visititems(self.visitAttributes)
+        self._mode = "a"  # use append mode for future updates
+
+  
+    def close(self):
+        """ close storage handle """
+        self.flush()
+
diff --git a/src/h5json/writer/h5writer.py b/src/h5json/writer/h5writer.py
index 3aa77bb9..4e57048f 100644
--- a/src/h5json/writer/h5writer.py
+++ b/src/h5json/writer/h5writer.py
@@ -25,10 +25,12 @@ def __init__(
         self,
         filepath,
         append=False,
+        no_data=False,
         app_logger=None
     ):
         self._filepath = filepath
         self._append = append
+        self._no_data = no_data
         self._filepath = filepath
         self._db_ref = None
         if app_logger:
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
new file mode 100644
index 00000000..38447aff
--- /dev/null
+++ b/test/unit/h5py_writer_test.py
@@ -0,0 +1,321 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import time
+import logging
+import numpy as np
+from h5json import Hdf5db
+from h5json.writer.h5py_writer import H5pyWriter
+from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId
+from h5json.hdf5dtype import special_dtype, Reference
+from h5json import selections
+
+
+class H5pyWriterTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5pyWriterTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.DEBUG)
+        # create logger
+
+        handler = logging.FileHandler("./hdf5dbtest.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+        # self.log.propagate = False  # prevent log out going to stdout
+        self.log.info("init!")
+
+
+    def testGroup(self):
+    
+        with Hdf5db(h5_writer=H5pyWriter("/tmp/foo2.h5", no_data=False), app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "attr1", value=[1,2,3,4])
+            db.createAttribute(root_id, "attr2", 42)
+            g1_id = db.createGroup()
+            db.createHardLink(root_id, "g1", g1_id)
+            g2_id = db.createGroup()
+            db.createHardLink(root_id, "g2", g2_id)
+
+            g1_1_id = db.createGroup()
+            db.createHardLink(g1_id, "g1.1", g1_1_id)
+            dset_111_id = db.createDataset(shape=(10,10), dtype=np.int32)
+            arr = np.zeros((10, 10), dtype=np.int32)
+            for i in range(10):
+                for j in range(10):
+                    arr[i, j] = i * j
+            sel_all = selections.select((10, 10), ...)
+            db.setDatasetValues(dset_111_id, sel_all, arr)
+            db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+            db.createSoftLink(g2_id, "slink", "somewhere")
+            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+            db.flush()
+            
+
+
+
+    def testNullSpaceAttribute(self):
+
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in item)
+            shape_item = item["shape"]
+            self.assertTrue("class" in shape_item)
+            self.assertEqual(shape_item["class"], "H5S_NULL")
+            self.assertTrue(item["created"] > time.time() - 1.0)
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(value, None)
+
+    def testScalarAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dims = ()
+            value = 42
+            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+            self.assertEqual(item["value"], 42)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            shape = item["shape"]
+            self.assertEqual(shape["class"], "H5S_SCALAR")
+
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            
+
+    def testFixedStringAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            value = "Hello, world!"
+            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["length"], 13)
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            ret_value = db.getAttributeValue(root_id, "A1")
+       
+
+    def testVlenAsciiAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+ 
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=bytes)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+    def testVlenUtf8Attribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+ 
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=str)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+ 
+
+    def testIntAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            value = [2, 3, 5, 7, 11]
+            db.createAttribute(root_id, "A1", value, dtype=np.int16)
+            item = db.getAttribute(root_id, "A1")
+            self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(item_shape["dims"], [5,])
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+
+    def testCreateReferenceAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            db.createHardLink(root_id, "DS1", dset_id)
+
+            dt = special_dtype(ref=Reference)
+
+            ds1_ref = "datasets/" + dset_id
+            value = [ds1_ref,]
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            item = db.getAttribute(root_id, "A1")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in attr)
+            
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+            self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+            attr_value = item["value"]
+            self.assertEqual(len(attr_value), 1)
+            self.assertEqual(attr_value[0], ds1_ref)
+
+    def testCreateVlenReferenceAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            db.createHardLink(root_id, "DS1", dset_id)
+            grp_id = db.createGroup()
+            db.createHardLink(root_id, "G1", grp_id)
+
+            dt_base = special_dtype(ref=Reference)
+            dt = special_dtype(vlen=dt_base)
+             
+            ds1_ref = "datasets/" + dset_id
+            grp_ref = "groups/" + grp_id
+            ref_arr = np.zeros((2,), dtype=dt_base)
+            ref_arr[0] = ds1_ref
+            ref_arr[1] = grp_ref
+            vlen_arr = np.zeros((), dtype=dt)
+            vlen_arr[()] = ref_arr
+             
+            db.createAttribute(root_id, "A1", vlen_arr)
+            item = db.getAttribute(root_id, "A1")
+
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_VLEN")
+            self.assertEqual(item_type["size"], "H5T_VARIABLE")
+            base_type = item_type["base"]
+            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+            
+
+    def testCommittedType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dt = np.dtype("S15")
+             
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+            item_type = item["type"]
+
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item_type["length"], 15)
+
+            # create an attribute using the committed type
+            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], "hello world!")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_STRING")
+            self.assertEqual(attr_type["length"], 15)
+            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+
+
+    def testCommittedCompoundType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dt_str = special_dtype(vlen=str)
+            fields = []
+            fields.append(("field_1", np.dtype(">i8")))
+            fields.append(("field_2", ">f8"))
+            fields.append(("field_3", np.dtype("S15")))
+            fields.append(("field_4", dt_str))
+            dt = np.dtype(fields)
+
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+            item_type = item["type"]
+
+            self.assertEqual(item_type["class"], "H5T_COMPOUND")
+            fields = item_type["fields"]
+            self.assertEqual(len(fields), 4)
+
+            # create an attribute using the committed type
+            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], list(attr_value))
+            attr_shape = attr["shape"]
+            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+            
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertTrue(isinstance(value, np.ndarray))
+   
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()

From 8fceb5f4efad7dc528d3c32dfb60b1e52a60fd3c Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 27 Feb 2025 11:14:08 -0800
Subject: [PATCH 015/129] added filters.py

---
 src/h5json/filters.py            |  56 ++++++++++++++++
 src/h5json/reader/h5py_reader.py |  55 ++--------------
 src/h5json/writer/h5py_writer.py | 106 +++++++++++++++++++++++++++++--
 3 files changed, 162 insertions(+), 55 deletions(-)
 create mode 100644 src/h5json/filters.py

diff --git a/src/h5json/filters.py b/src/h5json/filters.py
new file mode 100644
index 00000000..e6511366
--- /dev/null
+++ b/src/h5json/filters.py
@@ -0,0 +1,56 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+ 
+import h5py
+
+_HDF_FILTERS = {
+    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
+    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
+    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
+    4: {
+        "class": "H5Z_FILTER_SZIP",
+        "alias": "szip",
+        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
+    },
+    5: {"class": "H5Z_FILTER_NBIT"},
+    6: {
+        "class": "H5Z_FILTER_SCALEOFFSET",
+        "alias": "scaleoffset",
+        "options": ["scaleType", "scaleOffset"],
+    },
+    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
+}
+
+_HDF_FILTER_OPTION_ENUMS = {
+    "coding": {
+        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
+        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
+    },
+    "scaleType": {
+        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
+        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
+        h5py.h5z.SO_INT: "H5Z_SO_INT",
+    },
+}
+
+# h5py supported filters
+_H5PY_FILTERS = {
+    "gzip": 1,
+    "shuffle": 2,
+    "fletcher32": 3,
+    "szip": 4,
+    "scaleoffset": 6,
+    "lzf": 32000,
+}
+
+_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
+
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index 57f0f3a0..2100dec6 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -17,50 +17,9 @@
 from ..hdf5dtype import getTypeItem
 from ..array_util import bytesArrayToList
 from .. import selections
-from ..h5reader import H5Reader
-
-_HDF_FILTERS = {
-    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
-    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
-    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
-    4: {
-        "class": "H5Z_FILTER_SZIP",
-        "alias": "szip",
-        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
-    },
-    5: {"class": "H5Z_FILTER_NBIT"},
-    6: {
-        "class": "H5Z_FILTER_SCALEOFFSET",
-        "alias": "scaleoffset",
-        "options": ["scaleType", "scaleOffset"],
-    },
-    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
-}
-
-_HDF_FILTER_OPTION_ENUMS = {
-    "coding": {
-        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
-        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
-    },
-    "scaleType": {
-        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
-        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
-        h5py.h5z.SO_INT: "H5Z_SO_INT",
-    },
-}
-
-# h5py supported filters
-_H5PY_FILTERS = {
-    "gzip": 1,
-    "shuffle": 2,
-    "fletcher32": 3,
-    "szip": 4,
-    "scaleoffset": 6,
-    "lzf": 32000,
-}
-
-_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
-
+from .. import filters
+from .h5reader import H5Reader
+  
 
 class H5pyReader(H5Reader):
     """
@@ -309,8 +268,8 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
                 filter_prop["id"] = filter_id
                 if filter_info[3]:
                     filter_prop["name"] = self.bytesArrayToList(filter_info[3])
-                if filter_id in _HDF_FILTERS:
-                    hdf_filter = _HDF_FILTERS[filter_id]
+                if filter_id in filters._HDF_FILTERS:
+                    hdf_filter = filters._HDF_FILTERS[filter_id]
                     filter_prop["class"] = hdf_filter["class"]
                     if "options" in hdf_filter:
                         filter_opts = hdf_filter["options"]
@@ -320,8 +279,8 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
                             opt_value = opt_values[i]
                             opt_value_enum = None
                             option_name = filter_opts[i]
-                            if option_name in _HDF_FILTER_OPTION_ENUMS:
-                                option_enums = _HDF_FILTER_OPTION_ENUMS[option_name]
+                            if option_name in filters._HDF_FILTER_OPTION_ENUMS:
+                                option_enums = filters._HDF_FILTER_OPTION_ENUMS[option_name]
                                 if opt_value in option_enums:
                                     opt_value_enum = option_enums[opt_value]
                             if opt_value_enum:
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index 571dc37f..59865d5c 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -14,7 +14,7 @@
 from ..objid import getCollectionForId
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray
-
+from .. import filters
 from .h5writer import H5Writer
 
 
@@ -55,17 +55,109 @@ def _createDataset(self, parent, dset_json, name=None):
 
         type_item = dset_json["type"]
         dtype = createDataType(type_item)
-        kwds = {"dtype": dtype}
+        kwargs = {"dtype": dtype}
         shape_json = dset_json["shape"]
-        if shape_json["class"] == "H5S_NULL":
+        shape_class = shape_json["class"]
+        if shape_class == "H5S_NULL":
             # skip the shape keyword to create a null space dataset
             pass
-        elif shape_json["class"] == "H5S_SCALAR":
-            kwds["shape"] = ()
+        elif shape_class == "H5S_SCALAR":
+            kwargs["shape"] = ()
         else:
-            kwds["shape"] = shape_json["dims"]
-        parent.create_dataset(name, **kwds)
+            kwargs["shape"] = shape_json["dims"]
+        if "dcpl" in dset_json and shape_class != "H5S_NULL":
+            creation_props = dset_json["dcpl"]
+            if "fillValue" in creation_props:
+                fillvalue = creation_props["fillValue"]
+                if fillvalue and len(dtype) > 1 and type(fillvalue) in (list, tuple):
+                    # for compound types, need to convert from list to dataset compatible element
+
+                    if len(dtype) != len(fillvalue):
+                        msg = "fillvalue has incorrect number of elements"
+                        self.log.warning(msg)
+                        raise ValueError(msg)
+                    
+                    fillvalue = jsonToArray((), dtype, fillvalue)
+
+                kwargs["fillvalue"] = fillvalue
 
+            if "trackTimes" in creation_props:
+                kwargs["track_times"] = creation_props["trackTimes"]
+            if "layout" in creation_props:
+                layout = creation_props["layout"]
+                if "dims" in layout:
+                    kwargs["chunks"] = tuple(layout["dims"])
+            if "filters" in creation_props:
+                filter_props = creation_props["filters"]
+                for filter_prop in filter_props:
+                    if "id" not in filter_prop:
+                        self.log.warning("filter id not provided")
+                        continue
+                    filter_id = filter_prop["id"]
+                    if filter_id not in filters._HDF_FILTERS:
+                        self.log.warning(f"unknown filter id: {filter_id} ignoring")
+                        continue
+
+                    hdf_filter = filters._HDF_FILTERS[filter_id]
+
+                    self.log.info(f"got filter: {filter_id}")
+                    if "alias" not in hdf_filter:
+                        self.log.warning(f"unsupported filter id: {filter_id} ignoring")
+                        continue
+
+                    filter_alias = hdf_filter["alias"]
+                    if not h5py.h5z.filter_avail(filter_id):
+                        msg = "compression filter not available, filter: {filter_alias}, ignoring"
+                        self.log.warning(msg)
+                        continue
+                    if filter_alias in filters._H5PY_COMPRESSION_FILTERS:
+                        if kwargs.get("compression"):
+                            msg = f"compression filter already set for {filter_alias}, ignoring"
+                            self.log.info(msg)
+                            continue
+
+                        kwargs["compression"] = filter_alias
+                        self.log.info("setting compression filter to: {filter_alias}")
+                        if filter_alias == "gzip":
+                            # check for an optional compression value
+                            if "level" in filter_prop:
+                                kwargs["compression_opts"] = filter_prop["level"]
+                        elif filter_alias == "szip":
+                            bitsPerPixel = None
+                            coding = "nn"
+
+                            if "bitsPerPixel" in filter_prop:
+                                bitsPerPixel = filter_prop["bitsPerPixel"]
+                            if "coding" in filter_prop:
+                                if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
+                                    coding = "ec"
+                                elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
+                                    coding = "nn"
+                                else:
+                                    self.log.warning("invalid szip option: 'coding'")
+                            # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
+                            # so these options will be ignored
+                            if "pixelsPerBlock" in filter_props:
+                                self.log.info("ignoring szip option: 'pixelsPerBlock'")
+                            if "pixelsPerScanline" in filter_props:
+                                self.log.info("ignoring szip option: 'pixelsPerScanline'")
+                            if bitsPerPixel:
+                                kwargs["compression_opts"] = (coding, bitsPerPixel)
+                    else:
+                        if filter_alias == "shuffle":
+                            kwargs["shuffle"] = True
+                        elif filter_alias == "fletcher32":
+                            kwargs["fletcher32"] = True
+                        elif filter_alias == "scaleoffset":
+                            if "scaleOffset" not in filter_prop:
+                                msg = "No scale_offset provided for scale offset filter, ignoring"
+                                self.log(msg)
+                                continue
+                            kwargs["scaleoffset"] = filter_prop["scaleOffset"]
+                        else:
+                            self.log.info(f"Unexpected filter name: {filter_alias}, ignoring")
+                            
+        parent.create_dataset(name, **kwargs)
 
     def _createDatatype(self, parent, ctype_json, name=None):
         """ create a datatype object """

From af4d46a2842affe7c684475150f218781f114a52 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 4 Mar 2025 12:28:15 -0800
Subject: [PATCH 016/129] updates for h5py_writer to write dataset values

---
 src/h5json/dset_util.py          | 48 +------------------------
 src/h5json/hdf5db.py             | 26 ++++++++++----
 src/h5json/writer/h5py_writer.py | 60 +++++++++++++++++++++-----------
 test/unit/h5json_reader_test.py  | 32 +----------------
 test/unit/h5py_reader_test.py    | 27 +-------------
 test/unit/h5py_writer_test.py    | 23 +++++++++++-
 6 files changed, 85 insertions(+), 131 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index c89f141f..6cd51c3d 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -2,7 +2,7 @@
 # Copyright by The HDF Group.                                                #
 # All rights reserved.                                                       #
 #                                                                            #
-# This file is part of HSDS (HDF5 REST Server) Service, Libraries and      #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
 # Utilities.  The full HDF5 REST Server copyright notice, including          #
 # terms governing use, modification, and redistribution, is contained in     #
 # the file COPYING, which can be found at the root of the source code        #
@@ -11,52 +11,6 @@
 ##############################################################################
 
 import time
-from .hdf5dtype import getTypeItem
-
-"""
-# standard compress filters
-_HDF_FILTERS = {
-    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
-    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
-    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
-    4: {
-        "class": "H5Z_FILTER_SZIP",
-        "alias": "szip",
-        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
-    },
-    5: {"class": "H5Z_FILTER_NBIT"},
-    6: {
-        "class": "H5Z_FILTER_SCALEOFFSET",
-        "alias": "scaleoffset",
-        "options": ["scaleType", "scaleOffset"],
-    },
-    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
-}
-
-_HDF_FILTER_OPTION_ENUMS = {
-    "coding": {
-        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
-        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
-    },
-    "scaleType": {
-        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
-        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
-        h5py.h5z.SO_INT: "H5Z_SO_INT",
-    },
-}
-
-# h5py supported filters
-_H5PY_FILTERS = {
-    "gzip": 1,
-    "shuffle": 2,
-    "fletcher32": 3,
-    "szip": 4,
-    "scaleoffset": 6,
-    "lzf": 32000,
-}
-
-_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
-"""
 
 def resize_dataset(dset_json, shape):
     shape_json = dset_json["shape"]
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index e1194264..352d6794 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -111,8 +111,7 @@ def make_dirty(self, obj_id):
             # object deleted, just return
             return
         obj_json = self.db[obj_id]
-        now = time.time()
-        obj_json["lastModified"] = now
+        obj_json["lastModified"] = time.time()
         self._dirty_objects.add(obj_id)
 
 
@@ -120,10 +119,25 @@ def flush(self):
         """ write out any changes """
         if not self.writer:
             return  # nothing to do
-        if self.writer.flush():
-            # reset new and dirty sets
-            self._new_objects = set()
-            self._dirty_objects = set()
+        
+        print("self._new_objects:", self._new_objects)
+        print("self._dirty_objects:", self._dirty_objects)
+        obj_ids = self._new_objects.union(self._dirty_objects)
+        print(f"hdf5db_flush {len(obj_ids)} objects")
+
+        if not self.writer.flush():
+            # flush not successful, don't clear dirty set
+            return  
+
+
+        for obj_id in obj_ids:
+            obj_json = self._db[obj_id]
+            if "values" in obj_json:
+                obj_json["values"] = []
+
+        # reset new and dirty sets
+        self._new_objects = set()
+        self._dirty_objects = set()
            
     def close(self):
         """ close reader and writer handles """
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index 59865d5c..247098a8 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -47,8 +47,7 @@ def _createGroup(self, parent, grp_json, name=None):
         grp = parent.create_group(name)
         if "links" in grp_json:
             grp_links = grp_json["links"]
-            self._createLinks(grp, grp_links)
-        
+            self._createObjects(grp, grp_links)
 
     def _createDataset(self, parent, dset_json, name=None):
         """ create a dataset object """
@@ -167,8 +166,8 @@ def _createDatatype(self, parent, ctype_json, name=None):
         parent[name] = dtype
 
 
-    def _createLinks(self, parent, links_json):
-        """ create links in the given group """
+    def _createObjects(self, parent, links_json):
+        """ create child object in the given group, recurse for any sub-groups """
         for title in links_json:
             if title in parent:
                 # TBD: this will do the wrong thing if the link tgt has changed
@@ -212,8 +211,27 @@ def _createLinks(self, parent, links_json):
             else:
                 self.log.warning(f"unexpected link class: {link_class}")
 
+    def updateDatasetValues(self, dset_id, dset):
+        """ write any pending dataset values """
+        dset_json = self.db.getObjectById(dset_id)
+        if "updates" not in dset_json:
+            return
+        updates = dset_json["updates"]
+        for (sel, val) in updates:
+            slices = []
+            for dim in range(len(sel.shape)):
+                start = sel.start[dim]
+                stop = start + sel.count[dim]
+                step = sel.step[dim]
+                slices.append(slice(start, stop, step))
+            slices = tuple(slices)  
+            dset[slices] = val
+            self.log.debug(f"h5py_writer dset {dset.name} updated")
+
+
     def createAttribute(self, obj, name, attr_json):
         """ add the given attribute to obj """
+        print(f"h5py_writer.createAttribute {obj.name}: {name}")
 
         dtype = createDataType(attr_json["type"])
         shape_json = attr_json["shape"]
@@ -233,9 +251,11 @@ def createAttribute(self, obj, name, attr_json):
             obj.attrs[name] = arr
 
 
-    def createAttributes(self, obj, obj_json):
-        """ create attributes """
+    def updateAttributes(self, obj_id, obj):
+        """ create/replace any modified attributes """
 
+        obj_json = self.db.getObjectById(obj_id)
+        
         if "attributes" not in obj_json:
             # no attributes
             return
@@ -245,31 +265,31 @@ def createAttributes(self, obj, obj_json):
             attr_json = attrs[name]
             self.createAttribute(obj, name, attr_json)
 
-
-    def visitAttributes(self, path, obj):
-        name = obj.__class__.__name__
-        self.log.info(f"visit: {path} name: {name}")
-
-        obj_json = self.db.getObjectByPath(path)
-        self.createAttributes(obj, obj_json)
-
+ 
     def flush(self):
         """ Write dirty items """
         if not self.db:
             # no db set yet
-            return
-        
+            return False
+   
+        self.log.info("h5py_writer.flush()")
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
         with h5py.File(self._filepath, mode=self._mode) as f:
             root_json = self.db.getObjectById(root_id)
             if "links" in root_json:
                 root_links = root_json["links"]
-                self._createLinks(f, root_links)
-            # update attributes
-            self.createAttributes(f, root_json)
-            f.visititems(self.visitAttributes)
+                self._createObjects(f, root_links)
+            # update attributes, dataset values
+            for obj_id in self._id_map:
+                if self.db.is_dirty(obj_id):
+                    h5path = self._id_map[obj_id]
+                    obj = f[h5path]
+                    self.updateAttributes(obj_id, obj)
+                    self.updateDatasetValues(obj_id, obj)
+
         self._mode = "a"  # use append mode for future updates
+        return True  # all objects written successfully
 
   
     def close(self):
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index effa0e58..5027232e 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -10,40 +10,11 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import unittest
-import os
-import os.path as op
-import stat
 import logging
-import shutil
 from h5json import Hdf5db
 from h5json.reader.h5json_reader import H5JsonReader
 
 
-def getFile(name, tgt, ro=False):
-    src = "data/json/" + name
-    logging.info("copying file to this directory: " + src)
-
-    filepath = "./out/" + tgt
-
-    if op.isfile(filepath):
-        # make sure it's writable, before we copy over it
-        os.chmod(filepath, stat.S_IWRITE | stat.S_IREAD)
-    shutil.copyfile(src, filepath)
-    if ro:
-        logging.info("make read-only")
-        os.chmod(filepath, stat.S_IREAD)
-    return filepath
-
-
-def removeFile(name):
-    try:
-        os.stat(name)
-    except OSError:
-        return
-        # file does not exist
-    os.remove(name)
-
-
 class H5pyReaderTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(H5pyReaderTest, self).__init__(*args, **kwargs)
@@ -64,12 +35,11 @@ def __init__(self, *args, **kwargs):
             self.log.removeHandler(lhStdout)
 
     def testSimple(self):
-        filepath = getFile("tall.json", "tall.json", ro=True)
+        filepath = "data/json/tall.json"
         kwargs = {"app_logger": self.log}
         with Hdf5db(h5_reader=H5JsonReader(filepath, **kwargs), **kwargs) as db:
             root_id = db.getObjectIdByPath("/")
             root_json = db.getObjectById(root_id)
-            print("root_json:", root_json)
 
             root_attrs = root_json["attributes"]
             self.assertEqual(len(root_attrs), 2)
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index c612adc6..b878434e 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -20,31 +20,6 @@
 from h5json.reader.h5py_reader import H5pyReader
 
 
-def getFile(name, tgt, ro=False):
-    src = "data/hdf5/" + name
-    logging.info("copying file to this directory: " + src)
-
-    filepath = "./out/" + tgt
-
-    if op.isfile(filepath):
-        # make sure it's writable, before we copy over it
-        os.chmod(filepath, stat.S_IWRITE | stat.S_IREAD)
-    shutil.copyfile(src, filepath)
-    if ro:
-        logging.info("make read-only")
-        os.chmod(filepath, stat.S_IREAD)
-    return filepath
-
-
-def removeFile(name):
-    try:
-        os.stat(name)
-    except OSError:
-        return
-        # file does not exist
-    os.remove(name)
-
-
 class H5pyReaderTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(H5pyReaderTest, self).__init__(*args, **kwargs)
@@ -65,7 +40,7 @@ def __init__(self, *args, **kwargs):
             self.log.removeHandler(lhStdout)
 
     def testSimple(self):
-        filepath = getFile("tall.h5", "tall.h5", ro=True)
+        filepath = "data/hdf5/tall.h5"
         kwargs = {"app_logger": self.log}
         with Hdf5db(h5_reader=H5pyReader(filepath, **kwargs), **kwargs) as db:
             root_id = db.getObjectIdByPath("/")
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 38447aff..9d595673 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -12,6 +12,7 @@
 import unittest
 import time
 import logging
+import h5py
 import numpy as np
 from h5json import Hdf5db
 from h5json.writer.h5py_writer import H5pyWriter
@@ -46,12 +47,14 @@ def __init__(self, *args, **kwargs):
 
     def testGroup(self):
     
-        with Hdf5db(h5_writer=H5pyWriter("/tmp/foo2.h5", no_data=False), app_logger=self.log) as db:
+        filepath = "test/unit/out/h5py_writer_test_testGroup.h5"
+        with Hdf5db(h5_writer=H5pyWriter(filepath, no_data=False), app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "attr1", value=[1,2,3,4])
             db.createAttribute(root_id, "attr2", 42)
             g1_id = db.createGroup()
             db.createHardLink(root_id, "g1", g1_id)
+            db.createAttribute(g1_id, "a1", "hello")
             g2_id = db.createGroup()
             db.createHardLink(root_id, "g2", g2_id)
 
@@ -69,6 +72,24 @@ def testGroup(self):
             db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
             db.createCustomLink(g2_id, "cust", {"foo": "bar"})
             db.flush()
+            with h5py.File(filepath) as f:
+                self.assertTrue("attr1", f.attrs)
+                self.assertTrue("attr2", f.attrs)
+                self.assertTrue("g1" in f)
+                g1 = f["g1"]
+                self.assertTrue("a1" in g1.attrs)
+                self.assertTrue("g1.1" in g1)
+                g11 = g1["g1.1"]
+                self.assertTrue("dset1.1.1" in g11)
+                dset = g11["dset1.1.1"]
+                self.assertEqual(dset.shape, (10,10))
+                for i in range(10):
+                    for j in range(10):
+                        self.assertEqual(dset[i, j], i*j)
+                self.assertTrue("g2" in f)
+                g2 = f["g2"]
+                self.assertTrue("extlink" in g2)
+                self.assertTrue("slink" in g2)
             
 
 

From 7c393b6c9a01069ba95b75ede407f7ddf5c07b0e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 4 Mar 2025 18:10:26 -0800
Subject: [PATCH 017/129] revert to using members for dtype enums

---
 src/h5json/hdf5db.py             |  8 ++++
 src/h5json/hdf5dtype.py          | 14 ++++--
 src/h5json/reader/h5py_reader.py |  4 +-
 src/h5json/writer/h5py_writer.py | 76 +++++++++++++++++++++-----------
 test/unit/h5py_writer_test.py    | 38 ++++++++++++++++
 test/unit/hdf5dtype_test.py      | 24 ++++++----
 6 files changed, 125 insertions(+), 39 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 352d6794..0d19ef7a 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -99,6 +99,14 @@ def is_dirty(self, obj_id):
             return True
         return obj_id in self._dirty_objects
     
+    @property
+    def new_objects(self):
+        return self._new_objects
+    
+    @property
+    def dirty_objects(self):
+        return self._dirty_objects
+    
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
         if self.is_new(obj_id):
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index be1ffd62..47f53a68 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -424,9 +424,11 @@ def getTypeItem(dt, metadata=None):
         if dt.base.byteorder == ">":
             byteorder = "BE"
         # this mapping is an h5py convention for boolean support
-        mapping = {"FALSE": 0, "TRUE": 1}
+        bool_false = {"name": "FALSE", "value": 0}
+        bool_true = {"name": "TRUE", "value": 1}
+        members = [bool_false, bool_true]
         type_info["class"] = "H5T_ENUM"
-        type_info["mapping"] = mapping
+        type_info["members"] = members
         base_info = {"class": "H5T_INTEGER"}
         base_info["base"] = "H5T_STD_I8" + byteorder
         type_info["base"] = base_info
@@ -456,7 +458,13 @@ def getTypeItem(dt, metadata=None):
             # yes, this is an enum!
             mapping = metadata["enum"]
             type_info["class"] = "H5T_ENUM"
-            type_info["mapping"] = mapping
+            members = []
+            for name in mapping:
+                value = mapping[name]
+                item = {"name": name, "value": value}
+                members.append(item)
+            type_info["members"] = members
+            #type_info["mapping"] = mapping
             if dt.name not in predefined_int_types:
                 raise TypeError("Unexpected integer type: " + dt.name)
             # maps to one of the HDF5 predefined types
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index 2100dec6..cfae72cc 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -96,7 +96,7 @@ def getAttribute(self, obj_id, name, include_data=True):
             addr = h5py.h5o.get_info(typeid).addr
             type_uuid = self.getObjIdByAddress(addr)
             committedType = self._id_map[type_uuid]
-            type_item = committedType["type"].copy()
+            type_item = getTypeItem(committedType.dtype)
             type_item["id"] = type_uuid
         else:
             type_item = getTypeItem(attrObj.dtype)
@@ -353,7 +353,7 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         elif isinstance(h5obj, h5py.Dataset):
             obj_json = self._getDataset(h5obj)
         elif isinstance(h5obj, h5py.Datatype):
-            obj_json = self._getDataType(h5obj)
+            obj_json = self._getDatatype(h5obj)
         else:
             raise TypeError(f"unexpected object type: {type(h5obj)}")
         
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index 247098a8..b932b2f5 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -45,9 +45,8 @@ def __init__(
     def _createGroup(self, parent, grp_json, name=None):
         """ create the group and any links it contains """
         grp = parent.create_group(name)
-        if "links" in grp_json:
-            grp_links = grp_json["links"]
-            self._createObjects(grp, grp_links)
+        return grp
+
 
     def _createDataset(self, parent, dset_json, name=None):
         """ create a dataset object """
@@ -156,7 +155,8 @@ def _createDataset(self, parent, dset_json, name=None):
                         else:
                             self.log.info(f"Unexpected filter name: {filter_alias}, ignoring")
                             
-        parent.create_dataset(name, **kwargs)
+        dset = parent.create_dataset(name, **kwargs)
+        return dset
 
     def _createDatatype(self, parent, ctype_json, name=None):
         """ create a datatype object """
@@ -164,50 +164,76 @@ def _createDatatype(self, parent, ctype_json, name=None):
         type_item = ctype_json["type"]
         dtype = createDataType(type_item)
         parent[name] = dtype
+        return parent[name]
 
 
-    def _createObjects(self, parent, links_json):
+    def _createObjects(self, parent, links_json, visited=set()):
         """ create child object in the given group, recurse for any sub-groups """
+
         for title in links_json:
-            if title in parent:
-                # TBD: this will do the wrong thing if the link tgt has changed
-                continue
+            #if title in parent:
+            #    # TBD: this will do the wrong thing if the link tgt has changed
+            #    continue
             link_json = links_json[title]
             link_class = link_json["class"]
-            if link_class == "H5L_TYPE_SOFT":
+            if link_class == "H5L_TYPE_SOFT" and title not in parent:
                 h5path = link_json["h5path"]
                 parent[title] = h5py.SoftLink(h5path)
-            elif link_class == "H5L_TYPE_EXTERNAL":
+            elif link_class == "H5L_TYPE_EXTERNAL" and title not in parent:
                 h5path = link_json["h5path"]
                 filename = link_json["file"]
                 parent[title] = h5py.ExternalLink(filename, h5path)
-            elif link_class == "H5L_TYPE_USER_DEFINED":
+            elif link_class == "H5L_TYPE_USER_DEFINED" and title not in parent:
                 self.log.warning("unable to create user-defined link: {title}")
             elif link_class == "H5L_TYPE_HARD":
                 tgt_id = link_json["id"]
+                """
+                if tgt_id in visited:
+                    # we've already processed this object
+                    if title not in parent:
+                        if tgt_id in self._id_map:
+                            tgt_obj = self._id_map[tgt_id]
+                            parent[title] = tgt_obj
+                    else:
+                        self.log.warning("h5py_writer - expected to find {tgt_id} in id_map")
+                    continue
+                """
+                
+                collection = getCollectionForId(tgt_id)
+
+                obj_json = self.db.getObjectById(tgt_id)
+            
                 if tgt_id in self._id_map:
+                    # object has already been created
                     tgt_path = self._id_map[tgt_id]
                     tgt_obj = parent[tgt_path]
-                    parent[title] = tgt_obj
+                    if title not in parent:
+                        parent[title] = tgt_obj
+                    if collection == "groups" and tgt_id not in visited:
+                        # recurse over sub-objects to pick up any new links
+                        grp_links = obj_json["links"]
+                        visited.add(tgt_id)
+                        self._createObjects(tgt_obj, grp_links, visited=visited)
                 else:
-                    obj_json = self.db.getObjectById(tgt_id)
                     parent_path = parent.name
                     if parent_path[-1] != '/':
                         parent_path += '/'
                     self._id_map[tgt_id] = parent_path + title
-                    collection = getCollectionForId(tgt_id)
                     kwds = {"name": title}
                     if collection == "groups":
-                        tgt_obj = self._createGroup(parent, obj_json, **kwds)
+                        tgt_grp = self._createGroup(parent, obj_json, **kwds)
+                        if "links" in obj_json:
+                            grp_links = obj_json["links"]
+                            visited.add(tgt_id)
+                            self._createObjects(tgt_grp, grp_links, visited=visited)
                     elif collection == "datasets":
-                        tgt_obj = self._createDataset(parent, obj_json, **kwds)
+                        self._createDataset(parent, obj_json, **kwds)
                     elif collection == "datatypes":
-                        tgt_obj = self._createDatatype(parent, obj_json, **kwds)
+                        self._createDatatype(parent, obj_json, **kwds)
                     else:
                         self.log.warning(f"unexpected collection: {collection}")
-                        tgt_obj = None
-                    if tgt_obj:
-                        parent[title] = tgt_obj
+                visited.add(tgt_id)
+
             else:
                 self.log.warning(f"unexpected link class: {link_class}")
 
@@ -231,7 +257,6 @@ def updateDatasetValues(self, dset_id, dset):
 
     def createAttribute(self, obj, name, attr_json):
         """ add the given attribute to obj """
-        print(f"h5py_writer.createAttribute {obj.name}: {name}")
 
         dtype = createDataType(attr_json["type"])
         shape_json = attr_json["shape"]
@@ -276,10 +301,11 @@ def flush(self):
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
         with h5py.File(self._filepath, mode=self._mode) as f:
-            root_json = self.db.getObjectById(root_id)
-            if "links" in root_json:
-                root_links = root_json["links"]
-                self._createObjects(f, root_links)
+            if self.db.new_objects:
+                root_json = self.db.getObjectById(root_id)
+                if "links" in root_json:
+                    root_links = root_json["links"]
+                    self._createObjects(f, root_links, visited=set(root_id))
             # update attributes, dataset values
             for obj_id in self._id_map:
                 if self.db.is_dirty(obj_id):
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 9d595673..3d81011c 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -90,6 +90,44 @@ def testGroup(self):
                 g2 = f["g2"]
                 self.assertTrue("extlink" in g2)
                 self.assertTrue("slink" in g2)
+
+            db.createAttribute(g1_id, "a2", "bye-bye")
+            db.flush()
+
+            with h5py.File(filepath) as f:
+                g1 = f["g1"]
+                self.assertEqual(len(g1.attrs), 2)
+                self.assertTrue("a1" in g1.attrs)
+                self.assertTrue("a2" in g1.attrs)
+
+            print("create group /g2/g2.1")
+            g21 = db.createGroup()
+            db.createHardLink(g2_id, "g2.1", g21)
+            db.flush()
+
+            with h5py.File(filepath) as f:
+                g2 = f["g2"]
+                self.assertTrue("g2.1" in g2)
+            
+            sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
+            arr = np.zeros((), dtype=np.int32)
+            arr[()] = 42
+            db.setDatasetValues(dset_111_id, sel, arr)
+            db.flush()
+
+            with h5py.File(filepath) as f:
+                dset = f["/g1/g1.1/dset1.1.1"]
+                for i in range(10):
+                    for j in range(10):
+                        if i == 4 and j == 4:
+                            # this is the one element that was updated
+                            expected = 42
+                        else:
+                            expected = i * j
+                        self.assertEqual(dset[i, j], expected)
+
+
+            
             
 
 
diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index dbc806bb..63efc239 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -125,8 +125,10 @@ def testBaseEnumTypeItem(self):
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I8LE")
-        self.assertTrue("mapping" in typeItem)
-        self.assertEqual(typeItem["mapping"]["GREEN"], 1)
+        self.assertTrue("members" in typeItem)
+        members = typeItem["members"]
+        expected = [{'name': 'RED', 'value': 0}, {'name': 'GREEN', 'value': 1}, {'name': 'BLUE', 'value': 2}]
+        self.assertEqual(members, expected)
         self.assertEqual(typeSize, 1)
 
     def testBaseBoolTypeItem(self):
@@ -136,11 +138,11 @@ def testBaseBoolTypeItem(self):
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I8LE")
-        self.assertTrue("mapping" in typeItem)
-        mapping = typeItem["mapping"]
-        self.assertEqual(len(mapping), 2)
-        self.assertEqual(mapping["FALSE"], 0)
-        self.assertEqual(mapping["TRUE"], 1)
+        self.assertTrue("members" in typeItem)
+        members = typeItem["members"]
+        self.assertEqual(len(members), 2)
+        self.assertEqual(members[0], {"name": "FALSE", "value": 0})
+        self.assertEqual(members[1], {"name": "TRUE", "value": 1})
         self.assertEqual(typeSize, 1)
 
     def testBaseArrayTypeItem(self):
@@ -205,8 +207,12 @@ def testEnumArrayTypeItem(self):
         self.assertEqual(typeItem["dims"], (2, 3))
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_ENUM")
-        self.assertTrue("mapping" in baseItem)
-        self.assertEqual(baseItem["mapping"]["GREEN"], 1)
+        self.assertTrue("members" in baseItem)
+        members = baseItem["members"]
+        self.assertEqual(len(members), 3)
+        self.assertEqual(members[0], {"name": "RED", "value": 0})
+        self.assertEqual(members[1], {"name": "GREEN", "value": 1})
+        self.assertEqual(members[2], {"name": "BLUE", "value": 2})
         self.assertTrue("base" in baseItem)
         basePrim = baseItem["base"]
         self.assertEqual(basePrim["class"], "H5T_INTEGER")

From 825fc89f3522929611a35cdebb1cfdcd0ed89a1f Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 6 Mar 2025 22:00:14 -0800
Subject: [PATCH 018/129] add support for reference types

---
 src/h5json/h5py_util.py          | 108 +++++++++++++++++++++++++
 src/h5json/hdf5db.py             |  22 ++++++
 src/h5json/hdf5dtype.py          |   4 +-
 src/h5json/objid.py              |  19 ++---
 src/h5json/writer/h5py_writer.py | 131 +++++++++++++++++++++++++++----
 test/unit/h5py_writer_test.py    | 113 +++++++++++++++++++++-----
 6 files changed, 355 insertions(+), 42 deletions(-)
 create mode 100644 src/h5json/h5py_util.py

diff --git a/src/h5json/h5py_util.py b/src/h5json/h5py_util.py
new file mode 100644
index 00000000..22df9ee0
--- /dev/null
+++ b/src/h5json/h5py_util.py
@@ -0,0 +1,108 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import h5py
+import numpy as np
+
+from . import hdf5dtype
+
+def is_reference(val):
+    """ Return True if the type or value is a Reference """
+
+    if isinstance(val, object) and val.__class__.__name__ == "Reference":
+        return True
+    elif isinstance(val, type) and val.__name__ == "Reference":
+        return True
+ 
+    return False
+
+
+def is_regionreference(val):
+    """ Return True if the type or value is a RegionReference """
+
+    if isinstance(val, object) and val.__class__.__name__ == "RegionReference":
+        return True
+    elif isinstance(val, type) and val.__name__ == "RegionReference":
+        return True
+
+    return False
+
+
+def has_reference(dtype):
+    """ return True if the dtype (or a sub-type) is a Reference type """
+    has_ref = False
+    if not isinstance(dtype, np.dtype):
+        return False
+    if len(dtype) > 0:
+        for name in dtype.fields:
+            item = dtype.fields[name]
+            if has_reference(item[0]):
+                has_ref = True
+                break
+    elif dtype.metadata and "ref" in dtype.metadata:
+        basedt = dtype.metadata["ref"]
+        has_ref = is_reference(basedt)
+    elif dtype.metadata and "vlen" in dtype.metadata:
+        basedt = dtype.metadata["vlen"]
+        has_ref = has_reference(basedt)
+    return has_ref
+
+
+def convert_dtype(srcdt, to_h5py=True):
+    """Return a dtype based on input dtype, converting any Reference types from
+    h5py style to h5pyd and vice-versa.
+    """
+
+    if len(srcdt) > 0:
+        fields = []
+        for name in srcdt.fields:
+            item = srcdt.fields[name]
+            # item is a tuple of dtype and integer offset
+            field_dt = convert_dtype(item[0], to_h5py=to_h5py)
+            fields.append((name, field_dt))
+        tgt_dt = np.dtype(fields)
+    else:
+        # check if this a "special dtype"
+        if srcdt.metadata and "ref" in srcdt.metadata:
+            ref = srcdt.metadata["ref"]
+            if is_reference(ref):
+                if to_h5py:
+                    tgt_dt = h5py.special_dtype(ref=h5py.Reference)
+                else:
+                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.Reference)
+            elif is_regionreference(ref):
+                if to_h5py:
+                    tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
+                else:
+                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.RegionReference)
+            else:
+                msg = f"Unexpected ref type: {srcdt}"
+                raise TypeError(msg)
+        elif srcdt.metadata and "vlen" in srcdt.metadata:
+            src_vlen = srcdt.metadata["vlen"]
+            if isinstance(src_vlen, np.dtype):
+                tgt_base = convert_dtype(src_vlen, to_h5py=to_h5py)
+            else:
+                tgt_base = src_vlen
+            if to_h5py:
+                tgt_dt = h5py.special_dtype(vlen=tgt_base)
+            else:
+                tgt_dt = h5pyd.special_dtype(vlen=tgt_base)
+        elif srcdt.kind == "U":
+            # use vlen for unicode strings
+            if to_h5py:
+                tgt_dt = h5py.special_dtype(vlen=str)
+            else:
+                tgt_dt = hdf5dtype.special_dtype(vlen=str)
+        else:
+            tgt_dt = srcdt
+    return tgt_dt
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 0d19ef7a..cc9d4220 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -79,11 +79,29 @@ def reader(self):
         """ return reader instance """
         return self._reader
     
+    @reader.setter
+    def reader(self, value: H5Reader):
+        """ set the reader """
+        if self._reader:
+            self._reader.close()
+        self._reader = value
+        if self._reader:
+            self._reader.set_db(self)
+            
     @property
     def writer(self):
         """ return writer instance """
         return self._writer
     
+    @writer.setter
+    def writer(self, value: H5Reader):
+        """ set the writer """
+        if self._writer:
+            self._writer.close()
+        self._writer = value
+        if self._writer:
+            self._writer.set_db(self)
+    
     @property
     def root_id(self):
         """ return root uuid """
@@ -321,8 +339,12 @@ def getAttributeValue(self, obj_id, name):
         else:
             dims = shape_json["dims"]
         dtype = createDataType(attr_json["type"])
+        print("getAttributeValue dtype, metadata:", dtype.metadata)
+
         value = attr_json["value"]
         arr = jsonToArray(dims, dtype, value)
+        print("getAttributeValue returning arr.dtype, metadata:", arr.dtype.metadata)
+
         return arr
 
 
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index 47f53a68..acbb2d21 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -17,6 +17,7 @@
 numpy_integer_types = (np.int8, np.uint8, np.int16, np.int16, np.int32, np.uint32, np.int64, np.uint64)
 numpy_float_types = (np.float16, np.float32, np.float64)
 
+
 class Reference:
     """
     Represents an HDF5 object reference
@@ -743,7 +744,7 @@ def createBaseDataType(typeItem):
                 type_code = "S"
             elif typeItem["charSet"] == "H5T_CSET_UTF8":
                 # use the same type_code as ascii strings
-                # (othewise, numpy will reserve bytes for UTF32 representation)
+                # (otherwise, numpy will reserve bytes for UTF32 representation)
                 type_code = "S"
             else:
                 raise TypeError("unexpected 'charSet' value")
@@ -804,6 +805,7 @@ def createBaseDataType(typeItem):
             raise KeyError("'base' not provided")
         if typeItem["base"] == "H5T_STD_REF_OBJ":
             dtRet = special_dtype(ref=Reference)
+            print("special dtype, metadata:", dtRet.metadata)
         elif typeItem["base"] == "H5T_STD_REF_DSETREG":
             dtRet = special_dtype(ref=RegionReference)
         else:
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index e36e8a22..bd34bc56 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -129,15 +129,6 @@ def getCollectionForId(obj_id):
         raise ValueError(f"{obj_id} not a collection id")
     return collection
 
-def stripId(obj_id):
-    """ return just the base id without any prefix (e.g. 'g-') """
-    if len(obj_id) == UUID_LEN:
-        return obj_id  # just return as is
-    if len(obj_id) == UUID_LEN + 2:
-        return obj_id[2:]
-    else:
-        raise ValueError("unexpected obj_id: {obj_id}")
-
 
 def isRootObjId(id):
     """returns true if this is a root id (only for v2 schema)"""
@@ -494,3 +485,13 @@ def getUuidFromId(id):
         return id[2:]
     else:
         raise ValueError(f"Unexpected obj_id: {id}")
+
+def stripId(obj_id):
+    """ return just the base id without any prefix (e.g. 'g-') """
+    if len(obj_id) == UUID_LEN:
+        return obj_id  # just return as is
+    if len(obj_id) == UUID_LEN + 2:
+        return obj_id[2:]
+    else:
+        raise ValueError("unexpected obj_id: {obj_id}")
+    
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index b932b2f5..07717ddf 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -10,9 +10,11 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import h5py
+import numpy as np
 
-from ..objid import getCollectionForId
+from ..objid import getCollectionForId, isValidUuid
 from ..hdf5dtype import createDataType
+from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
 from ..array_util import jsonToArray
 from .. import filters
 from .h5writer import H5Writer
@@ -39,9 +41,108 @@ def __init__(
         else:
             self._mode = "w"
 
-        self._f = None
         self._id_map = {}
 
+    
+    def _copy_element(self, val, src_dt, tgt_dt, fout=None):
+        """ convert the given dataset or attribute element to h5py equivalent """
+
+        out = None
+        if len(src_dt) > 0:
+            out_fields = []
+            i = 0
+            for name in src_dt.fields:
+                field_src_dt = src_dt.fields[name][0]
+                field_tgt_dt = tgt_dt.fields[name][0]
+                field_val = val[i]
+                i += 1
+                out_field = self._copy_element(field_val, field_src_dt, field_tgt_dt)
+                out_fields.append(out_field)
+            out = tuple(out_fields)
+        elif src_dt.metadata and "ref" in src_dt.metadata:
+            if not tgt_dt.metadata or "ref" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be ref, but got: {tgt_dt}")
+            ref = tgt_dt.metadata["ref"]
+            if is_reference(ref):
+                # initialize out to null ref
+                out = h5py.Reference()  # null h5py ref
+             
+                if ref and val:
+                    if isinstance(val, bytes):
+                        val = val.decode("ascii")
+                    # strip out collection prefix if present
+                    parts = val.split("/")
+                    obj_uuid = parts[-1]
+                    if not isValidUuid(obj_uuid):
+                        msg = f"invalid uuid: {obj_uuid}"
+                        self.log.warning(msg)
+                    elif obj_uuid not in self._id_map:
+                        self.log.warning(f"ref object {obj_uuid} not found")
+                    else:
+                        h5path = self._id_map[obj_uuid]
+                        try:
+                            obj = fout[h5path]
+                            out = obj.ref
+                        except KeyError:
+                            self.log.warning(f"referenced object: {h5path} not found")
+
+            elif is_regionreference(ref):
+                self.log.warning("region reference not supported")
+                # TBD: just return a null region reference till we have support
+                out = h5py.RegionReference()
+            else:
+                raise TypeError(f"Unexpected ref type: {type(ref)}")
+        elif src_dt.metadata and "vlen" in src_dt.metadata:
+            if not isinstance(val, np.ndarray):
+                raise TypeError(f"Expecting ndarray or vlen element, but got: {type(val)}")
+            if not tgt_dt.metadata or "vlen" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be vlen, but got: {tgt_dt}")
+            src_vlen_dt = src_dt.metadata["vlen"]
+            tgt_vlen_dt = tgt_dt.metadata["vlen"]
+            if has_reference(src_vlen_dt):
+                if len(val.shape) == 0:
+                    # scalar array
+                    e = val[()]
+                    v = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fout=fout)
+                    out = np.array(v, dtype=tgt_dt)
+                else:
+                    out = np.zeros(val.shape, dtype=tgt_dt)
+                    for i in range(len(out)):
+                        e = val[i]
+                        out[i] = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fout=fout)
+            else:
+                # can just directly copy the array
+                out = np.zeros(val.shape, dtype=tgt_dt)
+                out[...] = val[...]
+        else:
+            out = val  # can just copy as is
+        return out
+
+    def _copy_array(self, src_arr, fout=None):
+        """Copy the numpy array to a new array.
+            Convert any reference type to point to item in the target's hierarchy.
+        """
+
+        if not isinstance(src_arr, np.ndarray):
+            raise TypeError(f"Expecting ndarray, but got: {src_arr}")
+        tgt_dt = convert_dtype(src_arr.dtype, to_h5py=True)
+        tgt_arr = np.zeros(src_arr.shape, dtype=tgt_dt)
+
+        if has_reference(src_arr.dtype):
+            # flatten array to simplify iteration
+            count = int(np.prod(src_arr.shape))
+            tgt_arr_flat = tgt_arr.reshape((count,))
+            src_arr_flat = src_arr.reshape((count,))
+            for i in range(count):
+                e = src_arr_flat[i]
+                element = self._copy_element(e, src_arr.dtype, tgt_dt, fout=fout)
+                tgt_arr_flat[i] = element
+            tgt_arr = tgt_arr_flat.reshape(src_arr.shape)
+        else:
+            # can just copy the entire array
+            tgt_arr[...] = src_arr[...]
+        return tgt_arr
+
     def _createGroup(self, parent, grp_json, name=None):
         """ create the group and any links it contains """
         grp = parent.create_group(name)
@@ -254,26 +355,28 @@ def updateDatasetValues(self, dset_id, dset):
             dset[slices] = val
             self.log.debug(f"h5py_writer dset {dset.name} updated")
 
+    
 
     def createAttribute(self, obj, name, attr_json):
         """ add the given attribute to obj """
-
-        dtype = createDataType(attr_json["type"])
+    
+        src_dt = createDataType(attr_json["type"])
+         
+        # handle special case of null space attribute here   
         shape_json = attr_json["shape"]
         shape_class = shape_json["class"]
         if shape_class == "H5S_NULL":
-            dims = None
-        elif shape_class == "H5S_SCALAR":
+            obj.attrs[name] = h5py.Empty(convert_dtype(src_dt, to_h5py=True))
+            return
+        
+        if shape_class == "H5S_SCALAR":
             dims = ()
         else:
-            dims = tuple(shape_json["dims"])
-
-        if dims is None:
-            obj.attrs[name] = h5py.Empty(dtype)
-        else:
-            json_value = attr_json["value"]
-            arr = jsonToArray(dims, dtype, json_value)
-            obj.attrs[name] = arr
+            dims = shape_json["dims"]
+        src_arr = jsonToArray(dims, src_dt, attr_json["value"])
+        tgt_arr = self._copy_array(src_arr, fout=obj.file)
+            
+        obj.attrs[name] = tgt_arr
 
 
     def updateAttributes(self, obj_id, obj):
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 3d81011c..75b7e37b 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -45,10 +45,11 @@ def __init__(self, *args, **kwargs):
         self.log.info("init!")
 
 
-    def testGroup(self):
+    def testSimple(self):
     
-        filepath = "test/unit/out/h5py_writer_test_testGroup.h5"
-        with Hdf5db(h5_writer=H5pyWriter(filepath, no_data=False), app_logger=self.log) as db:
+        filepath = "test/unit/out/h5py_writer_test_testSimple.h5"
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "attr1", value=[1,2,3,4])
             db.createAttribute(root_id, "attr2", 42)
@@ -126,15 +127,11 @@ def testGroup(self):
                             expected = i * j
                         self.assertEqual(dset[i, j], expected)
 
-
-            
-            
-
-
-
     def testNullSpaceAttribute(self):
 
+        filepath = "test/unit/out/h5py_writer_test_testNullSpaceAttribute.h5"
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
             item = db.getAttribute(root_id, "A1")
@@ -145,9 +142,17 @@ def testNullSpaceAttribute(self):
             self.assertTrue(item["created"] > time.time() - 1.0)
             value = db.getAttributeValue(root_id, "A1")
             self.assertEqual(value, None)
+            db.flush()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            self.assertEqual(f.attrs["A1"], h5py.Empty(dtype=np.int32))
 
     def testScalarAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testNullScalarAttribute.h5"
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
             dims = ()
             value = 42
@@ -165,13 +170,21 @@ def testScalarAttribute(self):
             self.assertTrue(item["created"] > now - 1)
             shape = item["shape"]
             self.assertEqual(shape["class"], "H5S_SCALAR")
-
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, np.int32))
+            self.assertEqual(a1, 42)
             
 
     def testFixedStringAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testFixedStringAttribute.h5"
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
             value = "Hello, world!"
             db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
@@ -186,14 +199,23 @@ def testFixedStringAttribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
-            ret_value = db.getAttributeValue(root_id, "A1")
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, bytes))
+            self.assertEqual(a1, b'Hello, world!')
        
 
     def testVlenAsciiAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testVlenAsciiAttribute.h5"
+        value = b"Hello, world!"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
  
-            value = b"Hello, world!"
             dt = special_dtype(vlen=bytes)
 
             # write the attribute
@@ -211,11 +233,21 @@ def testVlenAsciiAttribute(self):
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
 
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, str))
+            self.assertEqual(a1, value.decode("ascii"))
+
     def testVlenUtf8Attribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testVlenUtf8Attribute.h5"
+        value = "one: \u4e00"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
  
-            value = b"Hello, world!"
             dt = special_dtype(vlen=str)
 
             # write the attribute
@@ -229,15 +261,25 @@ def testVlenUtf8Attribute(self):
             self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
             self.assertEqual(item_type["length"], "H5T_VARIABLE")
             self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
-            self.assertEqual(item["value"], "Hello, world!")
+            self.assertEqual(item["value"], value)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, str))
+            self.assertEqual(a1, value)
  
 
     def testIntAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testIntAttribute.h5"
+        value = [2, 3, 5, 7, 11]
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
-            value = [2, 3, 5, 7, 11]
             db.createAttribute(root_id, "A1", value, dtype=np.int16)
             item = db.getAttribute(root_id, "A1")
             self.assertEqual(item["value"], [2, 3, 5, 7, 11])
@@ -250,8 +292,20 @@ def testIntAttribute(self):
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I16LE")
 
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, np.ndarray))
+            self.assertEqual(a1.shape, (5,))
+            for i in range(5):
+                self.assertEqual(a1[i], value[i])
+ 
+
     def testCreateReferenceAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCreateReferenceAttribute.h5"
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
 
             dset_id = db.createDataset(shape=(), dtype=np.int32)                
@@ -262,19 +316,28 @@ def testCreateReferenceAttribute(self):
             ds1_ref = "datasets/" + dset_id
             value = [ds1_ref,]
             db.createAttribute(root_id, "A1", value, dtype=dt)
-            item = db.getAttribute(root_id, "A1")
             attr = db.getAttribute(root_id, "A1")
             self.assertTrue("shape" in attr)
             
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
-            attr_value = item["value"]
+            attr_value = db.getAttributeValue(root_id, "A1")
             self.assertEqual(len(attr_value), 1)
-            self.assertEqual(attr_value[0], ds1_ref)
+            self.assertEqual(attr_value[0], ds1_ref.encode('ascii'))
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            obj_ref = a1[0]
+            obj = f[obj_ref]
+            self.assertEqual(obj.name, "/DS1")
 
     def testCreateVlenReferenceAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testVlenReferenceAttribute.h5"
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
             dset_id = db.createDataset(shape=(), dtype=np.int32)                
             db.createHardLink(root_id, "DS1", dset_id)
@@ -304,10 +367,24 @@ def testCreateVlenReferenceAttribute(self):
 
             item_shape = item["shape"]
             self.assertEqual(item_shape["class"], "H5S_SCALAR")
+
+        print("open:", filepath)
+        with h5py.File(filepath) as f:
+            self.assertTrue("DS1" in f)
+            ds1 = f["DS1"]
+            self.assertTrue("G1" in f)
+            g1 = f["G1"]
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            ref_obj = f[a1[0]]
+            self.assertEqual(ref_obj.name, "/DS1")
             
 
     def testCommittedType(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCommittedType.h5"
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
             dt = np.dtype("S15")
              

From 88fa1eb6b4a8dfc9118c93925b570f7c01d882d2 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 25 Mar 2025 18:45:02 +0100
Subject: [PATCH 019/129] support for h5py and json readers and writers

---
 data/hdf5/dset_creationprop.h5     | Bin 8058 -> 7228 bytes
 data/json/nullspace_dset.json      |  34 ------
 setup.cfg                          |   6 +
 src/h5json/array_util.py           |   3 +-
 src/h5json/dset_util.py            |  20 ++-
 src/h5json/filters.py              |   3 +-
 src/h5json/h5py_util.py            |   9 +-
 src/h5json/hdf5db.py               | 156 +++++++++++-------------
 src/h5json/hdf5dtype.py            |  19 ++-
 src/h5json/objid.py                |   4 +-
 src/h5json/reader/h5json_reader.py |  26 ++--
 src/h5json/reader/h5py_reader.py   | 188 ++++++++++++++++++++++-------
 src/h5json/reader/h5reader.py      |  12 +-
 src/h5json/selections.py           |   3 +-
 src/h5json/writer/h5json_writer.py |  56 ++++-----
 src/h5json/writer/h5py_writer.py   |  44 +++----
 src/h5json/writer/h5writer.py      |  10 +-
 test/unit/h5json_writer_test.py    |  38 +++++-
 test/unit/h5py_writer_test.py      |  41 ++++++-
 test/unit/hdf5db_test.py           |  28 +++++
 test/unit/hdf5dtype_test.py        |   2 +
 21 files changed, 429 insertions(+), 273 deletions(-)
 delete mode 100644 data/json/nullspace_dset.json
 create mode 100644 setup.cfg

diff --git a/data/hdf5/dset_creationprop.h5 b/data/hdf5/dset_creationprop.h5
index ff5b7a723a1800126515ab515f1957ef12bddf97..12b7a3265d45fa1c8b36a9e15cb8e27f6c9ea3c7 100644
GIT binary patch
delta 35
mcmexmx5r|F29u4<My-Bk9!3TM1_)4)-nh|>ar18$M{xkBbO@&a

delta 402
zcmdmE@yl+422+*%My-Bk9wr731_)4Kp14tN^KNDd#?8N39K}WbojtsH%otdh8CV%a
z7+4q>%q&ezCX2ADvN3@)NKD))#S&*fuYB?W4qwg;S%@J?le^e0{W7<SnS->m2{JG+
zGO$%Arlb_rny^EZut|Xg`KwbBOA?DyOX3TP@(YS<XURZhWzD=<!8V71Y*q!?Jh_Zp
z-CB~7fk6PM1cZT1AmCsCxg!CH6}W)}5HJI={OdYZuzoj)ejbSa$$VTA!ad<q*9pow
zLG*Dz^eKx2%>;!I$SwtGAOQp>(XPkA<^+S(8cm+asl^+glbM%o6`ca~kzwp)UM@-g
O`<3T42&!TMsR96XQ%-mQ

diff --git a/data/json/nullspace_dset.json b/data/json/nullspace_dset.json
deleted file mode 100644
index 8808f215..00000000
--- a/data/json/nullspace_dset.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-    "apiVersion": "1.1.0",
-    "datasets": {
-        "23d3e919-7b53-11e4-961d-3c15c2da029e": {
-            "alias": [
-                "/DS1"
-            ],
-            "shape": {
-                "class": "H5S_NULL"
-            },
-            "type": {
-                "base": "H5T_STD_I32LE",
-                "class": "H5T_INTEGER"
-            },
-            "value": null
-        }
-    },
-    "groups": {
-        "23d2e06b-7b53-11e4-9910-3c15c2da029e": {
-            "alias": [
-                "/"
-            ],
-            "links": [
-                {
-                    "class": "H5L_TYPE_HARD",
-                    "collection": "datasets",
-                    "id": "23d3e919-7b53-11e4-961d-3c15c2da029e",
-                    "title": "DS1"
-                }
-            ]
-        }
-    },
-    "root": "23d2e06b-7b53-11e4-9910-3c15c2da029e"
-}
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..b2f3e822
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[flake8]
+max-line-length = 120
+# E402: module level import not at top of file
+# C901: too complex
+# F401: unused exports are necessary in __init__.py
+ignore = E402, C901, F401
diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index bef4587e..67c847c3 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -491,6 +491,7 @@ def arrayToBytes(arr, encoding=None):
         data = encodeData(data)
     return data
 
+
 def bytesToArray(data, dt, shape, encoding=None):
     """
     Create numpy array based on byte representation
@@ -522,7 +523,7 @@ def bytesToArray(data, dt, shape, encoding=None):
 
     return arr
 
-  
+
 def getNumpyValue(value, dt=None, encoding=None):
     """
     Return value as numpy type for given dtype and encoding
diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 6cd51c3d..5b10323f 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -12,17 +12,31 @@
 
 import time
 
+
 def resize_dataset(dset_json, shape):
     shape_json = dset_json["shape"]
     shape_class = shape_json["class"]
     if shape_class != "H5S_SIMPLE":
         raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
-    if len(shape_class["dims"]) != len(shape):
+    if len(shape_json["dims"]) != len(shape):
         raise ValueError("Resize shape parameter doesn't match dataset's rank")
+    if "maxdims" not in shape_json:
+        raise ValueError("Dataset is not resizable")
+    dims = shape_json["dims"]
+    maxdims = shape_json["maxdims"]
+
     if shape_json["dims"] == list(shape):
         # no change, just return
         return
+    for i in range(len(dims)):
+        extent = shape[i]
+        if extent < 0:
+            raise ValueError("dimensions can't be negative")
+        if maxdims[i] == "H5S_UNLIMITED":
+            # any positive extent is ok
+            continue
+        if extent > maxdims[i]:
+            raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")
+
     shape_json["dims"] = list(shape)
     dset_json["modified"] = time.time()
-        
-         
\ No newline at end of file
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index e6511366..cda38178 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -9,7 +9,7 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
- 
+
 import h5py
 
 _HDF_FILTERS = {
@@ -53,4 +53,3 @@
 }
 
 _H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
-
diff --git a/src/h5json/h5py_util.py b/src/h5json/h5py_util.py
index 22df9ee0..ebe2dbdb 100644
--- a/src/h5json/h5py_util.py
+++ b/src/h5json/h5py_util.py
@@ -15,6 +15,7 @@
 
 from . import hdf5dtype
 
+
 def is_reference(val):
     """ Return True if the type or value is a Reference """
 
@@ -22,8 +23,8 @@ def is_reference(val):
         return True
     elif isinstance(val, type) and val.__name__ == "Reference":
         return True
- 
-    return False
+    else:
+        return False
 
 
 def is_regionreference(val):
@@ -59,7 +60,7 @@ def has_reference(dtype):
 
 def convert_dtype(srcdt, to_h5py=True):
     """Return a dtype based on input dtype, converting any Reference types from
-    h5py style to h5pyd and vice-versa.
+    h5py style to h5json and vice-versa.
     """
 
     if len(srcdt) > 0:
@@ -96,7 +97,7 @@ def convert_dtype(srcdt, to_h5py=True):
             if to_h5py:
                 tgt_dt = h5py.special_dtype(vlen=tgt_base)
             else:
-                tgt_dt = h5pyd.special_dtype(vlen=tgt_base)
+                tgt_dt = hdf5dtype.special_dtype(vlen=tgt_base)
         elif srcdt.kind == "U":
             # use vlen for unicode strings
             if to_h5py:
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index cc9d4220..5c7e37a6 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -28,7 +28,7 @@ class Hdf5db:
     and Datatypes).  By default all data is held in-memory.  Initialize with h5_reader to read from
     an HDF5 compatible storage pool, and or, h5_writer to write to an HDF5 compatible storage pool.
     """
-     
+
     @staticmethod
     def getVersionInfo():
         versionInfo = {}
@@ -39,7 +39,7 @@ def __init__(
         self,
         h5_reader: H5Reader = None,
         h5_writer: H5Writer = None,
-        app_logger = None,
+        app_logger=None,
     ):
         if app_logger:
             self.log = app_logger
@@ -50,10 +50,10 @@ def __init__(
 
         self._reader = h5_reader
         self._writer = h5_writer
-        
+
         self._new_objects = set()  # set of obj_id's
         self._dirty_objects = set()  # set of obj_id's
-    
+
         if self._reader:
             root_id = self._reader.get_root_id()
             group_json = self._reader.getObjectById(root_id)
@@ -65,7 +65,7 @@ def __init__(
 
         if self._writer:
             self._writer.set_db(self)
-        
+
         self._db[root_id] = group_json
         self._root_id = root_id
 
@@ -73,12 +73,12 @@ def __init__(
     def db(self):
         """ return object db dictionary """
         return self._db
-    
+
     @property
     def reader(self):
         """ return reader instance """
         return self._reader
-    
+
     @reader.setter
     def reader(self, value: H5Reader):
         """ set the reader """
@@ -87,44 +87,44 @@ def reader(self, value: H5Reader):
         self._reader = value
         if self._reader:
             self._reader.set_db(self)
-            
+
     @property
     def writer(self):
         """ return writer instance """
         return self._writer
-    
+
     @writer.setter
-    def writer(self, value: H5Reader):
+    def writer(self, value: H5Writer):
         """ set the writer """
         if self._writer:
             self._writer.close()
         self._writer = value
         if self._writer:
             self._writer.set_db(self)
-    
+
     @property
     def root_id(self):
         """ return root uuid """
         return self._root_id
-    
+
     def is_new(self, obj_id):
         """ return true if this is a new object (has not been persisted) """
         return obj_id in self._new_objects
-    
+
     def is_dirty(self, obj_id):
         """ return true if this object has been modified """
         if self.is_new(obj_id):
             return True
         return obj_id in self._dirty_objects
-    
+
     @property
     def new_objects(self):
         return self._new_objects
-    
+
     @property
     def dirty_objects(self):
         return self._dirty_objects
-    
+
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
         if self.is_new(obj_id):
@@ -140,21 +140,16 @@ def make_dirty(self, obj_id):
         obj_json["lastModified"] = time.time()
         self._dirty_objects.add(obj_id)
 
-
     def flush(self):
         """ write out any changes """
         if not self.writer:
             return  # nothing to do
-        
-        print("self._new_objects:", self._new_objects)
-        print("self._dirty_objects:", self._dirty_objects)
+
         obj_ids = self._new_objects.union(self._dirty_objects)
-        print(f"hdf5db_flush {len(obj_ids)} objects")
 
         if not self.writer.flush():
             # flush not successful, don't clear dirty set
-            return  
-
+            return
 
         for obj_id in obj_ids:
             obj_json = self._db[obj_id]
@@ -164,12 +159,12 @@ def flush(self):
         # reset new and dirty sets
         self._new_objects = set()
         self._dirty_objects = set()
-           
+
     def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
         self.flush()
-        if self.writer:                         
+        if self.writer:
             self.writer.close()
         if self.reader:
             self.reader.close()
@@ -185,7 +180,6 @@ def __exit__(self, type, value, traceback):
         """ called on package exit """
         self.log.info("Hdf5db __exit")
         self.close()
-         
 
     def getObjectById(self, obj_id):
         """ return object with given id """
@@ -210,7 +204,7 @@ def getObjectIdByPath(self, h5path, parent_id=None):
         if parent_id is None:
             parent_id = self.root_id
         self.log.debug(f"getObjectIdDByPath(h5path: {h5path} parent_id: {parent_id}")
-        
+
         obj_json = self.getObjectById(parent_id)
         if obj_json is None:
             self.log.warning("getObjectIdDByPath - parent_id not found")
@@ -261,12 +255,12 @@ def getObjectIdByPath(self, h5path, parent_id=None):
                 self.log.warning(f"get_bypath {h5path} not found")
                 raise KeyError(h5path)
         return obj_id
-    
+
     def getObjectByPath(self, path):
         """ Get Object JSON at given path """
         obj_id = self.getObjectIdByPath(path)
         obj_json = self.getObjectById(obj_id)
-        return obj_json    
+        return obj_json
 
     def getDtype(self, obj_id):
         """ Return numpy data type for given object id """
@@ -277,11 +271,10 @@ def getDtype(self, obj_id):
             # group id?
             raise TypeError(f"{obj_id} does not have a datatype")
         type_json = obj_json["type"]
-        
+
         dtype = createDataType(type_json)
         return dtype
- 
- 
+
     def getAttribute(self, obj_id, name, includeData=True):
         """
         Get attribute given an object id and name
@@ -290,28 +283,20 @@ def getAttribute(self, obj_id, name, includeData=True):
 
         obj_json = self.getObjectById(obj_id)
         attrs = obj_json["attributes"]
-        
+
         if name not in attrs:
             msg = f"Attribute: [{name }] not found in object: {obj_id}"
             self.log.info(msg)
             return None
-        if attrs[name] == None:
+        if attrs[name] is None:
             msg = f"Attribute: [{name}] has been deleted"
             self.log.info(None)
             return None
-        
+
         attr_json = attrs[name]
 
-        if includeData and "value" not in attr_json:
-            # Reader may not have pre-loaded large attributes
-            # fetch it now
-            if not self.reader:
-                raise RuntimeError(f"Expected to find value for attribute {name} of {obj_id}")
-            attr_json = self.reader.get_attribute(obj_id, name)
-            attr_json["value"] = attr_json  # this will update the _db
-        
         return attr_json
-    
+
     def getAttributes(self, obj_id):
         """
         Get attributes given an object id and name
@@ -322,11 +307,11 @@ def getAttributes(self, obj_id):
         attrs = obj_json["attributes"]
         names = []
         for name in attrs:
-            if attrs[name] != None:
+            if attrs[name] is not None:
                 names.append(name)
-         
+
         return names
-    
+
     def getAttributeValue(self, obj_id, name):
         """ Return NDArray of the given attribute value """
         attr_json = self.getAttribute(obj_id, name)
@@ -339,20 +324,17 @@ def getAttributeValue(self, obj_id, name):
         else:
             dims = shape_json["dims"]
         dtype = createDataType(attr_json["type"])
-        print("getAttributeValue dtype, metadata:", dtype.metadata)
 
         value = attr_json["value"]
         arr = jsonToArray(dims, dtype, value)
-        print("getAttributeValue returning arr.dtype, metadata:", arr.dtype.metadata)
 
         return arr
 
-
     def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         """
         create an attribute - will override any existing attributes
         """
-        
+
         # TBD: if dtype is a committed ref type, fetch it first
         # TBD: also, check special case for complex types
 
@@ -367,7 +349,7 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
             type_json["id"] = ctype_id
             dtype = createDataType(type_json)
 
-        # First, make sure we have a NumPy array.   
+        # First, make sure we have a NumPy array
         if isinstance(value, Reference) and dtype is None:
             dtype = special_dtype(ref=Reference)
         if shape == "H5S_NULL":
@@ -383,7 +365,7 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
                 dtype = value.dtype
             else:
                 dtype = np.dtype(dtype)  # In case a string, e.g. 'i8' is passed
- 
+
         # Where a top-level array type is requested, we have to do some
         # fiddling around to present the data as a smaller array of
         # sub-arrays.
@@ -443,7 +425,6 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         # mark object as dirty
         self.make_dirty(obj_id)
 
-
     def deleteAttribute(self, obj_id, name):
         """ delete the given attribute """
         obj_json = self.getObjectById(obj_id)
@@ -451,9 +432,8 @@ def deleteAttribute(self, obj_id, name):
         if name not in attrs_json:
             raise KeyError(f"attribute [{name}] not found in {obj_id}")
         attrs_json[name] = None  # mark key for deletion
-        
-        self.make_dirty(obj_id)
 
+        self.make_dirty(obj_id)
 
     def getDatasetValues(self, dset_id, sel):
         """
@@ -466,22 +446,23 @@ def getDatasetValues(self, dset_id, sel):
         shape_json = dset_json["shape"]
         if not isinstance(sel, selections.Selection):
             raise TypeError("Expected Selection class")
-       
+
         if shape_json["class"] == "H5S_NULL":
             return None
 
         if shape_json["class"] == "H5S_SCALAR":
-            if sel.select_type != sel.H5S_SELECT_ALL:
+            if sel.select_type != selections.H5S_SELECT_ALL:
                 # TBD: support other selection types
                 raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
             if sel.shape != ():
                 raise ValueError("Selection shape does not match dataset shape")
+            rank = 0
         else:
             dims = tuple(shape_json["dims"])
             if sel.shape != dims:
                 raise ValueError("Selection shape does not match dataset shape")
-        rank = len(dims)  
-            
+            rank = len(dims)
+
         dtype = self.getDtype(dset_id)
         if self.reader:
             arr = self.reader.getDatasetValues(dset_id, sel)
@@ -506,7 +487,7 @@ def getDatasetValues(self, dset_id, sel):
                 arr[slices] = update_val
 
         return arr
-    
+
     def setDatasetValues(self, dset_id, sel, arr):
         """
         Write the given ndarray to the dataset using the selection
@@ -538,17 +519,15 @@ def setDatasetValues(self, dset_id, sel, arr):
         updates.append((sel, arr.copy()))
         self.make_dirty(dset_id)
 
-
     def resizeDataset(self, dset_id, shape):
         """
         Resize existing Dataset
         """
         self.log.info(f"resizeDataset {dset_id}, {shape}")
-        
+
         dset_json = self.getObjectById(dset_id)  # will throw exception if not found
         if resize_dataset(dset_json, shape):
             self._dirty_objects.add(dset_id)
-         
 
     def deleteObject(self, obj_id):
         """ Delete the given object """
@@ -558,14 +537,13 @@ def deleteObject(self, obj_id):
         if obj_id == self.root_id:
             raise KeyError("Root group cannot be deleted")
         self.db[obj_id] = None
-        
+
         if obj_id in self._new_objects:
             self._new_objects.remove(obj_id)
 
         if obj_id in self._dirty_objects:
             self._dirty_objects.remove(obj_id)
 
-        
     def getLinks(self, grp_id):
         """ Get the links for the given group """
         grp_json = self.getObjectById(grp_id)
@@ -574,30 +552,30 @@ def getLinks(self, grp_id):
         links = grp_json["links"]
         names = []
         for name in links:
-            if links[name] != None:
+            if links[name] is not None:
                 names.append(name)
         return names
-      
+
     def getLink(self, grp_id, name):
         """ Get the given link """
-        
+
         obj_json = self.getObjectById(grp_id)
         links = obj_json["links"]
         if name not in links:
             self.log.info(f"Link [{name}] not found in {grp_id}")
             return None
-        if links[name] == None:
+        if links[name] is None:
             self.log.info(f"Link {name} in {grp_id} has been deleted")
             return None
 
         return links[name]
-    
+
     def _addLink(self, grp_id, name, link_json):
         obj_json = self.getObjectById(grp_id)
         links = obj_json["links"]
         links[name] = link_json
         self.make_dirty(grp_id)
-    
+
     def createHardLink(self, grp_id, name, tgt_id):
         """ Create a new hardlink """
         link_json = {"class": "H5L_TYPE_HARD", "id": tgt_id}
@@ -622,7 +600,7 @@ def createExternalLink(self, grp_id, name, h5path, filepath):
         link_json = {"class": "H5L_TYPE_EXTERNAL", "h5path": h5path, "file": filepath}
         link_json["created"] = time.time()
         self._addLink(grp_id, name, link_json)
- 
+
     def deleteLink(self, grp_id, name):
         """ Delete the given link """
         grp_json = self.getObjectById(grp_id)
@@ -633,7 +611,6 @@ def deleteLink(self, grp_id, name):
             raise KeyError(f"Link [{name}] not found in {grp_id}")
         links[name] = None  # mark for deletion
         self.make_dirty(grp_id)
- 
 
     def createGroup(self, cpl=None):
         """ Create a new group """
@@ -648,7 +625,6 @@ def createGroup(self, cpl=None):
         self.db[grp_id] = group_json
         self._new_objects.add(grp_id)
         return grp_id
-    
 
     def createCommittedType(self, datatype, cpl=None):
         """
@@ -658,7 +634,7 @@ def createCommittedType(self, datatype, cpl=None):
         self.log.info("createCommittedType")
         if cpl is None:
             cpl = {}
-         
+
         ctype_id = createObjId(obj_type="datatypes", root_id=self.root_id)
         if isinstance(datatype, np.dtype):
             dt = datatype
@@ -672,11 +648,11 @@ def createCommittedType(self, datatype, cpl=None):
         self.db[ctype_id] = ctype_json
         self._new_objects.add(ctype_id)
         return ctype_id
-  
-    
+
     def createDataset(
         self,
         shape=None,
+        maxdims=None,
         dtype=None,
         cpl=None,
     ):
@@ -687,25 +663,34 @@ def createDataset(
         type_json = getTypeItem(dtype)
         if shape == "H5S_NULL":
             shape_json = {"class": "H5S_NULL"}
+        elif shape == ():
+            shape_json = {"class": "H5S_SCALAR"}
         else:
             shape_json = {"class": "H5S_SIMPLE"}
             shape_json["dims"] = list(shape)
 
+        if maxdims:
+            if shape_json["class"] != "H5S_SIMPLE":
+                raise ValueError("only simple shapes can be resizable")
+            if len(shape) != len(maxdims):
+                raise ValueError("maxdims length not equal to shape rank")
+            shape_json["maxdims"] = ["H5S_UNLIMITED" if x is None else x for x in maxdims]
+
         dset_json = {"shape": shape_json, "type": type_json, "attributes": {}}
         if cpl:
             dset_json["cpl"] = cpl
         else:
             dset_json["cpl"] = {}
- 
-        dset_id = createObjId("datasets", root_id=self.root_id)   
-        self.db[dset_id] = dset_json 
+
+        dset_id = createObjId("datasets", root_id=self.root_id)
+        self.db[dset_id] = dset_json
         self._new_objects.add(dset_id)
         return dset_id
 
     def getCollection(self, col_type=None):
         obj_ids = []
         for obj_id in self.db:
-            if self.db[obj_id] == None:
+            if self.db[obj_id] is None:
                 # skip deleted objects
                 continue
             if not col_type or getCollectionForId(obj_id) == col_type:
@@ -717,7 +702,7 @@ def __len__(self):
         count = 0
         for obj_id in self.db:
             # skip deleted objects
-            if self.db[obj_id] != None:
+            if self.db[obj_id] is not None:
                 count += 1
         return count
 
@@ -725,12 +710,11 @@ def __iter__(self):
         """ Iterate over object ids """
 
         for obj_id in self.db:
-            if self.db[obj_id] == None:
+            if self.db[obj_id] is None:
                 # skip deleted objects
                 continue
             yield obj_id
 
-
     def __contains__(self, obj_id):
         """ Test if a obj id  exists """
-        return obj_id in self.db and self.db[obj_id] != None
+        return obj_id in self.db and self.db[obj_id] is not None
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index acbb2d21..cd3c6a45 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -152,7 +152,6 @@ def special_dtype(**kwds):
     raise TypeError(f'Unknown special type "{name}"')
 
 
-
 def find_item_type(data):
     """Find the item type of a simple object or collection of objects.
 
@@ -182,6 +181,7 @@ def find_item_type(data):
         return None
     return item_types.pop()
 
+
 def guess_dtype(data):
     """ Attempt to guess an appropriate dtype for the object, returning None
     if nothing is appropriate (or if it should be left up the the array
@@ -197,6 +197,7 @@ def guess_dtype(data):
 
     return None
 
+
 def is_float16_dtype(dt):
     if dt is None:
         return False
@@ -204,6 +205,7 @@ def is_float16_dtype(dt):
     dt = np.dtype(dt)  # normalize strings -> np.dtype objects
     return dt.kind == 'f' and dt.itemsize == 2
 
+
 def check_dtype(**kwds):
     """Check a dtype for h5py special type "hint" information.  Only one
     keyword may be given.
@@ -307,7 +309,7 @@ def getTypeItem(dt, metadata=None):
         "float32": "H5T_IEEE_F32",
         "float64": "H5T_IEEE_F64",
     }
-    
+
     dt = np.dtype(dt)  # convert 'int32', np.int32, etc. to a dtype
 
     if not metadata and dt.metadata:
@@ -465,7 +467,6 @@ def getTypeItem(dt, metadata=None):
                 item = {"name": name, "value": value}
                 members.append(item)
             type_info["members"] = members
-            #type_info["mapping"] = mapping
             if dt.name not in predefined_int_types:
                 raise TypeError("Unexpected integer type: " + dt.name)
             # maps to one of the HDF5 predefined types
@@ -505,6 +506,17 @@ def isVlen(dt):
     return is_vlen
 
 
+def isOpaqueDtype(dt):
+    """
+    Return True if this is an opaque dtype
+    """
+    if dt.kind == "V" and len(dt) <= 1 and len(dt.shape) == 0 and not dt.names:
+        return True
+    if dt.metadata and dt.metadata.get('h5py_opaque'):
+        return True
+    return False
+
+
 def getItemSize(typeItem):
     """
     Get size of an item in bytes.
@@ -805,7 +817,6 @@ def createBaseDataType(typeItem):
             raise KeyError("'base' not provided")
         if typeItem["base"] == "H5T_STD_REF_OBJ":
             dtRet = special_dtype(ref=Reference)
-            print("special dtype, metadata:", dtRet.metadata)
         elif typeItem["base"] == "H5T_STD_REF_DSETREG":
             dtRet = special_dtype(ref=RegionReference)
         else:
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index bd34bc56..a5453641 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -117,7 +117,7 @@ def getCollectionForId(obj_id):
     """return groups/datasets/datatypes based on id"""
     if not isinstance(obj_id, str):
         raise ValueError("invalid object id")
-    
+
     collection = None
     if obj_id.startswith("g-"):
         collection = "groups"
@@ -486,6 +486,7 @@ def getUuidFromId(id):
     else:
         raise ValueError(f"Unexpected obj_id: {id}")
 
+
 def stripId(obj_id):
     """ return just the base id without any prefix (e.g. 'g-') """
     if len(obj_id) == UUID_LEN:
@@ -494,4 +495,3 @@ def stripId(obj_id):
         return obj_id[2:]
     else:
         raise ValueError("unexpected obj_id: {obj_id}")
-    
diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/reader/h5json_reader.py
index 44d178a5..6666587c 100644
--- a/src/h5json/reader/h5json_reader.py
+++ b/src/h5json/reader/h5json_reader.py
@@ -18,14 +18,13 @@
 from ..array_util import jsonToArray
 from .. import selections
 from ..h5reader import H5Reader
-  
+
 
 class H5JsonReader(H5Reader):
     """
     This class can be used by HDF5DB to read content from an hdf5-json file
     """
 
-
     def __init__(
         self,
         filepath,
@@ -55,7 +54,7 @@ def close(self):
     def get_root_id(self):
         """ Return root id """
         return self._root_id
-    
+
     def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         """ return object with given id """
         collection = getCollectionForId(obj_id)
@@ -84,8 +83,14 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
                     continue
                 name = item["name"]
                 attr = {}
-                for k in ("type", "shape", "value"):
-                    attr[k] = item[k]
+                if "type" not in item:
+                    raise KeyError(f"expected to find type key for attribute {name} of {obj_id}")
+                attr["type"] = item["type"]
+                if "shape" not in item:
+                    raise KeyError(f"expected to find shape key for attribute {name} of {obj_id}")
+                attr["shape"] = item["shape"]
+                if "value" in item:
+                    attr["value"] = item["value"]
                 attrs[name] = attr
             resp["attributes"] = attrs
 
@@ -122,7 +127,6 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
 
         return resp
 
-  
     def getAttribute(self, obj_id, name, includeData=True):
         """
         Get attribute given an object id and name
@@ -140,7 +144,6 @@ def getAttribute(self, obj_id, name, includeData=True):
             self.log.info(f"attr: [{name}] of {obj_id} not found")
             return None
         return attributes[name]
-        
 
     def getDatasetValues(self, obj_id, sel=None):
         """
@@ -175,12 +178,5 @@ def getDatasetValues(self, obj_id, sel=None):
             arr = arr[sel.slices]
         else:
             raise NotImplementedError("selection type not supported")
-        
-        return arr
-            
-
-
-        
-  
-       
 
+        return arr
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py
index cfae72cc..7042a259 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/reader/h5py_reader.py
@@ -13,31 +13,130 @@
 import numpy as np
 import logging
 
-from ..objid import createObjId
-from ..hdf5dtype import getTypeItem
+from ..objid import createObjId, getCollectionForId
+from ..hdf5dtype import getTypeItem, isOpaqueDtype
 from ..array_util import bytesArrayToList
 from .. import selections
 from .. import filters
+
+from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
 from .h5reader import H5Reader
-  
+
 
 class H5pyReader(H5Reader):
     """
-    This class can be used by HDF5DB to read content from an HDF5 file (using h5py) 
+    This class can be used by HDF5DB to read content from an HDF5 file (using h5py)
     """
 
+    def _copy_element(self, val, src_dt, tgt_dt, fin=None):
+        """ convert the given dataset or attribute element from h5py to h5json equivalent """
+
+        out = None
+        if len(src_dt) > 0:
+            out_fields = []
+            i = 0
+            for name in src_dt.fields:
+                field_src_dt = src_dt.fields[name][0]
+                field_tgt_dt = tgt_dt.fields[name][0]
+                field_val = val[i]
+                i += 1
+                out_field = self._copy_element(field_val, field_src_dt, field_tgt_dt, fin=fin)
+                out_fields.append(out_field)
+            out = tuple(out_fields)
+        elif src_dt.metadata and "ref" in src_dt.metadata:
+            if not tgt_dt.metadata or "ref" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be ref, but got: {tgt_dt}")
+            ref = tgt_dt.metadata["ref"]
+            if is_reference(ref):
+                # initialize out to null ref
+                out = h5py.Reference()  # null h5py ref
+
+                if ref and val:
+                    try:
+                        fin_obj = fin[val]
+                    except AttributeError as ae:
+                        msg = f"Unable able to get obj for ref value: {ae}"
+                        self.log.error(msg)
+                        raise ValueError(msg)
+
+                    addr = h5py.h5o.get_info(fin_obj.id).addr
+                    if addr not in self._addr_map:
+                        msg = f"No object found for ref object: {fin_obj.name}"
+                        self.log.warning(msg)
+                        out = ""
+                    else:
+                        obj_id = self._addr_map[addr]
+                        collection = getCollectionForId(obj_id)
+                        out = f"{collection}/{obj_id}"
+
+            elif is_regionreference(ref):
+                self.log.warning("region reference not supported")
+                # TBD: just return a null region reference till we have support
+                out = ""
+            else:
+                raise TypeError(f"Unexpected ref type: {type(ref)}")
+        elif src_dt.metadata and "vlen" in src_dt.metadata:
+            if not isinstance(val, np.ndarray):
+                raise TypeError(f"Expecting ndarray or vlen element, but got: {type(val)}")
+            if not tgt_dt.metadata or "vlen" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be vlen, but got: {tgt_dt}")
+            src_vlen_dt = src_dt.metadata["vlen"]
+            tgt_vlen_dt = tgt_dt.metadata["vlen"]
+            if has_reference(src_vlen_dt):
+                if len(val.shape) == 0:
+                    # scalar array
+                    e = val[()]
+                    v = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fin=fin)
+                    out = np.array(v, dtype=tgt_dt)
+                else:
+                    out = np.zeros(val.shape, dtype=tgt_dt)
+                    for i in range(len(out)):
+                        e = val[i]
+                        out[i] = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fin=fin)
+            else:
+                # can just directly copy the array
+                out = np.zeros(val.shape, dtype=tgt_dt)
+                out[...] = val[...]
+        else:
+            out = val  # can just copy as is
+        return out
+
+    def _copy_array(self, src_arr, fin=None):
+        """Copy the numpy array to a new array.
+            Convert any reference type to point to item in the target's hierarchy.
+        """
+
+        if not isinstance(src_arr, np.ndarray):
+            raise TypeError(f"Expecting ndarray, but got: {src_arr}")
+        tgt_dt = convert_dtype(src_arr.dtype, to_h5py=False)
+        tgt_arr = np.zeros(src_arr.shape, dtype=tgt_dt)
+
+        if has_reference(src_arr.dtype):
+            # flatten array to simplify iteration
+            count = int(np.prod(src_arr.shape))
+            tgt_arr_flat = tgt_arr.reshape((count,))
+            src_arr_flat = src_arr.reshape((count,))
+            for i in range(count):
+                e = src_arr_flat[i]
+                element = self._copy_element(e, src_arr.dtype, tgt_dt, fin=fin)
+                tgt_arr_flat[i] = element
+            tgt_arr = tgt_arr_flat.reshape(src_arr.shape)
+        else:
+            # can just copy the entire array
+            tgt_arr[...] = src_arr[...]
+        return tgt_arr
+
     def visit(self, path, obj):
         name = obj.__class__.__name__
         self.log.info(f"visit: {path} name: {name}")
-        
+
         obj_id = createObjId(obj_type=name, root_id=self._root_id)  # create uuid
 
-        self._id_map[obj_id] = obj        
-        
+        self._id_map[obj_id] = obj
+
         addr = h5py.h5o.get_info(obj.id).addr
         self._addr_map[addr] = obj_id
 
-
     def __init__(
         self,
         filepath,
@@ -66,13 +165,13 @@ def close(self):
     def get_root_id(self):
         """ Return root id """
         return self._root_id
-    
+
     def getObjIdByAddress(self, addr):
         if addr in self._addr_map:
             return self._addr_map[addr]
         else:
             return None
-    
+
     def getAttribute(self, obj_id, name, include_data=True):
         """ Return JSON for the given attribute """
 
@@ -117,7 +216,7 @@ def getAttribute(self, obj_id, name, include_data=True):
         item["shape"] = shape_item
         if shape_item["class"] == "H5S_NULL":
             include_data = False
-        elif isinstance(type_item, dict) and type_item["class"] in ("H5T_OPAQUE"):
+        elif isinstance(type_item, dict) and type_item["class"] == "H5T_OPAQUE":
             # TBD - don't include data for OPAQUE until JSON serialization
             # issues are addressed
             include_data = False
@@ -126,13 +225,18 @@ def getAttribute(self, obj_id, name, include_data=True):
 
         if include_data:
             try:
-                data = obj.attrs[name] 
+                data = obj.attrs[name]
+                # convert from h5py to h5json
+                data = self._copy_array(data, fin=obj.file)
             except TypeError:
                 self.log.warning("type error reading attribute")
 
         if include_data and data is not None:
-            item["value"] = bytesArrayToList(data)
-             
+            value = bytesArrayToList(data)
+            item["value"] = value
+        else:
+            pass  # no data
+
         # timestamps will be added by getAttributeItem()
         return item
 
@@ -146,7 +250,7 @@ def getAttributes(self, obj_id, include_data=True):
             items[name] = item
 
         return items
-    
+
     def _getLink(self, parent, link_name):
         if link_name not in parent:
             return None
@@ -178,7 +282,7 @@ def _getLink(self, parent, link_name):
                 item["id"] = None
             else:
                 item["id"] = self._addr_map[addr]
-             
+
         return item
 
     def _getLinks(self, grp):
@@ -197,7 +301,7 @@ def _getGroup(self, grp, include_links=True):
             links = self._getLinks(grp)
             item["links"] = links
         return item
-    
+
     def _getDatatype(self, ctype, include_attrs=True):
         self.log.info(f"getDatatype alias: ]{ctype.name}")
         item = {"alias": ctype.name}
@@ -205,7 +309,6 @@ def _getDatatype(self, ctype, include_attrs=True):
 
         return item
 
-
     def _getHDF5DatasetCreationProperties(self, dset, type_class):
         """ Get dataset creation properties maintained by HDF5 library """
 
@@ -267,7 +370,7 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
                 filter_id = filter_info[0]
                 filter_prop["id"] = filter_id
                 if filter_info[3]:
-                    filter_prop["name"] = self.bytesArrayToList(filter_info[3])
+                    filter_prop["name"] = bytesArrayToList(filter_info[3])
                 if filter_id in filters._HDF_FILTERS:
                     hdf_filter = filters._HDF_FILTERS[filter_id]
                     filter_prop["class"] = hdf_filter["class"]
@@ -296,8 +399,8 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
             creationProps["filters"] = filter_props
 
         return creationProps
-    
-    def _getDataset(self, dset):     
+
+    def _getDataset(self, dset):
         self.log.info(f"getDataset alias: [{dset.name}]")
 
         item = {"alias": dset.name}
@@ -308,21 +411,21 @@ def _getDataset(self, dset):
             addr = h5py.h5o.get_info(typeid).addr
             type_uuid = self.getObjIdByAddress(addr)
             committedType = self.getObjectById(type_uuid)
-            typeItem = committedType["type"]
-            typeItem["id"] = type_uuid
+            type_item = committedType["type"]
+            type_item["id"] = type_uuid
         else:
-            typeItem = getTypeItem(dset.dtype)
-        item["type"] = typeItem
-        
-        shapeItem = {}
+            type_item = getTypeItem(dset.dtype)
+        item["type"] = type_item
+
+        shape_item = {}
         if dset.shape is None:
             # new with h5py 2.6, null space datasets will return None for shape
-            shapeItem["class"] = "H5S_NULL"
+            shape_item["class"] = "H5S_NULL"
         elif len(dset.shape) == 0:
-            shapeItem["class"] = "H5S_SCALAR"
+            shape_item["class"] = "H5S_SCALAR"
         else:
-            shapeItem["class"] = "H5S_SIMPLE"
-            shapeItem["dims"] = list(dset.shape)
+            shape_item["class"] = "H5S_SIMPLE"
+            shape_item["dims"] = list(dset.shape)
             maxshape = []
             include_maxdims = False
             for i in range(len(dset.shape)):
@@ -335,14 +438,13 @@ def _getDataset(self, dset):
                         include_maxdims = True
                 maxshape.append(extent)
             if include_maxdims:
-                shapeItem["maxdims"] = maxshape
-        item["shape"] = shapeItem
-
-        item["cpl"] = self._getHDF5DatasetCreationProperties(dset, typeItem["class"])
+                shape_item["maxdims"] = maxshape
+        item["shape"] = shape_item
 
+        item["cpl"] = self._getHDF5DatasetCreationProperties(dset, type_item["class"])
 
         return item
-    
+
     def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         """ return object with given id """
         if obj_id not in self._id_map:
@@ -356,33 +458,35 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
             obj_json = self._getDatatype(h5obj)
         else:
             raise TypeError(f"unexpected object type: {type(h5obj)}")
-        
+
         if include_attrs:
             attributes = self.getAttributes(obj_id)
             obj_json["attributes"] = attributes
 
         return obj_json
 
-
     def getDatasetValues(self, dset_id, sel=None):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
         number of elements as the rank of the dataset.
         """
+
         dset = self._id_map[dset_id]
         self.log.info(f"getDatasetValues: {dset_id}")
         if dset.shape is None:
             # TBD: return something like h5py.Empty in this case?
             return None
+        if isOpaqueDtype(dset.dtype):
+            # TBD: Opaque data not supported yet
+            return None
         if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
             arr = dset[...]
         elif isinstance(sel, selections.SimpleSelection):
             arr = dset[sel.slices]
         else:
             raise NotImplementedError("selection type not supported")
-        
-        return arr
-
-       
 
+        # convert any h5py references to h5json references
+        arr = self._copy_array(arr, fin=dset.file)
+        return arr
diff --git a/src/h5json/reader/h5reader.py b/src/h5json/reader/h5reader.py
index 3923bb15..377bc3f9 100644
--- a/src/h5json/reader/h5reader.py
+++ b/src/h5json/reader/h5reader.py
@@ -16,11 +16,10 @@
 
 class H5Reader(ABC):
     """
-    This abstract class defines properties and methods that the Hdf5db class uses for reading from an HDF5 
-    compatible storage medium.  
+    This abstract class defines properties and methods that the Hdf5db class uses for reading from an HDF5
+    compatible storage medium.
     """
 
-
     def __init__(
         self,
         filepath,
@@ -31,17 +30,17 @@ def __init__(
             self.log = app_logger
         else:
             self.log = logging.getLogger()
-       
+
     @abstractmethod
     def get_root_id(self):
         """ Return root id """
         pass
 
-    @abstractmethod 
+    @abstractmethod
     def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         """ return object with given id """
         pass
-  
+
     @abstractmethod
     def getAttribute(self, obj_id, name, includeData=True):
         """
@@ -63,4 +62,3 @@ def getDatasetValues(self, obj_id, sel=None):
     def close(self):
         """ close any open handles to the storage """
         pass
-
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index ef296d70..3a94b094 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -115,6 +115,7 @@ def select(obj, args):
     sel[args]
     return sel
 
+
 def intersect(s1, s2):
     """ Return the intersection of two selections """
     # TBD: this is currently only working for simple selections with stride 1
@@ -129,7 +130,7 @@ def intersect(s1, s2):
         raise TypeError("Expected hyperslab selection for second arg")
     if s1.shape != s2.shape:
         raise ValueError("selections have incompatible shapes")
-    
+
     slices = []
     rank = len(s1.shape)
     for dim in range(rank):
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index 8c5ce6af..097a1ccc 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -17,10 +17,11 @@
 from ..array_util import bytesArrayToList
 from .. import selections
 
+
 class H5JsonWriter(H5Writer):
     """
-    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5 
-    compatible storage medium.  
+    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5
+    compatible storage medium.
     """
 
     def __init__(
@@ -34,14 +35,14 @@ def __init__(
         self.alias_db = {}
         self.json = {}
         self._root_uuid = None
-       
+
     def flush(self):
         """ Write dirty items """
         # json writer doesn't support incremental updates, so we'll wait
         # for close to write out database
         self.log.info("flush")
         return False
-  
+
     def close(self):
         """ close storage handle """
         self.dumpFile()
@@ -51,8 +52,7 @@ def getAliasList(self, obj_id):
         if obj_id not in self.alias_db:
             self.alias_db[obj_id] = []
         return self.alias_db[obj_id]
-         
-    
+
     def updateAliasList(self):
         """ update the alias list for each object """
         # clear exiting aliases
@@ -62,7 +62,6 @@ def updateAliasList(self):
 
         self._setAlias(self._root_uuid, set(), "/")
 
-     
     def _setAlias(self, obj_id, id_set, h5path):
         """ add the given h5path to the object's alias list
             If the object is a group, recurse through each hard link """
@@ -83,24 +82,23 @@ def _setAlias(self, obj_id, id_set, h5path):
             if link_json["class"] == "H5L_TYPE_HARD":
                 tgt_id = link_json["id"]
                 if tgt_id in id_set:
-                    self.log.info(f"_setAlias - circular loop found")
+                    self.log.info("_setAlias - circular loop found")
                 else:
-                    self._setAlias(tgt_id, id_set, h5path+link_name)
+                    self._setAlias(tgt_id, id_set, f"{h5path}{link_name}")
         id_set.remove(obj_id)
 
-
     def dumpAttribute(self, obj_id, attr_name):
         self.log.info(f"dumpAttribute: [{attr_name}]")
         item = self.db.getAttribute(obj_id, attr_name)
         response = {"name": attr_name}
         response["type"] = item["type"]
         response["shape"] = item["shape"]
-        if True:
-            if "value" not in item:
-                self.log.warning("no value key in attribute: " + attr_name)
-            else:
-                # dump values unless header -D was passed
-                response["value"] = item["value"]  
+
+        if "value" not in item:
+            self.log.warning(f"no value key in attribute: {attr_name}")
+        else:
+            # dump values unless header -D was passed
+            response["value"] = item["value"]
         return response
 
     def dumpAttributes(self, obj_id):
@@ -142,7 +140,7 @@ def dumpGroup(self, obj_id):
 
         alias = self.getAliasList(obj_id)
         response["alias"] = alias
-         
+
         if "cpl" in item:
             item["creationProperties"] = item["cpl"]
         attributes = self.dumpAttributes(obj_id)
@@ -172,11 +170,8 @@ def dumpDataset(self, obj_id):
         response = {}
         self.log.info("dumpDataset: " + obj_id)
         item = self.db.getObjectById(obj_id)
-        if "alias" in item:
-            alias = item["alias"]
-            if alias:
-                self.log.info(f"dumpDataset alias: [{alias[0]}]")
-            response["alias"] = item["alias"]
+        alias = self.getAliasList(obj_id)
+        response["alias"] = alias
 
         response["type"] = item["type"]
         shapeItem = item["shape"]
@@ -217,8 +212,6 @@ def dumpDataset(self, obj_id):
                 sel_all = selections.select(dims, ...)
                 arr = self.db.getDatasetValues(obj_id, sel_all)
                 response["value"] = bytesArrayToList(arr)  # dump values unless header flag was passed
-            else:
-                response["value"] = []  # empty list
         return response
 
     def dumpDatasets(self):
@@ -235,7 +228,8 @@ def dumpDatasets(self):
     def dumpDatatype(self, obj_id):
         response = {}
         item = self.db.getObjectById(obj_id)
-        response["alias"] = item["alias"]
+        alias = self.getAliasList(obj_id)
+        response["alias"] = alias
         response["type"] = item["type"]
         if "cpl" in item:
             response["creationProperties"] = item["cpl"]
@@ -255,7 +249,6 @@ def dumpDatatypes(self):
 
             self.json["datatypes"] = datatypes
 
-
     def dumpFile(self):
         self._root_uuid = self.db.getObjectIdByPath("/")
 
@@ -272,7 +265,10 @@ def dumpFile(self):
 
         self.dumpDatatypes()
 
-        print(json.dumps(self.json, sort_keys=True, indent=4))
-
-
-
+        indent = 4
+        ensure_ascii = False
+        if self._filepath:
+            with open('data.json', 'w', encoding='utf-8') as f:
+                json.dump(self.json, f, ensure_ascii=ensure_ascii, indent=indent)
+        else:
+            print(json.dumps(self.json, sort_keys=True, ensure_ascii=ensure_ascii, indent=indent))
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index 07717ddf..2d212102 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -20,13 +20,11 @@
 from .h5writer import H5Writer
 
 
-
 class H5pyWriter(H5Writer):
     """
-    This class saves state from the Hdf5Db class into an HDF5 file.  
+    This class saves state from the Hdf5Db class into an HDF5 file.
     """
 
-
     def __init__(
         self,
         filepath,
@@ -43,7 +41,6 @@ def __init__(
 
         self._id_map = {}
 
-    
     def _copy_element(self, val, src_dt, tgt_dt, fout=None):
         """ convert the given dataset or attribute element to h5py equivalent """
 
@@ -66,7 +63,7 @@ def _copy_element(self, val, src_dt, tgt_dt, fout=None):
             if is_reference(ref):
                 # initialize out to null ref
                 out = h5py.Reference()  # null h5py ref
-             
+
                 if ref and val:
                     if isinstance(val, bytes):
                         val = val.decode("ascii")
@@ -148,7 +145,6 @@ def _createGroup(self, parent, grp_json, name=None):
         grp = parent.create_group(name)
         return grp
 
-
     def _createDataset(self, parent, dset_json, name=None):
         """ create a dataset object """
 
@@ -175,7 +171,7 @@ def _createDataset(self, parent, dset_json, name=None):
                         msg = "fillvalue has incorrect number of elements"
                         self.log.warning(msg)
                         raise ValueError(msg)
-                    
+
                     fillvalue = jsonToArray((), dtype, fillvalue)
 
                 kwargs["fillvalue"] = fillvalue
@@ -255,7 +251,7 @@ def _createDataset(self, parent, dset_json, name=None):
                             kwargs["scaleoffset"] = filter_prop["scaleOffset"]
                         else:
                             self.log.info(f"Unexpected filter name: {filter_alias}, ignoring")
-                            
+
         dset = parent.create_dataset(name, **kwargs)
         return dset
 
@@ -267,14 +263,10 @@ def _createDatatype(self, parent, ctype_json, name=None):
         parent[name] = dtype
         return parent[name]
 
-
     def _createObjects(self, parent, links_json, visited=set()):
         """ create child object in the given group, recurse for any sub-groups """
 
         for title in links_json:
-            #if title in parent:
-            #    # TBD: this will do the wrong thing if the link tgt has changed
-            #    continue
             link_json = links_json[title]
             link_class = link_json["class"]
             if link_class == "H5L_TYPE_SOFT" and title not in parent:
@@ -299,11 +291,11 @@ def _createObjects(self, parent, links_json, visited=set()):
                         self.log.warning("h5py_writer - expected to find {tgt_id} in id_map")
                     continue
                 """
-                
+
                 collection = getCollectionForId(tgt_id)
 
                 obj_json = self.db.getObjectById(tgt_id)
-            
+
                 if tgt_id in self._id_map:
                     # object has already been created
                     tgt_path = self._id_map[tgt_id]
@@ -351,55 +343,51 @@ def updateDatasetValues(self, dset_id, dset):
                 stop = start + sel.count[dim]
                 step = sel.step[dim]
                 slices.append(slice(start, stop, step))
-            slices = tuple(slices)  
+            slices = tuple(slices)
             dset[slices] = val
             self.log.debug(f"h5py_writer dset {dset.name} updated")
 
-    
-
     def createAttribute(self, obj, name, attr_json):
         """ add the given attribute to obj """
-    
+
         src_dt = createDataType(attr_json["type"])
-         
-        # handle special case of null space attribute here   
+
+        # handle special case of null space attribute here
         shape_json = attr_json["shape"]
         shape_class = shape_json["class"]
         if shape_class == "H5S_NULL":
             obj.attrs[name] = h5py.Empty(convert_dtype(src_dt, to_h5py=True))
             return
-        
+
         if shape_class == "H5S_SCALAR":
             dims = ()
         else:
             dims = shape_json["dims"]
         src_arr = jsonToArray(dims, src_dt, attr_json["value"])
         tgt_arr = self._copy_array(src_arr, fout=obj.file)
-            
-        obj.attrs[name] = tgt_arr
 
+        obj.attrs[name] = tgt_arr
 
     def updateAttributes(self, obj_id, obj):
         """ create/replace any modified attributes """
 
         obj_json = self.db.getObjectById(obj_id)
-        
+
         if "attributes" not in obj_json:
             # no attributes
             return
-        
+
         attrs = obj_json["attributes"]
         for name in attrs:
             attr_json = attrs[name]
             self.createAttribute(obj, name, attr_json)
 
- 
     def flush(self):
         """ Write dirty items """
         if not self.db:
             # no db set yet
             return False
-   
+
         self.log.info("h5py_writer.flush()")
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
@@ -420,8 +408,6 @@ def flush(self):
         self._mode = "a"  # use append mode for future updates
         return True  # all objects written successfully
 
-  
     def close(self):
         """ close storage handle """
         self.flush()
-
diff --git a/src/h5json/writer/h5writer.py b/src/h5json/writer/h5writer.py
index 4e57048f..aaab2e51 100644
--- a/src/h5json/writer/h5writer.py
+++ b/src/h5json/writer/h5writer.py
@@ -16,11 +16,10 @@
 
 class H5Writer(ABC):
     """
-    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5 
-    compatible storage medium.  
+    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5
+    compatible storage medium.
     """
 
-
     def __init__(
         self,
         filepath,
@@ -38,9 +37,7 @@ def __init__(
         else:
             self.log = logging.getLogger()
 
-    
     def set_db(self, db):
-        #TBD - use weak ref?
         self._db_ref = weakref.ref(db)
 
     @property
@@ -53,9 +50,8 @@ def db(self):
     def flush(self):
         """ Write dirty items """
         pass
-  
+
     @abstractmethod
     def close(self):
         """ close storage handle """
         pass
-
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index df69f029..e6512d7f 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -44,9 +44,12 @@ def __init__(self, *args, **kwargs):
         self.log.info("init!")
 
 
-    def testGroup(self):
+    def testSimple(self):
     
-        with Hdf5db(h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False), app_logger=self.log) as db:
+        filepath = "test/unit/out/h5json_writer_testSimple.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "attr1", value=[1,2,3,4])
             db.createAttribute(root_id, "attr2", 42)
@@ -72,10 +75,12 @@ def testGroup(self):
             
 
 
-
     def testNullSpaceAttribute(self):
+        
+        filepath = "test/unit/out/h5json_writer_testNullSpaceAttribute.h5"
 
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
             item = db.getAttribute(root_id, "A1")
@@ -88,7 +93,10 @@ def testNullSpaceAttribute(self):
             self.assertEqual(value, None)
 
     def testScalarAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testScalarAttribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             dims = ()
             value = 42
@@ -112,7 +120,10 @@ def testScalarAttribute(self):
             
 
     def testFixedStringAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testFixedStringAttribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             value = "Hello, world!"
             db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
@@ -131,7 +142,10 @@ def testFixedStringAttribute(self):
        
 
     def testVlenAsciiAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testVlenAsciiAttribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
  
             value = b"Hello, world!"
@@ -153,7 +167,10 @@ def testVlenAsciiAttribute(self):
             self.assertTrue(item["created"] > now - 1)
 
     def testVlenUtf8Attribute(self):
+        filepath = "test/unit/out/h5json_writer_testVlenutf8Attribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
  
             value = b"Hello, world!"
@@ -176,7 +193,10 @@ def testVlenUtf8Attribute(self):
  
 
     def testIntAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testIntAttribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             value = [2, 3, 5, 7, 11]
             db.createAttribute(root_id, "A1", value, dtype=np.int16)
@@ -192,7 +212,10 @@ def testIntAttribute(self):
             self.assertEqual(item_type["base"], "H5T_STD_I16LE")
 
     def testCreateReferenceAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testCreateReferenceAttribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
 
             dset_id = db.createDataset(shape=(), dtype=np.int32)                
@@ -215,7 +238,10 @@ def testCreateReferenceAttribute(self):
             self.assertEqual(attr_value[0], ds1_ref)
 
     def testCreateVlenReferenceAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testVlenReferenceAttribute.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             dset_id = db.createDataset(shape=(), dtype=np.int32)                
             db.createHardLink(root_id, "DS1", dset_id)
@@ -248,7 +274,10 @@ def testCreateVlenReferenceAttribute(self):
             
 
     def testCommittedType(self):
+        filepath = "test/unit/out/h5json_writer_testCommittedType.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             dt = np.dtype("S15")
              
@@ -277,7 +306,10 @@ def testCommittedType(self):
 
 
     def testCommittedCompoundType(self):
+        filepath = "test/unit/out/h5json_writer_testCommittedCompoundType.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
 
             dt_str = special_dtype(vlen=str)
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 75b7e37b..3a8964e0 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -368,7 +368,6 @@ def testCreateVlenReferenceAttribute(self):
             item_shape = item["shape"]
             self.assertEqual(item_shape["class"], "H5S_SCALAR")
 
-        print("open:", filepath)
         with h5py.File(filepath) as f:
             self.assertTrue("DS1" in f)
             ds1 = f["DS1"]
@@ -383,16 +382,19 @@ def testCreateVlenReferenceAttribute(self):
     def testCommittedType(self):
 
         filepath = "test/unit/out/h5py_writer_test_testCommittedType.h5"
+        dt = np.dtype("S15")
+
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
-            dt = np.dtype("S15")
              
             ctype_id = db.createCommittedType(dt)
             db.createHardLink(root_id, "ctype", ctype_id)
             item = db.getObjectById(ctype_id)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
+            db.createHardLink(root_id, "T1", ctype_id)
+
 
             item_type = item["type"]
 
@@ -411,15 +413,30 @@ def testCommittedType(self):
             self.assertEqual(attr_type["length"], 15)
             self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
 
+        with h5py.File(filepath) as f:
+            self.assertTrue("T1" in f)
+            t1 = f["T1"]
+            self.assertTrue(isinstance(t1, h5py.Datatype))
+            self.assertEqual(t1.dtype, dt)
+
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            print("a1:", a1)
+            self.assertEqual(a1, b"hello, world!")
+
 
     def testCommittedCompoundType(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCommittedCompoundType.h5"
+
         with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
 
             dt_str = special_dtype(vlen=str)
             fields = []
             fields.append(("field_1", np.dtype(">i8")))
-            fields.append(("field_2", ">f8"))
+            fields.append(("field_2", np.dtype(">f8")))
             fields.append(("field_3", np.dtype("S15")))
             fields.append(("field_4", dt_str))
             dt = np.dtype(fields)
@@ -429,6 +446,7 @@ def testCommittedCompoundType(self):
             item = db.getObjectById(ctype_id)
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
+            db.createHardLink(root_id, "T1", ctype_id)
 
             item_type = item["type"]
 
@@ -449,6 +467,23 @@ def testCommittedCompoundType(self):
             
             value = db.getAttributeValue(root_id, "A1")
             self.assertTrue(isinstance(value, np.ndarray))
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("T1" in f)
+            t1 = f["T1"]
+            self.assertTrue(isinstance(t1, h5py.Datatype))
+            print("dtype:", t1.dtype)
+            self.assertEqual(len(t1.dtype), 4)
+            sub_dt = t1.dtype["field_1"]
+            self.assertEqual(sub_dt, np.dtype(">i8"))
+            sub_dt = t1.dtype["field_2"]
+            self.assertEqual(sub_dt, np.dtype(">f8"))
+            sub_dt = t1.dtype["field_3"]
+            self.assertEqual(sub_dt, np.dtype("S15"))
+            sub_dt = t1.dtype["field_4"]
+            self.assertEqual(sub_dt, h5py.special_dtype(vlen=str))
+
+
    
 
 if __name__ == "__main__":
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 8931dd9c..dd6869ec 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -410,6 +410,34 @@ def testScalarDataset(self):
             self.assertEqual(arr.min(), 42)
             self.assertEqual(arr.max(), 42)
 
+    def testResizableDataset(self):
+        with Hdf5db(app_logger=self.log) as db:
+            nrows = 8
+            ncols = 10
+            shape = (nrows, ncols)
+            dtype = np.int32
+            maxdims = (None, ncols*2)
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
+            db.createHardLink(root_id, "dset", dset_id)
+            db.createAttribute(dset_id, "a1", "Hello, world")
+            
+            # resize limited dimension
+            db.resizeDataset(dset_id, (nrows, ncols*2))
+
+            # try to go beyond max extent
+            try:
+                db.resizeDataset(dset_id, (nrows, ncols*3))
+                self.assertTrue(False)
+            except ValueError:
+                pass  # expected
+
+            # resize unlimited dimension
+            db.resizeDataset(dset_id, (nrows*10, ncols))
+
+
+            
+
             
 
 
diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index 63efc239..2f798378 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -18,6 +18,7 @@
 from h5json.hdf5dtype import check_dtype
 from h5json.hdf5dtype import Reference
 from h5json.hdf5dtype import RegionReference
+from h5json.hdf5dtype import isOpaqueDtype
 
 
 class Hdf5dtypeTest(unittest.TestCase):
@@ -287,6 +288,7 @@ def testCompoundArrayVlenStringTypeItem(self):
 
     def testOpaqueTypeItem(self):
         dt = np.dtype("V200")
+        self.assertTrue(isOpaqueDtype(dt))
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeItem["class"], "H5T_OPAQUE")

From 541b96663b4c78fade681907429ae6eb2a4a2de8 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 28 Mar 2025 13:45:03 +0100
Subject: [PATCH 020/129] fix for vlen encoding

---
 src/h5json/array_util.py           |  41 ++++----
 src/h5json/hdf5db.py               |   2 +
 src/h5json/writer/h5json_writer.py |   1 -
 src/h5json/writer/h5py_writer.py   |  24 ++---
 test/unit/array_util_test.py       | 162 +++++++++++++++++++++--------
 test/unit/h5json_reader_test.py    |   6 +-
 test/unit/h5json_writer_test.py    |  37 +++----
 test/unit/h5py_reader_test.py      |   6 +-
 test/unit/h5py_writer_test.py      |  50 ++++-----
 test/unit/hdf5db_test.py           |  55 ++++------
 testall.py                         |  11 +-
 11 files changed, 216 insertions(+), 179 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 67c847c3..1640d687 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -30,7 +30,7 @@ def bytesArrayToList(data):
         if len(data.shape) == 0:
             is_list = False
             data = data.tolist()  # tolist will return a scalar in this case
-            if type(data) in (list, tuple):
+            if type(data) in (list, tuple, np.ndarray):
                 is_list = True
             else:
                 is_list = False
@@ -40,7 +40,6 @@ def bytesArrayToList(data):
         is_list = True
     else:
         is_list = False
-
     if is_list:
         out = []
         for item in data:
@@ -71,8 +70,6 @@ def toTuple(rank, data):
         else:
             return tuple(toTuple(rank - 1, x) for x in data)
     else:
-        if isinstance(data, str):
-            data = data.encode("utf8")
         return data
 
 
@@ -124,12 +121,15 @@ def jsonToArray(data_shape, data_dtype, data_json):
     Return numpy array from the given json array.
     """
     def fillVlenArray(rank, data, arr, index):
-        for i in range(len(data)):
-            if rank > 1:
-                index = fillVlenArray(rank - 1, data[i], arr, index)
-            else:
-                arr[index] = data[i]
-                index += 1
+        if arr.shape == ():
+            arr[()] = data
+        else:
+            for i in range(len(data)):
+                if rank > 1:
+                    index = fillVlenArray(rank - 1, data[i], arr, index)
+                else:
+                    arr[index] = data[i]
+                    index += 1
         return index
 
     if data_json is None:
@@ -149,25 +149,26 @@ def fillVlenArray(rank, data, arr, index):
 
     if type(data_json) in (list, tuple):
         converted_data = []
-        if npoints == 1 and len(data_json) == len(data_dtype):
-            converted_data.append(toTuple(0, data_json))
+        if npoints == 1:
+            converted_data = toTuple(np_shape_rank, data_json)
         else:
             converted_data = toTuple(np_shape_rank, data_json)
         data_json = converted_data
-    else:
-        if isinstance(data_json, str):
-            data_json = data_json.encode("utf8")
-        data_json = [data_json,]  # listify
 
     if isVlen(data_dtype):
-        arr = np.zeros((npoints,), dtype=data_dtype)
+        if np_shape_rank == 0 and npoints == 1:
+            arr_shape = ()
+        else:
+            arr_shape = (npoints,)
+        arr = np.zeros(arr_shape, dtype=data_dtype)
         fillVlenArray(np_shape_rank, data_json, arr, 0)
     else:
         try:
             arr = np.array(data_json, dtype=data_dtype)
-        except UnicodeEncodeError as ude:
-            msg = "Unable to encode data"
-            raise ValueError(msg) from ude
+        except UnicodeEncodeError:
+            # Unable to encode data
+            # TBD: look into using surrogate encoding here
+            raise
     # raise an exception of the array shape doesn't match the selection shape
     # allow if the array is a scalar and the selection shape is one element,
     # numpy is ok with this
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 5c7e37a6..029c6645 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -393,7 +393,9 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
 
                 # We need this to handle special string types.
                 value = np.asarray(value, dtype=dtype)
+
             value_json = bytesArrayToList(value)
+
         else:
             value_json = None
 
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index 097a1ccc..bdf59822 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -264,7 +264,6 @@ def dumpFile(self):
         self.dumpDatasets()
 
         self.dumpDatatypes()
-
         indent = 4
         ensure_ascii = False
         if self._filepath:
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index 2d212102..68a1f147 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -43,7 +43,6 @@ def __init__(
 
     def _copy_element(self, val, src_dt, tgt_dt, fout=None):
         """ convert the given dataset or attribute element to h5py equivalent """
-
         out = None
         if len(src_dt) > 0:
             out_fields = []
@@ -90,23 +89,24 @@ def _copy_element(self, val, src_dt, tgt_dt, fout=None):
             else:
                 raise TypeError(f"Unexpected ref type: {type(ref)}")
         elif src_dt.metadata and "vlen" in src_dt.metadata:
-            if not isinstance(val, np.ndarray):
-                raise TypeError(f"Expecting ndarray or vlen element, but got: {type(val)}")
             if not tgt_dt.metadata or "vlen" not in tgt_dt.metadata:
                 raise TypeError(f"Expected tgt dtype to be vlen, but got: {tgt_dt}")
             src_vlen_dt = src_dt.metadata["vlen"]
             tgt_vlen_dt = tgt_dt.metadata["vlen"]
+
             if has_reference(src_vlen_dt):
-                if len(val.shape) == 0:
-                    # scalar array
-                    e = val[()]
-                    v = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fout=fout)
-                    out = np.array(v, dtype=tgt_dt)
-                else:
-                    out = np.zeros(val.shape, dtype=tgt_dt)
-                    for i in range(len(out)):
+                if isinstance(val, np.ndarray) and val.shape == ():
+                    val = val[()]
+                if isinstance(val, np.ndarray) or isinstance(val, list) or isinstance(val, tuple):
+                    count = len(val)
+                    out = np.zeros((count,), dtype=tgt_dt)
+                    for i in range(count):
                         e = val[i]
                         out[i] = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fout=fout)
+                else:
+                    # scalar array
+                    v = self._copy_element(val, src_vlen_dt, tgt_vlen_dt, fout=fout)
+                    out = np.array(v, dtype=tgt_dt)
             else:
                 # can just directly copy the array
                 out = np.zeros(val.shape, dtype=tgt_dt)
@@ -119,7 +119,6 @@ def _copy_array(self, src_arr, fout=None):
         """Copy the numpy array to a new array.
             Convert any reference type to point to item in the target's hierarchy.
         """
-
         if not isinstance(src_arr, np.ndarray):
             raise TypeError(f"Expecting ndarray, but got: {src_arr}")
         tgt_dt = convert_dtype(src_arr.dtype, to_h5py=True)
@@ -365,7 +364,6 @@ def createAttribute(self, obj, name, attr_json):
             dims = shape_json["dims"]
         src_arr = jsonToArray(dims, src_dt, attr_json["value"])
         tgt_arr = self._copy_array(src_arr, fout=obj.file)
-
         obj.attrs[name] = tgt_arr
 
     def updateAttributes(self, obj_id, obj):
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index d37c7f5f..f68cbbc8 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -95,6 +95,9 @@ def testGetNumElements(self):
         self.assertEqual(nelements, 80)
 
     def testJsonToArray(self):
+       
+        # simple integer 
+
         dt = np.dtype("i4")
         shape = [4, ]
         data = [0, 2, 4, 6]
@@ -105,50 +108,40 @@ def testJsonToArray(self):
         for i in range(4):
             self.assertEqual(out[i], i * 2)
 
-        # compound type
-        dt = np.dtype([("a", "i4"), ("b", "S5")])
-        shape = [2, ]
-        data = [[4, "four"], [5, "five"]]
+        shape = ()  # scalar
+        data = 42
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        self.assertEqual(out[()], 42)  
 
-        self.assertEqual(out.shape, (2,))
-        self.assertTrue(isinstance(out[0], np.void))
-        e0 = out[0].tolist()
-        self.assertEqual(e0, (4, b"four"))
-        self.assertTrue(isinstance(out[1], np.void))
-        e1 = out[1].tolist()
-        self.assertEqual(e1, (5, b"five"))
-
-        shape = [1, ]
-        data = [
-            [6, "six"],
-        ]
-        out = jsonToArray(shape, dt, data)
-        e0 = out[0].tolist()
-        self.assertEqual(e0, (6, b"six"))
-
-        data = [6, "six"]
-        out = jsonToArray(shape, dt, data)
-        e0 = out[0].tolist()
-        self.assertEqual(e0, (6, b"six"))
-
-        # test ascii chars >127
-        dt = np.dtype("S26")
-        data = "extended ascii char 241: " + chr(241)
+        # VLEN Scalar str
+        dt = special_dtype(vlen=str)
+        data = "I'm a string!"
+        shape = []
         out = jsonToArray(shape, dt, data)
-        self.assertEqual(out[0], b'extended ascii char 241: \xc3')
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        val = out[()]
+        self.assertEqual(val, data)
 
-        dt = np.dtype("S12")
-        data = "eight: \u516b"
-        out = jsonToArray(shape, dt, data)
-        self.assertEqual(out[0], b'eight: \xe5\x85\xab')
+        # VLEN one element str
+        dt = special_dtype(vlen=str)
+        data = "I'm a string!"
+        shape = [1,]
+        out = jsonToArray(shape, dt, [data,])
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        val = out[0]
+        self.assertEqual(val, data)
 
         # VLEN ascii
         dt = special_dtype(vlen=bytes)
         data = [b"one", b"two", b"three", b"four", b"five"]
         shape = [5, ]
         out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5,))
         self.assertTrue("vlen" in out.dtype.metadata)
         self.assertEqual(out.dtype.metadata["vlen"], bytes)
         self.assertEqual(out.dtype.kind, "O")
@@ -166,6 +159,7 @@ def testJsonToArray(self):
         ]
         shape = [2,]
         out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
         self.assertTrue("vlen" in out.dtype.metadata)
         self.assertEqual(out.dtype.metadata["vlen"], str)
         self.assertEqual(out.dtype.kind, "O")
@@ -173,21 +167,40 @@ def testJsonToArray(self):
         self.assertEqual(out[0], tuple(data[0]))
         self.assertEqual(out[1], tuple(data[1]))
 
-        # VLEN Scalar str
-        dt = special_dtype(vlen=str)
-        data = "I'm a string!"
-        shape = [1, ]
-        out = jsonToArray(shape, dt, data)
-
+        
         # VLEN unicode
         dt = special_dtype(vlen=bytes)
         data = ["one", "two", "three", "four", "five"]
         shape = [5, ]
         out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
         self.assertTrue("vlen" in out.dtype.metadata)
         self.assertEqual(out.dtype.metadata["vlen"], bytes)
         self.assertEqual(out.dtype.kind, "O")
-        self.assertEqual(out[2], b"three")
+        self.assertEqual(out[2], "three")  
+
+        # test ascii chars >127
+        dt = np.dtype("S26")
+        shape = []
+        data = "extended ascii char 241: " + chr(241)
+        try:
+            jsonToArray(shape, dt, data)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+          
+        dt = special_dtype(vlen=str)
+        out = jsonToArray(shape, dt, data)  # vlen str should be ok
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data)
+
+        dt = np.dtype("S12")
+        data = "eight: \u516b"
+        try:
+            jsonToArray(shape, dt, data)
+            self.assertTrue(False)
+        except UnicodeEncodeError:
+            pass  # expected
 
         # VLEN data
         dt = special_dtype(vlen=np.dtype("int32"))
@@ -270,6 +283,62 @@ def testJsonToArray(self):
         self.assertTrue(isinstance(e, tuple))
         self.assertEqual(e, (id0, id1, id2))
 
+        # compound type
+        dt = np.dtype([("a", "i4"), ("b", "S5")])
+        shape = [2, ]
+        data = [[4, "four"], [5, "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
+        self.assertEqual(out.shape, (2,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (4, b"four"))
+        self.assertTrue(isinstance(out[1], np.void))
+        e1 = out[1].tolist()
+        self.assertEqual(e1, (5, b"five"))
+
+        # compound with VLEN element
+         
+        dt_str = special_dtype(vlen=str)
+        dt = np.dtype([("a", "i4"), ("b", dt_str)])
+        shape = [1, ]
+        data = [[6, "six"],]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        e0 = out[0]
+
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (6, "six"))
+        shape = []
+        data = [6, "six",]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        e0 = out[()]
+        self.assertEqual(len(e0), 2)
+        self.assertEqual(e0[0], 6)
+        self.assertEqual(e0[1], "six")
+
+        # one element compound
+        shape = [1, ]
+        data = [[6, "six"],]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (6, "six"))
+        
+        # scalar compound
+        shape = []
+        data = [6, "six"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        e0 = out[()].tolist()
+        self.assertEqual(e0, (6, "six")) 
+
         # compound type with array field
         dt = np.dtype([("a", ("i4", 3)), ("b", "S5")])
         shape = [2, ]
@@ -472,8 +541,8 @@ def testToBytes(self):
         #
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
-        arr = np.zeros((4,), dtype=dt)
-        dt_str = np.dtype("O", metadata={"vlen": str})
+        arr = np.zeros((4,), dtype=dt)         
+        dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
         arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
         buffer = arrayToBytes(arr)
@@ -499,7 +568,8 @@ def testToBytes(self):
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
-        dt_str = np.dtype("O", metadata={"vlen": bytes})
+         
+        dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
         arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
         buffer = arrayToBytes(arr)
@@ -625,7 +695,8 @@ def testArrToBytesBase64(self):
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
-        dt_str = np.dtype("O", metadata={"vlen": str})
+         
+        dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
         arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
         buffer = arrayToBytes(arr, encoding="base64")
@@ -645,7 +716,8 @@ def testArrToBytesBase64(self):
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
-        dt_str = np.dtype("O", metadata={"vlen": bytes})
+         
+        dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
         arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
         buffer = arrayToBytes(arr, encoding="base64")
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index 5027232e..06946f94 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -61,7 +61,7 @@ def testSimple(self):
             self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
             dset_shape = dset_json["shape"]
             self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(dset_shape["dims"], [10,10])
+            self.assertEqual(dset_shape["dims"], [10, 10])
 
             # try adding an attribute
             db.createAttribute(dset111_id, "attr3", value=42)
@@ -85,7 +85,3 @@ def testSimple(self):
     # setup test files
 
     unittest.main()
-
-
-
-
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index e6512d7f..608f627f 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -43,15 +43,14 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
-
     def testSimple(self):
-    
+
         filepath = "test/unit/out/h5json_writer_testSimple.h5"
 
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "attr1", value=[1,2,3,4])
+            db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
             db.createAttribute(root_id, "attr2", 42)
             g1_id = db.createGroup()
             db.createHardLink(root_id, "g1", g1_id)
@@ -60,7 +59,7 @@ def testSimple(self):
 
             g1_1_id = db.createGroup()
             db.createHardLink(g1_id, "g1.1", g1_1_id)
-            dset_111_id = db.createDataset(shape=(10,10), dtype=np.int32)
+            dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
             arr = np.zeros((10, 10), dtype=np.int32)
             for i in range(10):
                 for j in range(10):
@@ -72,11 +71,9 @@ def testSimple(self):
             db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
             db.createCustomLink(g2_id, "cust", {"foo": "bar"})
             db.flush()
-            
-
 
     def testNullSpaceAttribute(self):
-        
+
         filepath = "test/unit/out/h5json_writer_testNullSpaceAttribute.h5"
 
         with Hdf5db(app_logger=self.log) as db:
@@ -117,7 +114,6 @@ def testScalarAttribute(self):
 
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            
 
     def testFixedStringAttribute(self):
         filepath = "test/unit/out/h5json_writer_testFixedStringAttribute.h5"
@@ -139,7 +135,7 @@ def testFixedStringAttribute(self):
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
             ret_value = db.getAttributeValue(root_id, "A1")
-       
+            self.assertEqual(ret_value, b'Hello, world!')
 
     def testVlenAsciiAttribute(self):
         filepath = "test/unit/out/h5json_writer_testVlenAsciiAttribute.h5"
@@ -147,7 +143,7 @@ def testVlenAsciiAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
- 
+
             value = b"Hello, world!"
             dt = special_dtype(vlen=bytes)
 
@@ -172,7 +168,7 @@ def testVlenUtf8Attribute(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
- 
+
             value = b"Hello, world!"
             dt = special_dtype(vlen=str)
 
@@ -190,7 +186,6 @@ def testVlenUtf8Attribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
- 
 
     def testIntAttribute(self):
         filepath = "test/unit/out/h5json_writer_testIntAttribute.h5"
@@ -218,7 +213,7 @@ def testCreateReferenceAttribute(self):
             db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
 
-            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
             db.createHardLink(root_id, "DS1", dset_id)
 
             dt = special_dtype(ref=Reference)
@@ -229,7 +224,7 @@ def testCreateReferenceAttribute(self):
             item = db.getAttribute(root_id, "A1")
             attr = db.getAttribute(root_id, "A1")
             self.assertTrue("shape" in attr)
-            
+
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
@@ -243,14 +238,14 @@ def testCreateVlenReferenceAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
             db.createHardLink(root_id, "DS1", dset_id)
             grp_id = db.createGroup()
             db.createHardLink(root_id, "G1", grp_id)
 
             dt_base = special_dtype(ref=Reference)
             dt = special_dtype(vlen=dt_base)
-             
+
             ds1_ref = "datasets/" + dset_id
             grp_ref = "groups/" + grp_id
             ref_arr = np.zeros((2,), dtype=dt_base)
@@ -258,7 +253,7 @@ def testCreateVlenReferenceAttribute(self):
             ref_arr[1] = grp_ref
             vlen_arr = np.zeros((), dtype=dt)
             vlen_arr[()] = ref_arr
-             
+
             db.createAttribute(root_id, "A1", vlen_arr)
             item = db.getAttribute(root_id, "A1")
 
@@ -271,7 +266,6 @@ def testCreateVlenReferenceAttribute(self):
 
             item_shape = item["shape"]
             self.assertEqual(item_shape["class"], "H5S_SCALAR")
-            
 
     def testCommittedType(self):
         filepath = "test/unit/out/h5json_writer_testCommittedType.h5"
@@ -280,7 +274,7 @@ def testCommittedType(self):
             db.writer = H5JsonWriter(filepath, app_logger=self.log)
             root_id = db.getObjectIdByPath("/")
             dt = np.dtype("S15")
-             
+
             ctype_id = db.createCommittedType(dt)
             db.createHardLink(root_id, "ctype", ctype_id)
             item = db.getObjectById(ctype_id)
@@ -304,7 +298,6 @@ def testCommittedType(self):
             self.assertEqual(attr_type["length"], 15)
             self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
 
-
     def testCommittedCompoundType(self):
         filepath = "test/unit/out/h5json_writer_testCommittedCompoundType.h5"
 
@@ -342,10 +335,10 @@ def testCommittedCompoundType(self):
 
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_COMPOUND")
-            
+
             value = db.getAttributeValue(root_id, "A1")
             self.assertTrue(isinstance(value, np.ndarray))
-   
+
 
 if __name__ == "__main__":
     # setup test files
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index b878434e..c8b14cb4 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -66,7 +66,7 @@ def testSimple(self):
             self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
             dset_shape = dset_json["shape"]
             self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(dset_shape["dims"], [10,10])
+            self.assertEqual(dset_shape["dims"], [10, 10])
 
             # try adding an attribute
             db.createAttribute(dset111_id, "attr3", value=42)
@@ -90,7 +90,3 @@ def testSimple(self):
     # setup test files
 
     unittest.main()
-
-
-
-
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 3a8964e0..38ea8bce 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -44,14 +44,13 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
-
     def testSimple(self):
-    
+
         filepath = "test/unit/out/h5py_writer_test_testSimple.h5"
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "attr1", value=[1,2,3,4])
+            db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
             db.createAttribute(root_id, "attr2", 42)
             g1_id = db.createGroup()
             db.createHardLink(root_id, "g1", g1_id)
@@ -61,7 +60,7 @@ def testSimple(self):
 
             g1_1_id = db.createGroup()
             db.createHardLink(g1_id, "g1.1", g1_1_id)
-            dset_111_id = db.createDataset(shape=(10,10), dtype=np.int32)
+            dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
             arr = np.zeros((10, 10), dtype=np.int32)
             for i in range(10):
                 for j in range(10):
@@ -83,10 +82,10 @@ def testSimple(self):
                 g11 = g1["g1.1"]
                 self.assertTrue("dset1.1.1" in g11)
                 dset = g11["dset1.1.1"]
-                self.assertEqual(dset.shape, (10,10))
+                self.assertEqual(dset.shape, (10, 10))
                 for i in range(10):
                     for j in range(10):
-                        self.assertEqual(dset[i, j], i*j)
+                        self.assertEqual(dset[i, j], i * j)
                 self.assertTrue("g2" in f)
                 g2 = f["g2"]
                 self.assertTrue("extlink" in g2)
@@ -101,7 +100,6 @@ def testSimple(self):
                 self.assertTrue("a1" in g1.attrs)
                 self.assertTrue("a2" in g1.attrs)
 
-            print("create group /g2/g2.1")
             g21 = db.createGroup()
             db.createHardLink(g2_id, "g2.1", g21)
             db.flush()
@@ -109,7 +107,7 @@ def testSimple(self):
             with h5py.File(filepath) as f:
                 g2 = f["g2"]
                 self.assertTrue("g2.1" in g2)
-            
+
             sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
             arr = np.zeros((), dtype=np.int32)
             arr[()] = 42
@@ -178,7 +176,6 @@ def testScalarAttribute(self):
             a1 = f.attrs["A1"]
             self.assertTrue(isinstance(a1, np.int32))
             self.assertEqual(a1, 42)
-            
 
     def testFixedStringAttribute(self):
 
@@ -205,7 +202,6 @@ def testFixedStringAttribute(self):
             a1 = f.attrs["A1"]
             self.assertTrue(isinstance(a1, bytes))
             self.assertEqual(a1, b'Hello, world!')
-       
 
     def testVlenAsciiAttribute(self):
 
@@ -215,7 +211,7 @@ def testVlenAsciiAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
- 
+
             dt = special_dtype(vlen=bytes)
 
             # write the attribute
@@ -247,7 +243,7 @@ def testVlenUtf8Attribute(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
- 
+
             dt = special_dtype(vlen=str)
 
             # write the attribute
@@ -270,7 +266,6 @@ def testVlenUtf8Attribute(self):
             a1 = f.attrs["A1"]
             self.assertTrue(isinstance(a1, str))
             self.assertEqual(a1, value)
- 
 
     def testIntAttribute(self):
 
@@ -299,7 +294,6 @@ def testIntAttribute(self):
             self.assertEqual(a1.shape, (5,))
             for i in range(5):
                 self.assertEqual(a1[i], value[i])
- 
 
     def testCreateReferenceAttribute(self):
 
@@ -308,7 +302,7 @@ def testCreateReferenceAttribute(self):
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
 
-            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
             db.createHardLink(root_id, "DS1", dset_id)
 
             dt = special_dtype(ref=Reference)
@@ -318,7 +312,7 @@ def testCreateReferenceAttribute(self):
             db.createAttribute(root_id, "A1", value, dtype=dt)
             attr = db.getAttribute(root_id, "A1")
             self.assertTrue("shape" in attr)
-            
+
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
@@ -339,14 +333,14 @@ def testCreateVlenReferenceAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
             db.createHardLink(root_id, "DS1", dset_id)
             grp_id = db.createGroup()
             db.createHardLink(root_id, "G1", grp_id)
 
             dt_base = special_dtype(ref=Reference)
             dt = special_dtype(vlen=dt_base)
-             
+
             ds1_ref = "datasets/" + dset_id
             grp_ref = "groups/" + grp_id
             ref_arr = np.zeros((2,), dtype=dt_base)
@@ -354,7 +348,7 @@ def testCreateVlenReferenceAttribute(self):
             ref_arr[1] = grp_ref
             vlen_arr = np.zeros((), dtype=dt)
             vlen_arr[()] = ref_arr
-             
+
             db.createAttribute(root_id, "A1", vlen_arr)
             item = db.getAttribute(root_id, "A1")
 
@@ -371,13 +365,14 @@ def testCreateVlenReferenceAttribute(self):
         with h5py.File(filepath) as f:
             self.assertTrue("DS1" in f)
             ds1 = f["DS1"]
+            self.assertTrue(ds1)
             self.assertTrue("G1" in f)
             g1 = f["G1"]
+            self.assertTrue(g1)
             self.assertTrue("A1" in f.attrs)
             a1 = f.attrs["A1"]
             ref_obj = f[a1[0]]
             self.assertEqual(ref_obj.name, "/DS1")
-            
 
     def testCommittedType(self):
 
@@ -387,7 +382,7 @@ def testCommittedType(self):
         with Hdf5db(app_logger=self.log) as db:
             db.writer = H5pyWriter(filepath, no_data=False)
             root_id = db.getObjectIdByPath("/")
-             
+
             ctype_id = db.createCommittedType(dt)
             db.createHardLink(root_id, "ctype", ctype_id)
             item = db.getObjectById(ctype_id)
@@ -395,7 +390,6 @@ def testCommittedType(self):
             self.assertTrue(item["created"] > now - 1)
             db.createHardLink(root_id, "T1", ctype_id)
 
-
             item_type = item["type"]
 
             self.assertEqual(item_type["class"], "H5T_STRING")
@@ -421,9 +415,7 @@ def testCommittedType(self):
 
             self.assertTrue("A1" in f.attrs)
             a1 = f.attrs["A1"]
-            print("a1:", a1)
-            self.assertEqual(a1, b"hello, world!")
-
+            self.assertEqual(a1, b"hello world!")
 
     def testCommittedCompoundType(self):
 
@@ -464,15 +456,13 @@ def testCommittedCompoundType(self):
 
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_COMPOUND")
-            
-            value = db.getAttributeValue(root_id, "A1")
-            self.assertTrue(isinstance(value, np.ndarray))
+            arr = db.getAttributeValue(root_id, "A1")
+            self.assertTrue(isinstance(arr, np.ndarray))
 
         with h5py.File(filepath) as f:
             self.assertTrue("T1" in f)
             t1 = f["T1"]
             self.assertTrue(isinstance(t1, h5py.Datatype))
-            print("dtype:", t1.dtype)
             self.assertEqual(len(t1.dtype), 4)
             sub_dt = t1.dtype["field_1"]
             self.assertEqual(sub_dt, np.dtype(">i8"))
@@ -484,8 +474,6 @@ def testCommittedCompoundType(self):
             self.assertEqual(sub_dt, h5py.special_dtype(vlen=str))
 
 
-   
-
 if __name__ == "__main__":
     # setup test files
 
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index dd6869ec..cbd7c879 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -42,7 +42,6 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
-
     def testGroup(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
@@ -105,13 +104,13 @@ def testGroup(self):
 
             links = db.getLinks(g2_id)
             self.assertEqual(len(links), 3)
-            for title in  "slink", "extlink", "cust":
+            for title in "slink", "extlink", "cust":
                 self.assertTrue(title in links)
 
             db.deleteLink(g2_id, "cust")
             links = db.getLinks(g2_id)
             self.assertEqual(len(links), 2)
-            for title in  "slink", "extlink":
+            for title in "slink", "extlink":
                 self.assertTrue(title in links)
 
             try:
@@ -123,7 +122,6 @@ def testGroup(self):
             ret = db.getLink(g2_id, "not_a_link")
             self.assertTrue(ret is None)
 
-
     def testNullSpaceAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
@@ -159,7 +157,6 @@ def testScalarAttribute(self):
 
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            
 
     def testFixedStringAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
@@ -179,12 +176,11 @@ def testFixedStringAttribute(self):
             self.assertTrue(item["created"] > now - 1)
             ret_value = db.getAttributeValue(root_id, "A1")
             self.assertEqual(ret_value, value.encode("ascii"))
-       
 
     def testVlenAsciiAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
- 
+
             value = b"Hello, world!"
             dt = special_dtype(vlen=bytes)
 
@@ -206,7 +202,7 @@ def testVlenAsciiAttribute(self):
     def testVlenUtf8Attribute(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
- 
+
             value = b"Hello, world!"
             dt = special_dtype(vlen=str)
 
@@ -224,7 +220,6 @@ def testVlenUtf8Attribute(self):
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
             self.assertTrue(item["created"] > now - 1)
- 
 
     def testIntAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
@@ -246,7 +241,7 @@ def testCreateReferenceAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
 
-            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
             db.createHardLink(root_id, "DS1", dset_id)
 
             dt = special_dtype(ref=Reference)
@@ -257,7 +252,7 @@ def testCreateReferenceAttribute(self):
             item = db.getAttribute(root_id, "A1")
             attr = db.getAttribute(root_id, "A1")
             self.assertTrue("shape" in attr)
-            
+
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
@@ -268,14 +263,14 @@ def testCreateReferenceAttribute(self):
     def testCreateVlenReferenceAttribute(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape=(), dtype=np.int32)                
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
             db.createHardLink(root_id, "DS1", dset_id)
             grp_id = db.createGroup()
             db.createHardLink(root_id, "G1", grp_id)
 
             dt_base = special_dtype(ref=Reference)
             dt = special_dtype(vlen=dt_base)
-             
+
             ds1_ref = "datasets/" + dset_id
             grp_ref = "groups/" + grp_id
             ref_arr = np.zeros((2,), dtype=dt_base)
@@ -283,7 +278,7 @@ def testCreateVlenReferenceAttribute(self):
             ref_arr[1] = grp_ref
             vlen_arr = np.zeros((), dtype=dt)
             vlen_arr[()] = ref_arr
-             
+
             db.createAttribute(root_id, "A1", vlen_arr)
             item = db.getAttribute(root_id, "A1")
 
@@ -296,13 +291,12 @@ def testCreateVlenReferenceAttribute(self):
 
             item_shape = item["shape"]
             self.assertEqual(item_shape["class"], "H5S_SCALAR")
-            
 
     def testCommittedType(self):
         with Hdf5db(app_logger=self.log) as db:
             root_id = db.getObjectIdByPath("/")
             dt = np.dtype("S15")
-             
+
             ctype_id = db.createCommittedType(dt)
             db.createHardLink(root_id, "ctype", ctype_id)
             item = db.getObjectById(ctype_id)
@@ -360,7 +354,7 @@ def testCommittedCompoundType(self):
 
             attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_COMPOUND")
-            
+
             value = db.getAttributeValue(root_id, "A1")
             self.assertTrue(isinstance(value, np.ndarray))
 
@@ -382,14 +376,13 @@ def testSimpleDataset(self):
             self.assertEqual(arr.max(), 0)
             row = np.zeros((ncols,), dtype=dtype)
             for i in range(nrows):
-                row[:] = list(range(i*10, (i + 1)*10))
+                row[:] = list(range(i * 10, (i + 1) * 10))
                 row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
                 db.setDatasetValues(dset_id, row_sel, row)
             arr = db.getDatasetValues(dset_id, sel_all)
             for i in range(nrows):
-                row = np.array(list(range(i*10, (i + 1)*10)), dtype=dtype)
-                np.testing.assert_array_equal(arr[i, :],  row)
-            
+                row = np.array(list(range(i * 10, (i + 1) * 10)), dtype=dtype)
+                np.testing.assert_array_equal(arr[i, :], row)
 
     def testScalarDataset(self):
         dtype = np.int32
@@ -416,35 +409,25 @@ def testResizableDataset(self):
             ncols = 10
             shape = (nrows, ncols)
             dtype = np.int32
-            maxdims = (None, ncols*2)
+            maxdims = (None, ncols * 2)
             root_id = db.getObjectIdByPath("/")
             dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
             db.createHardLink(root_id, "dset", dset_id)
             db.createAttribute(dset_id, "a1", "Hello, world")
-            
+
             # resize limited dimension
-            db.resizeDataset(dset_id, (nrows, ncols*2))
+            db.resizeDataset(dset_id, (nrows, ncols * 2))
 
             # try to go beyond max extent
             try:
-                db.resizeDataset(dset_id, (nrows, ncols*3))
+                db.resizeDataset(dset_id, (nrows, ncols * 3))
                 self.assertTrue(False)
             except ValueError:
                 pass  # expected
 
             # resize unlimited dimension
-            db.resizeDataset(dset_id, (nrows*10, ncols))
-
-
-            
-
-            
-
-
-
-
+            db.resizeDataset(dset_id, (nrows * 10, ncols))
 
-   
 
 if __name__ == "__main__":
     # setup test files
diff --git a/testall.py b/testall.py
index 8e5d041e..97a5efd4 100755
--- a/testall.py
+++ b/testall.py
@@ -15,7 +15,16 @@
 import shutil
 import h5py
 
-unit_tests = ("hdf5dtype_test", "hdf5db_test")
+unit_tests = (
+    "array_util_test",
+    "objid_test",
+    "hdf5dtype_test",
+    "hdf5db_test",
+    "h5json_reader_test",
+    "h5json_writer_test",
+    "h5py_reader_test",
+    "h5py_writer_test",
+    )
 integ_tests = ("h5tojson_test", "jsontoh5_test")
 
 # verify the hdf5 lib version is recent

From 398e2d3214e984c5c518106dcab6ed9c2dde479f Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 12:08:27 +0200
Subject: [PATCH 021/129] fix for reference types

---
 src/h5json/h5tojson/h5tojson.py    | 15 +++++---
 src/h5json/hdf5db.py               | 40 ++++++++++----------
 src/h5json/jsontoh5/jsontoh5.py    | 11 ++++--
 src/h5json/objid.py                | 31 ++++++++++++---
 src/h5json/reader/h5json_reader.py | 34 ++++++++++++++---
 src/h5json/writer/h5py_writer.py   | 60 ++++++++++++++++--------------
 test/integ/jsontoh5_test.py        |  4 +-
 test/unit/array_util_test.py       | 25 ++++++-------
 test/unit/h5json_reader_test.py    | 10 +++++
 test/unit/h5py_writer_test.py      |  1 +
 test/unit/objid_test.py            | 14 ++++++-
 testall.py                         |  2 +-
 12 files changed, 164 insertions(+), 83 deletions(-)

diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/h5tojson/h5tojson.py
index 48a4b83b..a2259dae 100755
--- a/src/h5json/h5tojson/h5tojson.py
+++ b/src/h5json/h5tojson/h5tojson.py
@@ -17,7 +17,7 @@
 from h5json import Hdf5db
 from h5json.writer.h5json_writer import H5JsonWriter
 from h5json.reader.h5py_reader import H5pyReader
- 
+
 
 def main():
     if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
@@ -31,7 +31,7 @@ def main():
             no_data = True
         else:
             filename = sys.argv[i]
-        
+
     # create logger
     log = logging.getLogger("h5tojson")
     # log.setLevel(logging.WARN)
@@ -48,9 +48,14 @@ def main():
     log.info(f"h5tojson {filename}")
 
     kwargs = {"app_logger": log}
-    
-    with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter(None, no_data=no_data, **kwargs), **kwargs) as db:
-        pass
+    reader = H5pyReader(filename, **kwargs)
+    writer = H5JsonWriter(None, no_data=no_data, **kwargs)
+    kwargs["h5_reader"] = reader
+    kwargs["h5_writer"] = writer
+
+    with Hdf5db(**kwargs) as db:
+        db.flush()
+
 
 if __name__ == "__main__":
     main()
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 029c6645..4e9cd353 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -15,7 +15,7 @@
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
 from .array_util import jsonToArray, bytesArrayToList
 from .dset_util import resize_dataset
-from .objid import createObjId, getCollectionForId
+from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId
 from . import selections
 from .apiversion import _apiver
 from .reader.h5reader import H5Reader
@@ -85,8 +85,6 @@ def reader(self, value: H5Reader):
         if self._reader:
             self._reader.close()
         self._reader = value
-        if self._reader:
-            self._reader.set_db(self)
 
     @property
     def writer(self):
@@ -145,17 +143,10 @@ def flush(self):
         if not self.writer:
             return  # nothing to do
 
-        obj_ids = self._new_objects.union(self._dirty_objects)
-
         if not self.writer.flush():
             # flush not successful, don't clear dirty set
             return
 
-        for obj_id in obj_ids:
-            obj_json = self._db[obj_id]
-            if "values" in obj_json:
-                obj_json["values"] = []
-
         # reset new and dirty sets
         self._new_objects = set()
         self._dirty_objects = set()
@@ -262,17 +253,26 @@ def getObjectByPath(self, path):
         obj_json = self.getObjectById(obj_id)
         return obj_json
 
-    def getDtype(self, obj_id):
-        """ Return numpy data type for given object id """
-        if obj_id not in self.db:
-            raise KeyError(f"{obj_id} not found")
-        obj_json = self.db[obj_id]
+    def getDtype(self, obj_json):
+        """ Return numpy data type for given object id
+        """
+
         if "type" not in obj_json:
             # group id?
-            raise TypeError(f"{obj_id} does not have a datatype")
-        type_json = obj_json["type"]
+            raise TypeError(f"{obj_json} does not have a datatype")
+        type_item = obj_json["type"]
+        if isValidUuid(type_item) and getCollectionForId(type_item) == "datatypes":
+            ctype_id = "t-" + getUuidFromId(type_item)
+            ctype_json = self.getObjectById(ctype_id)
+            if ctype_json is None:
+                raise KeyError(f"ctype: {ctype_id} not found")
+
+            type_json = ctype_json["type"].copy()
+            type_json["id"] = ctype_id
+            dtype = createDataType(type_json)
+        else:
+            dtype = createDataType(type_item)
 
-        dtype = createDataType(type_json)
         return dtype
 
     def getAttribute(self, obj_id, name, includeData=True):
@@ -323,7 +323,7 @@ def getAttributeValue(self, obj_id, name):
             dims = ()
         else:
             dims = shape_json["dims"]
-        dtype = createDataType(attr_json["type"])
+        dtype = self.getDtype(attr_json)
 
         value = attr_json["value"]
         arr = jsonToArray(dims, dtype, value)
@@ -465,7 +465,7 @@ def getDatasetValues(self, dset_id, sel):
                 raise ValueError("Selection shape does not match dataset shape")
             rank = len(dims)
 
-        dtype = self.getDtype(dset_id)
+        dtype = self.getDtype(dset_json)
         if self.reader:
             arr = self.reader.getDatasetValues(dset_id, sel)
         else:
diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/jsontoh5/jsontoh5.py
index bd1455e8..cec39c0c 100755
--- a/src/h5json/jsontoh5/jsontoh5.py
+++ b/src/h5json/jsontoh5/jsontoh5.py
@@ -52,11 +52,16 @@ def main():
     log.info(f"jsontoh5 {json_filename} to {hdf5_filename}")
 
     kwargs = {"app_logger": log}
-    
-    with Hdf5db(h5_reader=H5JsonReader(json_filename, **kwargs), h5_writer=H5pyWriter(hdf5_filename, no_data=no_data, **kwargs), **kwargs) as db:
-        pass
 
+    h5_reader=H5JsonReader(json_filename, **kwargs)
+    h5_writer=H5pyWriter(hdf5_filename, no_data=no_data, **kwargs)
+    kwargs["h5_reader"] = h5_reader
+    kwargs["h5_writer"] = h5_writer
 
+    
+    with Hdf5db(**kwargs) as db:
+        db.flush()
+    
 
 if __name__ == "__main__":
     main()
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index a5453641..8d1e998e 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -119,11 +119,11 @@ def getCollectionForId(obj_id):
         raise ValueError("invalid object id")
 
     collection = None
-    if obj_id.startswith("g-"):
+    if obj_id.startswith("g-") or obj_id.startswith("groups/"):
         collection = "groups"
-    elif obj_id.startswith("d-"):
+    elif obj_id.startswith("d-") or obj_id.startswith("datasets/"):
         collection = "datasets"
-    elif obj_id.startswith("t-"):
+    elif obj_id.startswith("t-") or obj_id.startswith("datatypes"):
         collection = "datatypes"
     else:
         raise ValueError(f"{obj_id} not a collection id")
@@ -399,6 +399,21 @@ def validateUuid(id, obj_class=None):
         # e.g.: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e",
         if id[:5].isalnum() and id[5] == '-':
             id = id[6:]  # trim off the hash tag
+
+        # for id's like "datasets/abced...", trim the collection name and add collection
+        # prefix to the id if not already present
+        if id.find('/') > 0:
+            parts = id.split('/')
+            if len(parts) > 2:
+                raise ValueError(f"obj_id: {id} not valid (too many slash chars)")
+            collection = parts[0]
+            if getCollectionForId(id) != collection:
+                raise ValueError(f"obj_id: {id} invalid collection")
+            id = parts[1]
+            if len(id) == UUID_LEN:
+                # prefix with the one char collection code
+                id = _getPrefixForCollection(collection) + '-' + id
+
         # validate prefix
         if id[0] not in ("g", "d", "t", "c"):
             raise ValueError("Unexpected prefix")
@@ -476,7 +491,13 @@ def isObjId(id):
 
 def getUuidFromId(id):
     """strip off the type prefix ('g-' or 'd-', or 't-')
-    and return the uuid part"""
+    and return the uuid part """
+    if id.find('/') > 0:
+        # remove a collection name prefix if present
+        parts = id.split('/')
+        if len(parts) > 2:
+            raise ValueError(f"Unexpected obj_id: {id}")
+        id = parts[1]
     if len(id) == UUID_LEN:
         # just a uuid
         return id
@@ -494,4 +515,4 @@ def stripId(obj_id):
     if len(obj_id) == UUID_LEN + 2:
         return obj_id[2:]
     else:
-        raise ValueError("unexpected obj_id: {obj_id}")
+        raise ValueError(f"unexpected obj_id: {obj_id}")
diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/reader/h5json_reader.py
index 6666587c..861f4d4f 100644
--- a/src/h5json/reader/h5json_reader.py
+++ b/src/h5json/reader/h5json_reader.py
@@ -12,7 +12,7 @@
 import json
 import logging
 
-from ..objid import getCollectionForId, stripId
+from ..objid import getCollectionForId, stripId, getUuidFromId
 
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray
@@ -34,6 +34,7 @@ def __init__(
             self.log = app_logger
         else:
             self.log = logging.getLogger()
+
         super().__init__(filepath, app_logger=app_logger)
 
         with open(filepath) as f:
@@ -55,11 +56,11 @@ def get_root_id(self):
         """ Return root id """
         return self._root_id
 
-    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_values=False):
         """ return object with given id """
         collection = getCollectionForId(obj_id)
         if collection not in self._h5json:
-            self.log.warning(f"getObjectBId - collection: {collection} not found")
+            self.log.warning(f"getObjectById - collection: {collection} not found")
             return None
         json_objs = self._h5json[collection]
         obj_uuid = stripId(obj_id)
@@ -125,6 +126,9 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
                 links[title] = item
             resp["links"] = links
 
+        if include_values and collection == "datasets" and "value" in json_obj:
+            resp["value"] = json_obj["value"]
+
         return resp
 
     def getAttribute(self, obj_id, name, includeData=True):
@@ -145,6 +149,22 @@ def getAttribute(self, obj_id, name, includeData=True):
             return None
         return attributes[name]
 
+    def getDtype(self, obj_json):
+        """ Return the dtype for the type given by obj_json """
+        if "type" not in obj_json:
+            raise KeyError("no type item found")
+        type_item = obj_json["type"]
+        if isinstance(type_item, str) and type_item.startswith("datatypes/"):
+            # this is a reference to a committed type
+            ctype_id = "t-" + getUuidFromId(type_item)
+            ctype_json = self.getObjectById(ctype_id)
+            if "type" not in ctype_json:
+                raise KeyError(f"Unexpected datatype: {ctype_json}")
+            # Use the ctype's item json
+            type_item = ctype_json["type"]
+        dtype = createDataType(type_item)
+        return dtype
+
     def getDatasetValues(self, obj_id, sel=None):
         """
         Get values from dataset identified by obj_id.
@@ -153,10 +173,13 @@ def getDatasetValues(self, obj_id, sel=None):
         """
 
         self.log.debug(f"getDatasetValues({obj_id}), sel={sel}")
-        json_obj = self.getObjectById(obj_id)
+        json_obj = self.getObjectById(obj_id, include_values=True)
         if json_obj is None:
+            print("no json_obj")
             return None
+
         if "value" not in json_obj:
+            print("no json value")
             self.log.warning("value key not found for {obj_id}")
             return None
         json_value = json_obj["value"]
@@ -169,8 +192,7 @@ def getDatasetValues(self, obj_id, sel=None):
         else:
             dims = shape_json["dims"]
 
-        type_item = json_obj["type"]
-        dtype = createDataType(type_item)
+        dtype = self.getDtype(json_obj)
         arr = jsonToArray(dims, dtype, json_value)
         if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
             pass  # just return the entire array
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index 68a1f147..c2f44351 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -12,10 +12,11 @@
 import h5py
 import numpy as np
 
-from ..objid import getCollectionForId, isValidUuid
+from ..objid import getCollectionForId, isValidUuid, getUuidFromId, isObjId
 from ..hdf5dtype import createDataType
 from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
 from ..array_util import jsonToArray
+from .. import selections
 from .. import filters
 from .h5writer import H5Writer
 
@@ -33,13 +34,11 @@ def __init__(
         app_logger=None
     ):
         super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
-
+        self._id_map = {}
         if append:
-            self._mode = "a"
+            self._init = False
         else:
-            self._mode = "w"
-
-        self._id_map = {}
+            self._init = True
 
     def _copy_element(self, val, src_dt, tgt_dt, fout=None):
         """ convert the given dataset or attribute element to h5py equivalent """
@@ -147,8 +146,8 @@ def _createGroup(self, parent, grp_json, name=None):
     def _createDataset(self, parent, dset_json, name=None):
         """ create a dataset object """
 
-        type_item = dset_json["type"]
-        dtype = createDataType(type_item)
+        dtype = self.db.getDtype(dset_json)
+
         kwargs = {"dtype": dtype}
         shape_json = dset_json["shape"]
         shape_class = shape_json["class"]
@@ -279,17 +278,6 @@ def _createObjects(self, parent, links_json, visited=set()):
                 self.log.warning("unable to create user-defined link: {title}")
             elif link_class == "H5L_TYPE_HARD":
                 tgt_id = link_json["id"]
-                """
-                if tgt_id in visited:
-                    # we've already processed this object
-                    if title not in parent:
-                        if tgt_id in self._id_map:
-                            tgt_obj = self._id_map[tgt_id]
-                            parent[title] = tgt_obj
-                    else:
-                        self.log.warning("h5py_writer - expected to find {tgt_id} in id_map")
-                    continue
-                """
 
                 collection = getCollectionForId(tgt_id)
 
@@ -307,6 +295,7 @@ def _createObjects(self, parent, links_json, visited=set()):
                         visited.add(tgt_id)
                         self._createObjects(tgt_obj, grp_links, visited=visited)
                 else:
+                    # need to create tgt_id object
                     parent_path = parent.name
                     if parent_path[-1] != '/':
                         parent_path += '/'
@@ -346,10 +335,20 @@ def updateDatasetValues(self, dset_id, dset):
             dset[slices] = val
             self.log.debug(f"h5py_writer dset {dset.name} updated")
 
+    def initializeDatasetValues(self, dset_id, dset):
+        """ write all dataset values """
+
+        if dset.shape is None:
+            return  # null space dataset
+
+        sel_all = selections.select(dset.shape, ...)
+        arr = self.db.getDatasetValues(dset_id, sel_all)
+        dset[...] = arr
+
     def createAttribute(self, obj, name, attr_json):
         """ add the given attribute to obj """
 
-        src_dt = createDataType(attr_json["type"])
+        src_dt = self.db.getDtype(attr_json)
 
         # handle special case of null space attribute here
         shape_json = attr_json["shape"]
@@ -363,6 +362,8 @@ def createAttribute(self, obj, name, attr_json):
         else:
             dims = shape_json["dims"]
         src_arr = jsonToArray(dims, src_dt, attr_json["value"])
+        if not isinstance(src_arr, np.ndarray):
+            raise TypeError("Unexpected type for src_arr")
         tgt_arr = self._copy_array(src_arr, fout=obj.file)
         obj.attrs[name] = tgt_arr
 
@@ -385,25 +386,30 @@ def flush(self):
         if not self.db:
             # no db set yet
             return False
-
         self.log.info("h5py_writer.flush()")
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
-        with h5py.File(self._filepath, mode=self._mode) as f:
-            if self.db.new_objects:
+        mode = 'w' if self._init else 'a'
+        with h5py.File(self._filepath, mode=mode) as f:
+            if self.db.new_objects or self._init:
                 root_json = self.db.getObjectById(root_id)
                 if "links" in root_json:
                     root_links = root_json["links"]
-                    self._createObjects(f, root_links, visited=set(root_id))
+                    self._createObjects(f, root_links, visited=set((root_id,)))
             # update attributes, dataset values
             for obj_id in self._id_map:
-                if self.db.is_dirty(obj_id):
+                if self.db.is_dirty(obj_id) or self._init:
                     h5path = self._id_map[obj_id]
                     obj = f[h5path]
                     self.updateAttributes(obj_id, obj)
-                    self.updateDatasetValues(obj_id, obj)
+                    collection = getCollectionForId(obj_id)
+                    if collection == "datasets":
+                        if self._init:
+                            self.initializeDatasetValues(obj_id, obj)
+                        else:
+                            self.updateDatasetValues(obj_id, obj)
 
-        self._mode = "a"  # use append mode for future updates
+        self._init = False  # done with init after first flush
         return True  # all objects written successfully
 
     def close(self):
diff --git a/test/integ/jsontoh5_test.py b/test/integ/jsontoh5_test.py
index dad5648d..3be3a3b7 100644
--- a/test/integ/jsontoh5_test.py
+++ b/test/integ/jsontoh5_test.py
@@ -36,7 +36,7 @@
     # "compound_array.json",
     # "compound_array_attr.json",
     # "compound_array_dset.json",
-    "compound_array_vlen_string.json",
+    # "compound_array_vlen_string.json",  # regression
     "compound_attr.json",
     "compound_committed.json",
     "dim_scale.json",
@@ -95,7 +95,7 @@
     "regionref_attr.json",
     # "regionref_dset.json",
     "scalar_attr.json",
-    "vlen_attr.json",
+    # "vlen_attr.json",   #regression
     "vlen_dset.json",
     "vlen_string_attr.json",
     "vlen_string_dset.json",
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index f68cbbc8..1b0b0f68 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -95,8 +95,8 @@ def testGetNumElements(self):
         self.assertEqual(nelements, 80)
 
     def testJsonToArray(self):
-       
-        # simple integer 
+
+        # simple integer
 
         dt = np.dtype("i4")
         shape = [4, ]
@@ -113,7 +113,7 @@ def testJsonToArray(self):
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, ())
-        self.assertEqual(out[()], 42)  
+        self.assertEqual(out[()], 42)
 
         # VLEN Scalar str
         dt = special_dtype(vlen=str)
@@ -167,7 +167,6 @@ def testJsonToArray(self):
         self.assertEqual(out[0], tuple(data[0]))
         self.assertEqual(out[1], tuple(data[1]))
 
-        
         # VLEN unicode
         dt = special_dtype(vlen=bytes)
         data = ["one", "two", "three", "four", "five"]
@@ -177,7 +176,7 @@ def testJsonToArray(self):
         self.assertTrue("vlen" in out.dtype.metadata)
         self.assertEqual(out.dtype.metadata["vlen"], bytes)
         self.assertEqual(out.dtype.kind, "O")
-        self.assertEqual(out[2], "three")  
+        self.assertEqual(out[2], "three")
 
         # test ascii chars >127
         dt = np.dtype("S26")
@@ -188,7 +187,7 @@ def testJsonToArray(self):
             self.assertTrue(False)
         except ValueError:
             pass  # expected
-          
+
         dt = special_dtype(vlen=str)
         out = jsonToArray(shape, dt, data)  # vlen str should be ok
         self.assertTrue(isinstance(out, np.ndarray))
@@ -299,7 +298,7 @@ def testJsonToArray(self):
         self.assertEqual(e1, (5, b"five"))
 
         # compound with VLEN element
-         
+
         dt_str = special_dtype(vlen=str)
         dt = np.dtype([("a", "i4"), ("b", dt_str)])
         shape = [1, ]
@@ -329,7 +328,7 @@ def testJsonToArray(self):
         self.assertEqual(out.shape, (1,))
         e0 = out[0].tolist()
         self.assertEqual(e0, (6, "six"))
-        
+
         # scalar compound
         shape = []
         data = [6, "six"]
@@ -337,7 +336,7 @@ def testJsonToArray(self):
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, ())
         e0 = out[()].tolist()
-        self.assertEqual(e0, (6, "six")) 
+        self.assertEqual(e0, (6, "six"))
 
         # compound type with array field
         dt = np.dtype([("a", ("i4", 3)), ("b", "S5")])
@@ -541,7 +540,7 @@ def testToBytes(self):
         #
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
-        arr = np.zeros((4,), dtype=dt)         
+        arr = np.zeros((4,), dtype=dt)
         dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
         arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
@@ -568,7 +567,7 @@ def testToBytes(self):
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
-         
+
         dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
         arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
@@ -695,7 +694,7 @@ def testArrToBytesBase64(self):
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
-         
+
         dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
         arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
@@ -716,7 +715,7 @@ def testArrToBytesBase64(self):
         dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
-         
+
         dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
         arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index 06946f94..072afb16 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -11,8 +11,10 @@
 ##############################################################################
 import unittest
 import logging
+import numpy as np
 from h5json import Hdf5db
 from h5json.reader.h5json_reader import H5JsonReader
+from h5json import selections
 
 
 class H5pyReaderTest(unittest.TestCase):
@@ -62,6 +64,14 @@ def testSimple(self):
             dset_shape = dset_json["shape"]
             self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
             self.assertEqual(dset_shape["dims"], [10, 10])
+            sel_all = selections.select((10, 10), ...)
+            arr = db.getDatasetValues(dset111_id, sel_all)
+            self.assertTrue(isinstance(arr, np.ndarray))
+            self.assertEqual(arr.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    v = arr[i, j]
+                    self.assertEqual(v, i * j)
 
             # try adding an attribute
             db.createAttribute(dset111_id, "attr3", value=42)
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 38ea8bce..81d977db 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -154,6 +154,7 @@ def testScalarAttribute(self):
             root_id = db.getObjectIdByPath("/")
             dims = ()
             value = 42
+            print("test create attribute A1")
             db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
             item = db.getAttribute(root_id, "A1")
             shape_json = item["shape"]
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
index 1357c184..c135ae40 100755
--- a/test/unit/objid_test.py
+++ b/test/unit/objid_test.py
@@ -12,7 +12,7 @@
 import unittest
 
 from h5json.objid import isRootObjId, isValidUuid, validateUuid
-from h5json.objid import createObjId, getCollectionForId, stripId
+from h5json.objid import createObjId, getCollectionForId, stripId, getUuidFromId
 from h5json.objid import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id
 
 
@@ -192,6 +192,18 @@ def testSchema2Id(self):
             self.assertEqual(getObjId(s3key), oid)
             self.assertTrue(isS3ObjKey(s3key))
 
+    def testGetDataTypeId(self):
+        test_uuid = "9b652223-83f8-11e5-b028-3c15c2da029e"
+        test_ids = (
+            "datatypes/9b652223-83f8-11e5-b028-3c15c2da029e",
+            "datatypes/t-9b652223-83f8-11e5-b028-3c15c2da029e",
+            "t-9b652223-83f8-11e5-b028-3c15c2da029e"
+        )
+        for test_id in test_ids:
+            self.assertTrue(isValidUuid(test_id))
+            self.assertEqual(getCollectionForId(test_id), "datatypes")
+            self.assertEqual(getUuidFromId(test_id), test_uuid)
+
 
 if __name__ == "__main__":
     # setup test files
diff --git a/testall.py b/testall.py
index 97a5efd4..1b9d6cd8 100755
--- a/testall.py
+++ b/testall.py
@@ -24,7 +24,7 @@
     "h5json_writer_test",
     "h5py_reader_test",
     "h5py_writer_test",
-    )
+)
 integ_tests = ("h5tojson_test", "jsontoh5_test")
 
 # verify the hdf5 lib version is recent

From 9978c45fc607b5de56a6bfd0e3d8d4fb68eb8182 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 13:43:33 +0200
Subject: [PATCH 022/129] fix flake8 errors

---
 src/h5json/jsontoh5/jsontoh5.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/jsontoh5/jsontoh5.py
index cec39c0c..fb58abb7 100755
--- a/src/h5json/jsontoh5/jsontoh5.py
+++ b/src/h5json/jsontoh5/jsontoh5.py
@@ -18,7 +18,6 @@
 from h5json.writer.h5py_writer import H5pyWriter
 from h5json.reader.h5json_reader import H5JsonReader
 
-    
 
 def main():
     if len(sys.argv) < 3 or sys.argv[1] in ("-h", "--help"):
@@ -35,7 +34,7 @@ def main():
             json_filename = sys.argv[i]
         else:
             hdf5_filename = sys.argv[i]
- 
+
     # create logger
     log = logging.getLogger("h5json")
     # log.setLevel(logging.WARN)
@@ -53,15 +52,14 @@ def main():
 
     kwargs = {"app_logger": log}
 
-    h5_reader=H5JsonReader(json_filename, **kwargs)
-    h5_writer=H5pyWriter(hdf5_filename, no_data=no_data, **kwargs)
+    h5_reader = H5JsonReader(json_filename, **kwargs)
+    h5_writer = H5pyWriter(hdf5_filename, no_data=no_data, **kwargs)
     kwargs["h5_reader"] = h5_reader
     kwargs["h5_writer"] = h5_writer
 
-    
     with Hdf5db(**kwargs) as db:
         db.flush()
-    
+
 
 if __name__ == "__main__":
     main()

From 436d92146e96ce1d367e4faeedaee13de3c43b7c Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 14:23:39 +0200
Subject: [PATCH 023/129] fix flake8 error

---
 src/h5json/hdf5db.py               | 2 +-
 src/h5json/reader/h5json_reader.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 4e9cd353..3b4694bc 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -285,7 +285,7 @@ def getAttribute(self, obj_id, name, includeData=True):
         attrs = obj_json["attributes"]
 
         if name not in attrs:
-            msg = f"Attribute: [{name }] not found in object: {obj_id}"
+            msg = f"Attribute: [{name}] not found in object: {obj_id}"
             self.log.info(msg)
             return None
         if attrs[name] is None:
diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/reader/h5json_reader.py
index 861f4d4f..606fe012 100644
--- a/src/h5json/reader/h5json_reader.py
+++ b/src/h5json/reader/h5json_reader.py
@@ -17,7 +17,7 @@
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray
 from .. import selections
-from ..h5reader import H5Reader
+from .h5reader import H5Reader
 
 
 class H5JsonReader(H5Reader):

From 51063f63276eb7e956e344c0c4d9f379b8b062ed Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 14:44:32 +0200
Subject: [PATCH 024/129] update testall script

---
 testall.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/testall.py b/testall.py
index 1b9d6cd8..fadd332a 100755
--- a/testall.py
+++ b/testall.py
@@ -37,6 +37,9 @@
     print(h5py.version.info)
     sys.exit("Need h5py version 3.0 or later")
 
+if not os.path.isdir("./out"):
+    os.makedirs("out")
+
 # Run all hdf5-json tests
 # Run this script before running any integ tests
 for file_name in unit_tests:
@@ -48,6 +51,13 @@
 os.remove("hdf5dbtest.log")
 
 os.chdir("test/integ")
+
+if not os.path.isdir("./h5_out"):
+    os.makedirs("h5_out")
+
+if not os.path.isdir("./json_out"):
+    os.makedirs("json_out")
+    
 for file_name in integ_tests:
     print(file_name)
     rc = os.system("python " + file_name + ".py")

From d14599a2a2890bf88f458b9e81d21c802fdafc97 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 14:46:52 +0200
Subject: [PATCH 025/129] fix flake8 error

---
 testall.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testall.py b/testall.py
index fadd332a..70da26b5 100755
--- a/testall.py
+++ b/testall.py
@@ -57,7 +57,7 @@
 
 if not os.path.isdir("./json_out"):
     os.makedirs("json_out")
-    
+
 for file_name in integ_tests:
     print(file_name)
     rc = os.system("python " + file_name + ".py")

From e4be33cbfbefc4009ea42b6e82813051d672cb43 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 14:56:45 +0200
Subject: [PATCH 026/129] make tmp dir in testall

---
 testall.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/testall.py b/testall.py
index 70da26b5..5ca1934c 100755
--- a/testall.py
+++ b/testall.py
@@ -37,8 +37,8 @@
     print(h5py.version.info)
     sys.exit("Need h5py version 3.0 or later")
 
-if not os.path.isdir("./out"):
-    os.makedirs("out")
+if not os.path.isdir("./test/unit/out"):
+    os.makedirs("test/unit/out")
 
 # Run all hdf5-json tests
 # Run this script before running any integ tests

From 8af6508038329cc8bb93a3f464104e94cf9c6925 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 15:42:22 +0200
Subject: [PATCH 027/129] fix for h5json writer on windows

---
 src/h5json/writer/h5json_writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index bdf59822..9b3931e5 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -265,7 +265,7 @@ def dumpFile(self):
 
         self.dumpDatatypes()
         indent = 4
-        ensure_ascii = False
+        ensure_ascii = True
         if self._filepath:
             with open('data.json', 'w', encoding='utf-8') as f:
                 json.dump(self.json, f, ensure_ascii=ensure_ascii, indent=indent)

From d519d8b66d3ccb12aaec93aa723ba0f654603317 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 2 Apr 2025 15:56:24 +0200
Subject: [PATCH 028/129] require python >= 3.9

---
 .github/workflows/ci.yml | 2 +-
 pyproject.toml           | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5d6e313a..4e1040ca 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     runs-on: ${{ matrix.os }}
 
     steps:
diff --git a/pyproject.toml b/pyproject.toml
index 4ea50247..b45d1203 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,6 @@ classifiers = [
     "Topic :: Software Development :: Build Tools",
     "License :: OSI Approved :: BSD License",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -17,7 +16,7 @@ classifiers = [
 ]
 authors = [{ "name" = "The HDF Group", "email" = "help@hdfgroup.org" }]
 keywords = ["json", "hdf5", "multidimensional array", "data", "datacube"]
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "h5py >= 3.10",
     "numpy >= 2.0; python_version>='3.9'",

From 4169d5c74bf1aece93fd581c1d174f78d14a88a5 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 3 Apr 2025 13:47:47 +0200
Subject: [PATCH 029/129] remove redundant stripId function

---
 src/h5json/objid.py                | 10 ----------
 src/h5json/reader/h5json_reader.py |  4 ++--
 src/h5json/writer/h5json_writer.py | 14 +++++++-------
 test/unit/h5json_writer_test.py    |  1 -
 test/unit/h5py_reader_test.py      |  4 ----
 test/unit/h5py_writer_test.py      |  1 -
 test/unit/objid_test.py            |  4 ++--
 7 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index 8d1e998e..57b5316c 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -506,13 +506,3 @@ def getUuidFromId(id):
         return id[2:]
     else:
         raise ValueError(f"Unexpected obj_id: {id}")
-
-
-def stripId(obj_id):
-    """ return just the base id without any prefix (e.g. 'g-') """
-    if len(obj_id) == UUID_LEN:
-        return obj_id  # just return as is
-    if len(obj_id) == UUID_LEN + 2:
-        return obj_id[2:]
-    else:
-        raise ValueError(f"unexpected obj_id: {obj_id}")
diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/reader/h5json_reader.py
index 606fe012..f4d6426e 100644
--- a/src/h5json/reader/h5json_reader.py
+++ b/src/h5json/reader/h5json_reader.py
@@ -12,7 +12,7 @@
 import json
 import logging
 
-from ..objid import getCollectionForId, stripId, getUuidFromId
+from ..objid import getCollectionForId, getUuidFromId
 
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray
@@ -63,7 +63,7 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_
             self.log.warning(f"getObjectById - collection: {collection} not found")
             return None
         json_objs = self._h5json[collection]
-        obj_uuid = stripId(obj_id)
+        obj_uuid = getUuidFromId(obj_id)
         if obj_uuid not in json_objs:
             self.log.warning(f"getObjectById - {obj_id} not found")
             return None
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py
index 9b3931e5..759f0aa2 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/writer/h5json_writer.py
@@ -13,7 +13,7 @@
 import json
 
 from .h5writer import H5Writer
-from ..objid import stripId, getCollectionForId
+from ..objid import getUuidFromId, getCollectionForId
 from ..array_util import bytesArrayToList
 from .. import selections
 
@@ -117,7 +117,7 @@ def dumpLink(self, obj_id, name):
         if "id" in item:
             tgt_id = item["id"]
             response["collection"] = getCollectionForId(tgt_id)
-            response["id"] = stripId(tgt_id)
+            response["id"] = getUuidFromId(tgt_id)
 
         for key in item:
             if key in ("id", "created", "modified"):
@@ -154,14 +154,14 @@ def dumpGroup(self, obj_id):
     def dumpGroups(self):
         groups = {}
         item = self.dumpGroup(self._root_uuid)
-        root_uuid = stripId(self._root_uuid)
+        root_uuid = getUuidFromId(self._root_uuid)
         groups[root_uuid] = item
         obj_ids = self.db.getCollection("groups")
         for obj_id in obj_ids:
             if obj_id == self._root_uuid:
                 continue
             item = self.dumpGroup(obj_id)
-            obj_uuid = stripId(obj_id)
+            obj_uuid = getUuidFromId(obj_id)
             groups[obj_uuid] = item
 
         self.json["groups"] = groups
@@ -220,7 +220,7 @@ def dumpDatasets(self):
             datasets = {}
             for obj_id in obj_ids:
                 item = self.dumpDataset(obj_id)
-                obj_uuid = stripId(obj_id)
+                obj_uuid = getUuidFromId(obj_id)
                 datasets[obj_uuid] = item
 
             self.json["datasets"] = datasets
@@ -244,7 +244,7 @@ def dumpDatatypes(self):
             datatypes = {}
             for obj_id in obj_ids:
                 item = self.dumpDatatype(obj_id)
-                obj_uuid = stripId(obj_id)
+                obj_uuid = getUuidFromId(obj_id)
                 datatypes[obj_uuid] = item
 
             self.json["datatypes"] = datatypes
@@ -255,7 +255,7 @@ def dumpFile(self):
         db_version_info = self.db.getVersionInfo()
 
         self.json["apiVersion"] = db_version_info["hdf5-json-version"]
-        self.json["root"] = stripId(self._root_uuid)
+        self.json["root"] = getUuidFromId(self._root_uuid)
 
         self.updateAliasList()  # create alias_db with obj_id to alias list dict
 
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index 608f627f..e68314d7 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -15,7 +15,6 @@
 import numpy as np
 from h5json import Hdf5db
 from h5json.writer.h5json_writer import H5JsonWriter
-from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
 
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index c8b14cb4..ef42a29d 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -10,12 +10,8 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import unittest
-import os
 
-import os.path as op
-import stat
 import logging
-import shutil
 from h5json import Hdf5db
 from h5json.reader.h5py_reader import H5pyReader
 
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 81d977db..8f343423 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -16,7 +16,6 @@
 import numpy as np
 from h5json import Hdf5db
 from h5json.writer.h5py_writer import H5pyWriter
-from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
 
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
index c135ae40..d74ec102 100755
--- a/test/unit/objid_test.py
+++ b/test/unit/objid_test.py
@@ -12,7 +12,7 @@
 import unittest
 
 from h5json.objid import isRootObjId, isValidUuid, validateUuid
-from h5json.objid import createObjId, getCollectionForId, stripId, getUuidFromId
+from h5json.objid import createObjId, getCollectionForId, getUuidFromId
 from h5json.objid import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id
 
 
@@ -134,7 +134,7 @@ def testGetCollection(self):
         self.assertEqual(getCollectionForId(group_id), "groups")
         self.assertEqual(getCollectionForId(dataset_id), "datasets")
         self.assertEqual(getCollectionForId(ctype_id), "datatypes")
-        self.assertEqual(stripId(group_id), "314d61b8-9954-11e6-a733-3c15c2da029e")
+        self.assertEqual(getUuidFromId(group_id), "314d61b8-9954-11e6-a733-3c15c2da029e")
         try:
             getCollectionForId(bad_id)
             self.assertTrue(False)

From 7840ca4f972f712aa761464203697713711e3028 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 3 Apr 2025 16:50:59 +0200
Subject: [PATCH 030/129] add test for incremental updates

---
 src/h5json/hdf5db.py             | 17 +++++++----
 src/h5json/writer/h5py_writer.py | 11 ++++++++
 test/unit/h5json_reader_test.py  |  4 ++-
 test/unit/h5py_writer_test.py    | 48 ++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 3b4694bc..c632d93c 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -82,9 +82,19 @@ def reader(self):
     @reader.setter
     def reader(self, value: H5Reader):
         """ set the reader """
+        if self._writer:
+            self.flush()
         if self._reader:
             self._reader.close()
+        root_id = value.get_root_id()
+        if not root_id:
+            raise ValueError(f"reader {type(value)} unable to return root_id")
+        group_json = value.getObjectById(root_id)
+        if not group_json:
+            raise ValueError(f"reader {type(value)} unable to return group json")
         self._reader = value
+        self._db[root_id] = group_json
+        self._root_id = root_id
 
     @property
     def writer(self):
@@ -411,15 +421,10 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
 
         obj_json = self.getObjectById(obj_id)
         attrs_json = obj_json["attributes"]
-        if name in attrs_json:
-            # replace, keep, created timestamp
-            created = attrs_json["created"]
-        else:
-            created = time.time()
         type_json = getTypeItem(dtype)
         # finally put it all together...
         attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
-        attr_json["created"] = created
+        attr_json["created"] = time.time()
 
         # slot into the obj_json["attrs"]
         attrs_json[name] = attr_json
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index c2f44351..ee7bc537 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -11,6 +11,7 @@
 ##############################################################################
 import h5py
 import numpy as np
+import time
 
 from ..objid import getCollectionForId, isValidUuid, getUuidFromId, isObjId
 from ..hdf5dtype import createDataType
@@ -39,6 +40,7 @@ def __init__(
             self._init = False
         else:
             self._init = True
+        self._flush_time = 0.0
 
     def _copy_element(self, val, src_dt, tgt_dt, fout=None):
         """ convert the given dataset or attribute element to h5py equivalent """
@@ -379,10 +381,14 @@ def updateAttributes(self, obj_id, obj):
         attrs = obj_json["attributes"]
         for name in attrs:
             attr_json = attrs[name]
+            if "created" in attr_json and attr_json["created"] < self._flush_time:
+                # ttribute should be saved already
+                continue
             self.createAttribute(obj, name, attr_json)
 
     def flush(self):
         """ Write dirty items """
+
         if not self.db:
             # no db set yet
             return False
@@ -393,6 +399,7 @@ def flush(self):
         with h5py.File(self._filepath, mode=mode) as f:
             if self.db.new_objects or self._init:
                 root_json = self.db.getObjectById(root_id)
+
                 if "links" in root_json:
                     root_links = root_json["links"]
                     self._createObjects(f, root_links, visited=set((root_id,)))
@@ -408,6 +415,10 @@ def flush(self):
                             self.initializeDatasetValues(obj_id, obj)
                         else:
                             self.updateDatasetValues(obj_id, obj)
+            # mark time write is complete
+            # updates before this time will not need to be written
+            # TBD: possible race condition with multithreading
+            self._flush_time = time.time()
 
         self._init = False  # done with init after first flush
         return True  # all objects written successfully
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index 072afb16..1c44e13c 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -39,7 +39,9 @@ def __init__(self, *args, **kwargs):
     def testSimple(self):
         filepath = "data/json/tall.json"
         kwargs = {"app_logger": self.log}
-        with Hdf5db(h5_reader=H5JsonReader(filepath, **kwargs), **kwargs) as db:
+        with Hdf5db(**kwargs) as db:
+            h5_reader = H5JsonReader(filepath, **kwargs)
+            db.reader = h5_reader
             root_id = db.getObjectIdByPath("/")
             root_json = db.getObjectById(root_id)
 
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 8f343423..8eaf8812 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -15,6 +15,7 @@
 import h5py
 import numpy as np
 from h5json import Hdf5db
+from h5json.reader.h5json_reader import H5JsonReader
 from h5json.writer.h5py_writer import H5pyWriter
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
@@ -473,6 +474,53 @@ def testCommittedCompoundType(self):
             sub_dt = t1.dtype["field_4"]
             self.assertEqual(sub_dt, h5py.special_dtype(vlen=str))
 
+    def testReaderWithUpdate(self):
+
+        file_in = "data/json/tall.json"
+        file_out = "test/unit/out/h5py_writer_test_testReaderWithUpdate.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.reader = H5JsonReader(file_in)
+            db.writer = H5pyWriter(file_out, no_data=False)
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            db.flush()
+
+            with h5py.File(file_out) as f:
+                self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+                dset111 = f["/g1/g1.1/dset1.1.1"]
+                self.assertEqual(len(dset111.attrs), 2)
+                
+            db.createAttribute(dset111_id, "attr3", "hello")
+            dset_json = db.getObjectById(dset111_id)
+            db.flush()
+
+            with h5py.File(file_out) as f:
+                self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+                dset111 = f["/g1/g1.1/dset1.1.1"]
+                self.assertEqual(len(dset111.attrs), 3)
+                self.assertEqual(dset111.attrs["attr3"], b"hello")
+
+            db.createAttribute(dset111_id, "attr3", "bye-bye")
+            db.flush()
+
+            with h5py.File(file_out) as f:
+                self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+                dset111 = f["/g1/g1.1/dset1.1.1"]
+                self.assertEqual(len(dset111.attrs), 3)
+                self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
+                g1 = f["g1"]
+                
+            # create a new link
+            g13_id = db.createGroup()
+            g1_id = db.getObjectIdByPath("/g1")
+            db.createHardLink(g1_id, "g1.3", g13_id)
+            db.flush()
+
+            with h5py.File(file_out) as f:
+                g1 = f["g1"]
+                self.assertEqual(len(g1), 3)
+                self.assertTrue("g1.3" in g1)
+
 
 if __name__ == "__main__":
     # setup test files

From deb501f567cf7c8683838f7e79bdc68dd09866cb Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 3 Apr 2025 16:59:19 +0200
Subject: [PATCH 031/129] fix flake8 errors

---
 test/unit/h5py_writer_test.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 8eaf8812..7d129bd9 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -489,9 +489,8 @@ def testReaderWithUpdate(self):
                 self.assertTrue("/g1/g1.1/dset1.1.1" in f)
                 dset111 = f["/g1/g1.1/dset1.1.1"]
                 self.assertEqual(len(dset111.attrs), 2)
-                
+
             db.createAttribute(dset111_id, "attr3", "hello")
-            dset_json = db.getObjectById(dset111_id)
             db.flush()
 
             with h5py.File(file_out) as f:
@@ -509,7 +508,7 @@ def testReaderWithUpdate(self):
                 self.assertEqual(len(dset111.attrs), 3)
                 self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
                 g1 = f["g1"]
-                
+
             # create a new link
             g13_id = db.createGroup()
             g1_id = db.getObjectIdByPath("/g1")

From 1bf10b14fbb6f4baf0c12b315b70fabc980366e5 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 3 Apr 2025 18:43:53 +0200
Subject: [PATCH 032/129] added dset writes to h5py_writer test

---
 test/unit/h5py_writer_test.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 7d129bd9..a103873b 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -482,7 +482,6 @@ def testReaderWithUpdate(self):
         with Hdf5db(app_logger=self.log) as db:
             db.reader = H5JsonReader(file_in)
             db.writer = H5pyWriter(file_out, no_data=False)
-            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
             db.flush()
 
             with h5py.File(file_out) as f:
@@ -490,6 +489,7 @@ def testReaderWithUpdate(self):
                 dset111 = f["/g1/g1.1/dset1.1.1"]
                 self.assertEqual(len(dset111.attrs), 2)
 
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
             db.createAttribute(dset111_id, "attr3", "hello")
             db.flush()
 
@@ -509,7 +509,7 @@ def testReaderWithUpdate(self):
                 self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
                 g1 = f["g1"]
 
-            # create a new link
+            # create a new group
             g13_id = db.createGroup()
             g1_id = db.getObjectIdByPath("/g1")
             db.createHardLink(g1_id, "g1.3", g13_id)
@@ -520,6 +520,32 @@ def testReaderWithUpdate(self):
                 self.assertEqual(len(g1), 3)
                 self.assertTrue("g1.3" in g1)
 
+            # create a new dataset
+            dset_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+            db.createHardLink(g1_id, "DS1", dset_id)
+            db.flush()
+
+            with h5py.File(file_out) as f:
+                g1 = f["g1"]
+                self.assertTrue("DS1" in g1)
+                ds1 = g1["DS1"]
+                self.assertEqual(ds1.shape, (10, 10))
+
+            arr = np.asarray(range(10), dtype=np.int32)
+            sel = selections.select((10, 10), (slice(5, 6), slice(0, 10)))
+            db.setDatasetValues(dset_id, sel, arr)
+            db.flush()
+
+            with h5py.File(file_out) as f:
+                ds1 = f["/g1/DS1"]
+                data = ds1[:, :]
+                for i in range(10):
+                    for j in range(10):
+                        if i == 5:
+                            self.assertEqual(data[i, j], j)
+                        else:
+                            self.assertEqual(data[i, j], 0)
+
 
 if __name__ == "__main__":
     # setup test files

From bfd6cdd693b14bbf64af2dd8bb42d3bdb60b6d46 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 10 Apr 2025 14:41:56 +0200
Subject: [PATCH 033/129] fix for array types

---
 src/h5json/array_util.py           |  66 +++++-----------
 src/h5json/hdf5dtype.py            |  10 +--
 src/h5json/reader/h5json_reader.py |   5 +-
 src/h5json/writer/h5py_writer.py   |   3 +-
 test/integ/h5tojson_test.py        |   6 +-
 test/unit/array_util_test.py       | 120 +++++++++++++++++++++++------
 test/unit/hdf5dtype_test.py        | 104 +++++++++++++++++++++++--
 7 files changed, 227 insertions(+), 87 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 1640d687..91b5e499 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -15,6 +15,8 @@
 import binascii
 import numpy as np
 
+from .hdf5dtype import isVlen
+
 MAX_VLEN_ELEMENT = 1_000_000  # restrict largest vlen element to one million
 
 
@@ -99,23 +101,6 @@ def getNumElements(dims):
     return num_elements
 
 
-def isVlen(dt):
-    """
-    Return True if the type contains variable length elements
-    """
-    is_vlen = False
-    if len(dt) > 1:
-        names = dt.names
-        for name in names:
-            if isVlen(dt[name]):
-                is_vlen = True
-                break
-    else:
-        if dt.metadata and "vlen" in dt.metadata:
-            is_vlen = True
-    return is_vlen
-
-
 def jsonToArray(data_shape, data_dtype, data_json):
     """
     Return numpy array from the given json array.
@@ -193,16 +178,16 @@ def getElementSize(e, dt):
             field_dt = dt[name]
             field_val = e[name]
             count += getElementSize(field_val, field_dt)
-    elif not dt.metadata or "vlen" not in dt.metadata:
+    elif not dt.base.metadata or "vlen" not in dt.base.metadata:
         count = dt.itemsize  # fixed size element
     else:
         # variable length element
-        vlen = dt.metadata["vlen"]
+        vlen = dt.base.metadata["vlen"]
         if isinstance(e, int):
             if e == 0:
                 count = 4  # non-initialized element
             else:
-                raise ValueError("Unexpected value: {}".format(e))
+                raise ValueError(f"Unexpected value: {e}")
         elif isinstance(e, bytes):
             count = len(e) + 4
         elif isinstance(e, str):
@@ -226,6 +211,7 @@ def getElementSize(e, dt):
                 count = len(e) * vlen.itemsize + 4  # +4 for byte count
         else:
             raise TypeError("unexpected type: {}".format(type(e)))
+    # print("getElementSize returning:", count)
     return count
 
 
@@ -262,46 +248,40 @@ def copyElement(e, dt, buffer, offset):
     """
     Copy element to bytearray
     """
+
     # print(f"copyElement - dt: {dt}  offset: {offset}")
     if len(dt) > 1:
         for name in dt.names:
             field_dt = dt[name]
             field_val = e[name]
             offset = copyElement(field_val, field_dt, buffer, offset)
-    elif not dt.metadata or "vlen" not in dt.metadata:
-        # print(f"e vlen: {e} type: {type(e)} itemsize: {dt.itemsize}")
-        e_buf = e.tobytes()
-        # print("tobytes:", e_buf)
+    elif not dt.base.metadata or "vlen" not in dt.base.metadata:
+        # print(f"no vlen: {e} type: {type(e)} e.dtype: {e.dtype} itemsize: {dt.itemsize}")
+        e_buf = np.asarray(e, dtype=dt).tobytes()
         if len(e_buf) < dt.itemsize:
             # extend the buffer for fixed size strings
-            # print("extending buffer")
             e_buf_ex = bytearray(dt.itemsize)
             for i in range(len(e_buf)):
                 e_buf_ex[i] = e_buf[i]
             e_buf = bytes(e_buf_ex)
 
-        # print("length:", len(e_buf))
         offset = copyBuffer(e_buf, buffer, offset)
     else:
         # variable length element
-        vlen = dt.metadata["vlen"]
-        # print("copyBuffer vlen:", vlen)
+        vlen = dt.base.metadata["vlen"]
         if isinstance(e, int):
-            # print("copyBuffer int")
             if e == 0:
                 # write 4-byte integer 0 to buffer
                 offset = copyBuffer(b"\x00\x00\x00\x00", buffer, offset)
             else:
                 raise ValueError("Unexpected value: {}".format(e))
         elif isinstance(e, bytes):
-            # print("copyBuffer bytes")
             count = np.int32(len(e))
             if count > MAX_VLEN_ELEMENT:
                 raise ValueError("vlen element too large")
             offset = copyBuffer(count.tobytes(), buffer, offset)
             offset = copyBuffer(e, buffer, offset)
         elif isinstance(e, str):
-            # print("copyBuffer, str")
             text = e.encode("utf-8")
             count = np.int32(len(text))
             if count > MAX_VLEN_ELEMENT:
@@ -311,18 +291,13 @@ def copyElement(e, dt, buffer, offset):
 
         elif isinstance(e, np.ndarray):
             nElements = math.prod(e.shape)
-            # print("copyBuffer ndarray, nElements:", nElements)
 
             if e.dtype.kind != "O":
                 count = np.int32(e.dtype.itemsize * nElements)
-                # print("copyBuffeer got vlen count:", count)
-                # print("copyBuffer e:", e)
                 if count > MAX_VLEN_ELEMENT:
                     raise ValueError("vlen element too large")
                 offset = copyBuffer(count.tobytes(), buffer, offset)
-                # print("copyBuffer write new count, offset:", offset)
                 offset = copyBuffer(e.tobytes(), buffer, offset)
-                # print("copyBuffer write data, offset:", offset)
             else:
                 arr1d = e.reshape((nElements,))
                 for item in arr1d:
@@ -340,7 +315,6 @@ def copyElement(e, dt, buffer, offset):
 
         else:
             raise TypeError("unexpected type: {}".format(type(e)))
-        # print("buffer: {}".format(buffer))
     return offset
 
 
@@ -385,12 +359,13 @@ def readElement(buffer, offset, arr, index, dt):
     Returns:
         int: The updated offset value after reading the element.
     """
+    # print("readElement, offset:", offset)
     if len(dt) > 1:
         e = arr[index]
         for name in dt.names:
             field_dt = dt[name]
             offset = readElement(buffer, offset, e, name, field_dt)
-    elif not dt.metadata or "vlen" not in dt.metadata:
+    elif not dt.base.metadata or "vlen" not in dt.base.metadata:
         count = dt.itemsize
         n = offset
         m = offset + count
@@ -399,12 +374,13 @@ def readElement(buffer, offset, arr, index, dt):
         try:
             e = np.frombuffer(bytes(e_buffer), dtype=dt)
             arr[index] = e[0]
+
         except ValueError:
-            print(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}")
+            # print(f"ValueError setting {e_buffer} and dtype: {dt}")
             raise
     else:
         # variable length element
-        vlenBaseType = dt.metadata["vlen"]
+        vlenBaseType = dt.base.metadata["vlen"]
         e = arr[index]
 
         if isinstance(e, np.ndarray):
@@ -474,6 +450,7 @@ def arrayToBytes(arr, encoding=None):
     """
     Return byte representation of numpy array
     """
+
     if isVlen(arr.dtype):
         nSize = getByteArraySize(arr)
         buffer = bytearray(nSize)
@@ -481,7 +458,6 @@ def arrayToBytes(arr, encoding=None):
         nElements = math.prod(arr.shape)
         arr1d = arr.reshape((nElements,))
         for e in arr1d:
-            # print("arrayToBytes:", e)
             offset = copyElement(e, arr1d.dtype, buffer, offset)
         data = bytes(buffer)
     else:
@@ -499,17 +475,17 @@ def bytesToArray(data, dt, shape, encoding=None):
     """
     if encoding:
         # decode the data
-        # will raise ValueError if non-decodeable
+        # will raise ValueError if non-decodable
         data = decodeData(data)
     if not isVlen(dt):
         # regular numpy from string
         arr = np.frombuffer(data, dtype=dt)
     else:
-        nelements = getNumElements(shape)
+        nElements = getNumElements(shape)
 
-        arr = np.zeros((nelements,), dtype=dt)
+        arr = np.zeros((nElements,), dtype=dt)
         offset = 0
-        for index in range(nelements):
+        for index in range(nElements):
             offset = readElement(data, offset, arr, index, dt)
     if shape is not None:
         arr = arr.reshape(shape)
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index cd3c6a45..bbef116d 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -316,7 +316,7 @@ def getTypeItem(dt, metadata=None):
         metadata = dt.metadata
 
     type_info = {}
-    if len(dt) > 1:
+    if len(dt):
         # compound type
         names = dt.names
         type_info["class"] = "H5T_COMPOUND"
@@ -494,14 +494,14 @@ def isVlen(dt):
     Return True if the type contains variable length elements
     """
     is_vlen = False
-    if len(dt) > 1:
+    if len(dt):
         names = dt.names
         for name in names:
             if isVlen(dt[name]):
                 is_vlen = True
                 break
     else:
-        if dt.metadata and "vlen" in dt.metadata:
+        if dt.base.metadata and "vlen" in dt.base.metadata:
             is_vlen = True
     return is_vlen
 
@@ -510,7 +510,7 @@ def isOpaqueDtype(dt):
     """
     Return True if this is an opaque dtype
     """
-    if dt.kind == "V" and len(dt) <= 1 and len(dt.shape) == 0 and not dt.names:
+    if dt.kind == "V" and len(dt) == 0 and len(dt.shape) == 0 and not dt.names:
         return True
     if dt.metadata and dt.metadata.get('h5py_opaque'):
         return True
@@ -626,7 +626,7 @@ def getDtypeItemSize(dtype):
         return the string "H5T_VARIABLE
     """
     item_size = 0
-    if len(dtype) > 0:
+    if len(dtype):
         # compound dtype
         for i in range(len(dtype)):
             sub_dt = dtype[i]
diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/reader/h5json_reader.py
index f4d6426e..455b185c 100644
--- a/src/h5json/reader/h5json_reader.py
+++ b/src/h5json/reader/h5json_reader.py
@@ -175,12 +175,11 @@ def getDatasetValues(self, obj_id, sel=None):
         self.log.debug(f"getDatasetValues({obj_id}), sel={sel}")
         json_obj = self.getObjectById(obj_id, include_values=True)
         if json_obj is None:
-            print("no json_obj")
+            self.log.warning(f"no object found with id; {obj_id}")
             return None
 
         if "value" not in json_obj:
-            print("no json value")
-            self.log.warning("value key not found for {obj_id}")
+            self.log.warning(f"value key not found for {obj_id}")
             return None
         json_value = json_obj["value"]
         shape_json = json_obj["shape"]
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/writer/h5py_writer.py
index ee7bc537..2d281338 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/writer/h5py_writer.py
@@ -345,7 +345,8 @@ def initializeDatasetValues(self, dset_id, dset):
 
         sel_all = selections.select(dset.shape, ...)
         arr = self.db.getDatasetValues(dset_id, sel_all)
-        dset[...] = arr
+        if arr is not None:
+            dset[...] = arr
 
     def createAttribute(self, obj, name, attr_json):
         """ add the given attribute to obj """
diff --git a/test/integ/h5tojson_test.py b/test/integ/h5tojson_test.py
index 68b04642..5be40c84 100644
--- a/test/integ/h5tojson_test.py
+++ b/test/integ/h5tojson_test.py
@@ -35,7 +35,7 @@
     "compound.h5",
     "compound_array.h5",
     "compound_array_attr.h5",
-    # "compound_array_vlen_string.h5",  # crashes python w/ Linux!
+    "compound_array_vlen_string.h5",  # crashes python w/ Linux?
     "compound_array_dset.h5",
     "compound_attr.h5",
     "compound_committed.h5",
@@ -47,8 +47,8 @@
     "enum_attr.h5",
     "enum_dset.h5",
     "fillvalue.h5",
-    "fixed_string_attr.h5",  # temp for trying travis
-    "fixed_string_dset.h5",  # temp for trying travis
+    "fixed_string_attr.h5",
+    "fixed_string_dset.h5",
     "h5ex_d_alloc.h5",
     "h5ex_d_checksum.h5",
     "h5ex_d_chunk.h5",
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index 1b0b0f68..cc2f63c3 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -26,6 +26,7 @@
 from h5json.array_util import ndarray_compare
 from h5json.array_util import getNumpyValue
 from h5json.array_util import getBroadcastShape
+from h5json.array_util import isVlen
 
 from h5json.hdf5dtype import special_dtype
 from h5json.hdf5dtype import check_dtype
@@ -378,6 +379,12 @@ def testToBytes(self):
         arr_copy = bytesToArray(buffer, dt, (4,))
         self.assertTrue(np.array_equal(arr, arr_copy))
 
+        # big-endian ints
+        dt = np.dtype(">u8")
+        arr = np.asarray((1, 2, 3, 4), dtype=dt)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
         # fixed length string
         dt = np.dtype("S8")
         arr = np.asarray(("abcdefgh", "ABCDEFGH", "12345678"), dtype=dt)
@@ -428,11 +435,11 @@ def testToBytes(self):
         self.assertTrue(ndarray_compare(arr, arr_copy))
 
         # VLEN of int32's
-        dt = np.dtype("O", metadata={"vlen": np.dtype("int32")})
+        dt = special_dtype(vlen=np.dtype("<i4"))
         arr = np.zeros((4,), dtype=dt)
         arr[0] = np.int32([1, ])
         arr[1] = np.int32([1, 2])
-        arr[2] = 0  # test un-intialized value
+        arr[2] = 0  # test un-initialized value
         arr[3] = np.int32([1, 2, 3])
         buffer = arrayToBytes(arr)
         self.assertEqual(len(buffer), 40)
@@ -442,7 +449,7 @@ def testToBytes(self):
         self.assertTrue(ndarray_compare(arr, arr_copy))
 
         # VLEN of strings
-        dt = np.dtype("O", metadata={"vlen": str})
+        dt = special_dtype(vlen=str)
         arr = np.zeros((5,), dtype=dt)
         arr[0] = "one: \u4e00"
         arr[1] = "two: \u4e8c"
@@ -467,7 +474,7 @@ def testToBytes(self):
 
         self.assertTrue(ndarray_compare(arr, arr_copy))
         # VLEN of bytes
-        dt = np.dtype("O", metadata={"vlen": bytes})
+        dt = special_dtype(vlen=bytes)
         arr = np.zeros((5,), dtype=dt)
         arr[0] = b"Parting"
         arr[1] = b"is such"
@@ -494,7 +501,7 @@ def testToBytes(self):
         #
         # Compound str vlen
         #
-        dt_vstr = np.dtype("O", metadata={"vlen": str})
+        dt_vstr = special_dtype(vlen=str)
         dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")])
         arr = np.zeros((4,), dtype=dt)
         arr[0] = (42, "Hello", "X1")
@@ -515,7 +522,7 @@ def testToBytes(self):
         #
         # Compound int vlen
         #
-        dt_vint = np.dtype("O", metadata={"vlen": "int32"})
+        dt_vint = special_dtype(vlen=np.dtype("<i4"))
         dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
         arr = np.zeros((4,), dtype=dt)
         arr[0] = (42, np.array((), dtype="int32"))
@@ -538,7 +545,8 @@ def testToBytes(self):
         #
         # VLEN utf string with array type
         #
-        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (2,)))
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
         dt_str = special_dtype(vlen=str)
@@ -564,11 +572,11 @@ def testToBytes(self):
         #
         # VLEN ascii with array type
         #
-        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
+        dt_str = special_dtype(vlen=bytes)
+        dt_arr_str = np.dtype((dt_str, (2,)))
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
 
-        dt_str = special_dtype(vlen=str)
         arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
         arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
         buffer = arrayToBytes(arr)
@@ -583,6 +591,69 @@ def testToBytes(self):
         arr_copy = bytesToArray(buffer, dt, (4,))
         self.assertTrue(ndarray_compare(arr, arr_copy))
 
+        # test Compound with VLEN
+        count = 4
+        fixed_str8_type = {
+            "charSet": "H5T_CSET_ASCII",
+            "class": "H5T_STRING",
+            "length": 8,
+            "strPad": "H5T_STR_NULLPAD",
+        }
+        fields = [
+            {
+                "type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"},
+                "name": "VALUE1",
+            },
+            {
+                "type": fixed_str8_type,
+                "name": "VALUE2"
+            },
+            {
+                "type": {
+                    "class": "H5T_ARRAY",
+                    "dims": [2],
+                    "base": {
+                        "class": "H5T_STRING",
+                        "charSet": "H5T_CSET_ASCII",
+                        "strPad": "H5T_STR_NULLTERM",
+                        "length": "H5T_VARIABLE",
+                    },
+                },
+                "name": "VALUE3",
+            },
+        ]
+
+        datatype = {"class": "H5T_COMPOUND", "fields": fields}
+
+        dt = createDataType(datatype)
+        self.assertTrue(isVlen(dt))
+
+        # create numpy vlen array
+        arr = np.zeros((count,), dtype=dt)
+        for i in range(count):
+            e = arr[i]
+            e["VALUE1"] = i + 1
+            s = ""
+            for j in range(i + 5):
+                offset = (i + j) % 26
+                s += chr(ord("A") + offset)
+            e["VALUE2"] = s
+            e["VALUE3"] = [b"Hi! " * (i + 1), b"Bye!" * (i + 1)]
+
+        # converts to bytes
+        data = arrayToBytes(arr)
+        self.assertEqual(len(data), 192)  # will vary based on count
+
+        # convert back to array
+        arr_copy = bytesToArray(data, dt, (4,))
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+
     def testArrToBytesBase64(self):
         # Simple array
         dt = np.dtype("<i4")
@@ -617,11 +688,11 @@ def testArrToBytesBase64(self):
         self.assertTrue(ndarray_compare(arr, arr_copy))
 
         # VLEN of int32's
-        dt = np.dtype("O", metadata={"vlen": np.dtype("int32")})
+        dt = special_dtype(vlen=np.dtype("<i4"))
         arr = np.zeros((4,), dtype=dt)
         arr[0] = np.int32([1, ])
         arr[1] = np.int32([1, 2])
-        arr[2] = 0  # test un-intialized value
+        arr[2] = 0  # test un-initialized value
         arr[3] = np.int32([1, 2, 3])
         buffer = arrayToBytes(arr, encoding="base64")
 
@@ -630,7 +701,7 @@ def testArrToBytesBase64(self):
         self.assertTrue(ndarray_compare(arr, arr_copy))
 
         # VLEN of strings
-        dt = np.dtype("O", metadata={"vlen": str})
+        dt = special_dtype(vlen=str)
         arr = np.zeros((5,), dtype=dt)
         arr[0] = "one: \u4e00"
         arr[1] = "two: \u4e8c"
@@ -643,7 +714,7 @@ def testArrToBytesBase64(self):
         arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64")
         self.assertTrue(ndarray_compare(arr, arr_copy))
         # VLEN of bytes
-        dt = np.dtype("O", metadata={"vlen": bytes})
+        dt = special_dtype(vlen=bytes)
         arr = np.zeros((5,), dtype=dt)
         arr[0] = b"Parting"
         arr[1] = b"is such"
@@ -660,7 +731,7 @@ def testArrToBytesBase64(self):
         #
         # Compound str vlen
         #
-        dt_vstr = np.dtype("O", metadata={"vlen": str})
+        dt_vstr = special_dtype(vlen=str)
         dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")])
         arr = np.zeros((4,), dtype=dt)
         arr[0] = (42, "Hello", "X1")
@@ -675,7 +746,7 @@ def testArrToBytesBase64(self):
         #
         # Compound int vlen
         #
-        dt_vint = np.dtype("O", metadata={"vlen": "int32"})
+        dt_vint = special_dtype(vlen=np.dtype("<i4"))
         dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
         arr = np.zeros((4,), dtype=dt)
         arr[0] = (42, np.array((), dtype="int32"))
@@ -691,7 +762,8 @@ def testArrToBytesBase64(self):
         #
         # VLEN utf string with array type
         #
-        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (2,)))
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
 
@@ -712,7 +784,8 @@ def testArrToBytesBase64(self):
         #
         # VLEN ascii with array type
         #
-        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
+        dt_str = special_dtype(vlen=bytes)
+        dt_arr_str = np.dtype((dt_str, (2,)))
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         arr = np.zeros((4,), dtype=dt)
 
@@ -737,7 +810,7 @@ def testArrayCompareInt(self):
 
     def testArrayCompareVlenInt(self):
         # Vlen array
-        dt_vint = np.dtype("O", metadata={"vlen": "int32"})
+        dt_vint = special_dtype(vlen=np.dtype("<i4"))
         dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
         arr1 = np.zeros((1024, 1024), dtype=dt)
         arr2 = np.zeros((1024, 1024), dtype=dt)
@@ -777,7 +850,6 @@ def array_equal(a, b):
                 if isinstance(b, str):
                     b = b.encode("utf8")
                 if a != b:
-                    print(f"{a} != {b}")
                     return False
 
             return True
@@ -818,7 +890,7 @@ def array_equal(a, b):
         #
         # Compound vlen
         #
-        dt_str = np.dtype("O", metadata={"vlen": str})
+        dt_str = special_dtype(vlen=str)
         dt = np.dtype([("x", "i4"), ("tag", dt_str)])
         shape = [4, ]
         data = [[42, "Hello"], [0, 0], [0, 0], [84, "Bye"]]
@@ -845,7 +917,8 @@ def array_equal(a, b):
         #
         # VLEN utf with array type
         #
-        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str})
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (2,)))
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         shape = [4,]
         data = [
@@ -871,7 +944,8 @@ def array_equal(a, b):
         #
         # VLEN ascii with array type
         #
-        dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes})
+        dt_str = special_dtype(vlen=bytes)
+        dt_arr_str = np.dtype((dt_str, (2,)))
         dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
         shape = [4,]
         data = [
@@ -934,7 +1008,7 @@ def testGetNumpyValue(self):
         self.assertEqual(val, b"hello")
 
         # test variable length string conversion
-        dt = np.dtype("O", metadata={"vlen": bytes})
+        dt = special_dtype(vlen=bytes)
         val = getNumpyValue("hello", dt=dt)
         self.assertTrue(isinstance(val, str))
         self.assertEqual(val, "hello")
diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index 2f798378..fc0ffb44 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -19,6 +19,7 @@
 from h5json.hdf5dtype import Reference
 from h5json.hdf5dtype import RegionReference
 from h5json.hdf5dtype import isOpaqueDtype
+from h5json.hdf5dtype import isVlen
 
 
 class Hdf5dtypeTest(unittest.TestCase):
@@ -151,6 +152,8 @@ def testBaseArrayTypeItem(self):
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeItem["class"], "H5T_ARRAY")
+        self.assertTrue("dims" in typeItem)
+        self.assertEqual(typeItem["dims"], (2, 2,))
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I32LE")
@@ -247,12 +250,12 @@ def testCompoundArrayVlenIntTypeItem(self):
         field_c_type = field_c["type"]
         self.assertEqual(field_c_type["class"], "H5T_ARRAY")
         self.assertEqual(field_c_type["dims"], (4,))
-        field_c_base_type = field_c_type["base"]
-        self.assertEqual(field_c_base_type["class"], "H5T_VLEN")
-        self.assertEqual(field_c_base_type["size"], "H5T_VARIABLE")
-        field_c_base_base_type = field_c_base_type["base"]
-        self.assertEqual(field_c_base_base_type["class"], "H5T_INTEGER")
-        self.assertEqual(field_c_base_base_type["base"], "H5T_STD_I32LE")
+        field_c_type_base = field_c_type["base"]
+        self.assertEqual(field_c_type_base["class"], "H5T_VLEN")
+        self.assertEqual(field_c_type_base["size"], "H5T_VARIABLE")
+        field_c_type_base_base = field_c_type_base["base"]
+        self.assertEqual(field_c_type_base_base["class"], "H5T_INTEGER")
+        self.assertEqual(field_c_type_base_base["base"], "H5T_STD_I32LE")
 
     def testCompoundArrayVlenStringTypeItem(self):
         dt_vlen = special_dtype(vlen=bytes)
@@ -286,6 +289,32 @@ def testCompoundArrayVlenStringTypeItem(self):
         self.assertEqual(field_c_base_type["length"], "H5T_VARIABLE")
         self.assertEqual(field_c_base_type["charSet"], "H5T_CSET_ASCII")
 
+    def testCompoundArrayVlenStr(self):
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (3, 2)))
+        dt_compound = np.dtype([("VALUE1", "i4"), ("VALUE2", dt_arr_str)])
+        self.assertTrue(isVlen(dt_compound))
+        type_item = hdf5dtype.getTypeItem(dt_compound)
+        typeSize = hdf5dtype.getItemSize(type_item)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(type_item["class"], "H5T_COMPOUND")
+        fields = type_item["fields"]
+        field_a = fields[0]
+        self.assertEqual(field_a["name"], "VALUE1")
+        field_a_type = field_a["type"]
+        self.assertEqual(field_a_type["class"], "H5T_INTEGER")
+        self.assertEqual(field_a_type["base"], "H5T_STD_I32LE")
+
+        field_b = fields[1]
+        field_b_type = field_b["type"]
+
+        self.assertEqual(field_b_type["class"], "H5T_ARRAY")
+        self.assertEqual(field_b_type["dims"], (3, 2))
+        field_b_base_type = field_b_type["base"]
+        self.assertEqual(field_b_base_type["class"], "H5T_STRING")
+        self.assertEqual(field_b_base_type["length"], "H5T_VARIABLE")
+        self.assertEqual(field_b_base_type["charSet"], "H5T_CSET_UTF8")
+
     def testOpaqueTypeItem(self):
         dt = np.dtype("V200")
         self.assertTrue(isOpaqueDtype(dt))
@@ -348,7 +377,23 @@ def testCompoundTypeItem(self):
         self.assertEqual(tempFieldType["base"], "H5T_IEEE_F32LE")
         self.assertEqual(typeSize, 10)
 
-    def testCompoundofCompoundTypeItem(self):
+    def testCompoundOnfFieldTypeItem(self):
+        dt = np.dtype([("temp", np.float32),])
+        typeItem = hdf5dtype.getTypeItem(dt)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeItem["class"], "H5T_COMPOUND")
+        self.assertTrue("fields" in typeItem)
+        fields = typeItem["fields"]
+        self.assertEqual(len(fields), 1)
+        tempField = fields[0]
+        self.assertEqual(tempField["name"], "temp")
+        self.assertTrue("type" in tempField)
+        tempFieldType = tempField["type"]
+        self.assertEqual(tempFieldType["class"], "H5T_FLOAT")
+        self.assertEqual(tempFieldType["base"], "H5T_IEEE_F32LE")
+        self.assertEqual(typeSize, 4)
+
+    def testCompoundOfCompoundTypeItem(self):
         dt1 = np.dtype([("x", np.float32), ("y", np.float32)])
         dt2 = np.dtype([("a", np.float32), ("b", np.float32), ("c", np.float32)])
         dt = np.dtype([("field1", dt1), ("field2", dt2)])
@@ -376,6 +421,7 @@ def testCreateBaseType(self):
         self.assertEqual(dt.name, "uint32")
         self.assertEqual(dt.byteorder, ">")
         self.assertEqual(dt.kind, "u")
+        self.assertFalse(isVlen(dt))
 
         dt = hdf5dtype.createDataType("H5T_STD_I16LE")
         self.assertEqual(dt.name, "int16")
@@ -384,10 +430,12 @@ def testCreateBaseType(self):
         dt = hdf5dtype.createDataType("H5T_IEEE_F64LE")
         self.assertEqual(dt.name, "float64")
         self.assertEqual(dt.kind, "f")
+        self.assertFalse(isVlen(dt))
 
         dt = hdf5dtype.createDataType("H5T_IEEE_F32LE")
         self.assertEqual(dt.name, "float32")
         self.assertEqual(dt.kind, "f")
+        self.assertFalse(isVlen(dt))
 
         typeItem = {"class": "H5T_INTEGER", "base": "H5T_STD_I32BE"}
         typeSize = hdf5dtype.getItemSize(typeItem)
@@ -395,6 +443,7 @@ def testCreateBaseType(self):
         self.assertEqual(dt.name, "int32")
         self.assertEqual(dt.kind, "i")
         self.assertEqual(typeSize, 4)
+        self.assertFalse(isVlen(dt))
 
     def testCreateBaseStringType(self):
         typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_ASCII", "length": 6}
@@ -403,6 +452,7 @@ def testCreateBaseStringType(self):
         self.assertEqual(dt.name, "bytes48")
         self.assertEqual(dt.kind, "S")
         self.assertEqual(typeSize, 6)
+        self.assertFalse(isVlen(dt))
 
     def testCreateBaseUnicodeType(self):
         typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_UTF8", "length": 6}
@@ -413,6 +463,7 @@ def testCreateBaseUnicodeType(self):
         self.assertEqual(dt.name, "bytes48")
         self.assertEqual(dt.kind, "S")  # uses byte
         self.assertEqual(typeSize, 6)
+        self.assertFalse(isVlen(dt))
 
     def testCreateNullTermStringType(self):
         typeItem = {
@@ -427,6 +478,7 @@ def testCreateNullTermStringType(self):
         self.assertEqual(dt.name, "bytes48")
         self.assertEqual(dt.kind, "S")
         self.assertEqual(typeSize, 6)
+        self.assertFalse(isVlen(dt))
 
     def testCreateVLenStringType(self):
         typeItem = {
@@ -440,6 +492,28 @@ def testCreateVLenStringType(self):
         self.assertEqual(dt.kind, "O")
         self.assertEqual(check_dtype(vlen=dt), bytes)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
+
+    def testCreateVLenStringArrayType(self):
+        typeItem = {
+            "class": "H5T_ARRAY",
+            "dims": (2, 2),
+            "base": {
+                "class": "H5T_STRING",
+                "charSet": "H5T_CSET_ASCII",
+                "length": "H5T_VARIABLE",
+            }
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.name, "void256")  # assuming 8-byte pointers
+        self.assertEqual(dt.kind, "V")
+        self.assertEqual(dt.shape, (2, 2))
+        self.assertEqual(check_dtype(vlen=dt), None)
+        self.assertEqual(check_dtype(vlen=dt.base), bytes)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(dt.base.kind, 'O')
+        self.assertTrue(isVlen(dt))
 
     def testCreateVLenUTF8Type(self):
         typeItem = {
@@ -453,6 +527,7 @@ def testCreateVLenUTF8Type(self):
         self.assertEqual(dt.kind, "O")
         self.assertEqual(check_dtype(vlen=dt), str)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
 
     def testCreateVLenDataType(self):
         typeItem = {"class": "H5T_VLEN", "base": "H5T_STD_I32BE"}
@@ -461,6 +536,7 @@ def testCreateVLenDataType(self):
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "object")
         self.assertEqual(dt.kind, "O")
+        self.assertTrue(isVlen(dt))
 
     def testCreateOpaqueType(self):
         typeItem = {"class": "H5T_OPAQUE", "size": 200}
@@ -469,6 +545,7 @@ def testCreateOpaqueType(self):
         self.assertEqual(dt.name, "void1600")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(typeSize, 200)
+        self.assertFalse(isVlen(dt))
 
     def testCreateEnumType(self):
         typeItem = {
@@ -488,6 +565,7 @@ def testCreateEnumType(self):
         self.assertEqual(mapping["LIQUID"], 1)
         self.assertEqual(mapping["GAS"], 2)
         self.assertEqual(mapping["PLASMA"], 3)
+        self.assertFalse(isVlen(dt))
 
     def testCreateBoolType(self):
         typeItem = {
@@ -502,6 +580,7 @@ def testCreateBoolType(self):
         self.assertEqual(dt.name, "bool")
         self.assertEqual(dt.kind, "b")
         self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
 
     def testCreateReferenceType(self):
         typeItem = {
@@ -517,6 +596,7 @@ def testCreateReferenceType(self):
         self.assertEqual(dt.kind, "S")
         self.assertTrue(dt.metadata['ref'] is Reference)
         self.assertEqual(check_dtype(ref=dt), Reference)
+        self.assertFalse(isVlen(dt))
 
     def testCreateVlenReferenceType(self):
         typeItem = {
@@ -530,6 +610,7 @@ def testCreateVlenReferenceType(self):
         base = dt.metadata['vlen']
         self.assertTrue(base.metadata['ref'] is Reference)
         self.assertEqual(check_dtype(ref=base), Reference)
+        self.assertTrue(isVlen(dt))
 
     def testCreateCompoundType(self):
         typeItem = {
@@ -555,6 +636,7 @@ def testCreateCompoundType(self):
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 4)
         self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertTrue(isVlen(dt))
 
         dtLocation = dt[2]
         self.assertEqual(dtLocation.name, "object")
@@ -644,6 +726,7 @@ def testCreateCompoundOfCompoundType(self):
         self.assertEqual(dt.name, "void160")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 2)
+        self.assertFalse(isVlen(dt))
         dt_field1 = dt[0]
         self.assertEqual(dt_field1.name, "void64")
         self.assertEqual(dt_field1.kind, "V")
@@ -669,6 +752,7 @@ def testCreateCompoundTypeUnicodeFields(self):
         self.assertEqual(len(dt.fields), 3)
         self.assertEqual(typeSize, 10)
         self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
 
     def testCreateArrayType(self):
         typeItem = {"class": "H5T_ARRAY", "base": "H5T_STD_I64LE", "dims": (3, 5)}
@@ -676,8 +760,10 @@ def testCreateArrayType(self):
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "void960")
         self.assertEqual(dt.kind, "V")
+        self.assertEqual(dt.base.kind, "i")
         self.assertEqual(typeSize, 120)
         self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
 
     def testCreateCompoundArrayVlenType(self):
         typeItem = {
@@ -702,6 +788,7 @@ def testCreateCompoundArrayVlenType(self):
         self.assertEqual(dt.kind, "V")
         self.assertEqual(typeSize, "H5T_VARIABLE")
         self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertTrue(isVlen(dt))
         dt_arr = dt["VALUE3"]
         self.assertEqual(dt_arr.kind, "V")
         self.assertEqual(dt_arr.shape, (8,))
@@ -725,6 +812,7 @@ def testCreateVlenObjRefType(self):
         self.assertEqual(dt.name, "object")
         self.assertEqual(dt.kind, "O")
         self.assertTrue(check_dtype(ref=dt) is None)
+        self.assertTrue(isVlen(dt))
         dt_base = check_dtype(vlen=dt)
         self.assertTrue(dt_base is not None)
         self.assertTrue(check_dtype(ref=dt_base) is Reference)
@@ -756,6 +844,7 @@ def testCreateCompoundArrayType(self):
         self.assertTrue("b" in dt.fields.keys())
         self.assertEqual(typeSize, 11)
         self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
 
     def testCompoundArrayType(self):
         typeItem = {
@@ -787,6 +876,7 @@ def testCompoundArrayType(self):
         dt = hdf5dtype.createDataType(typeItem)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
         self.assertEqual(len(dt), 3)
         self.assertTrue("VALUE1" in dt.fields.keys())
         self.assertTrue("VALUE2" in dt.fields.keys())

From d1e2b3901908d277d2377590521bca83fe5b6157 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 16 Apr 2025 16:03:36 +0200
Subject: [PATCH 034/129] fix for scalar json to arr conversion

---
 src/h5json/array_util.py     |  6 +++--
 test/unit/array_util_test.py | 52 ++++++++++++++++++++++--------------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 91b5e499..73ec40cb 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -105,7 +105,9 @@ def jsonToArray(data_shape, data_dtype, data_json):
     """
     Return numpy array from the given json array.
     """
+    # print(f"jsonToArray: data_shape: {data_shape}, data_dtype: {data_dtype} data_json: {data_json}")
     def fillVlenArray(rank, data, arr, index):
+        # print(f"fillVlenArray rank: {rank} data: {data} arr: {arr} index: {index}")
         if arr.shape == ():
             arr[()] = data
         else:
@@ -134,8 +136,8 @@ def fillVlenArray(rank, data, arr, index):
 
     if type(data_json) in (list, tuple):
         converted_data = []
-        if npoints == 1:
-            converted_data = toTuple(np_shape_rank, data_json)
+        if np_shape_rank > 0 and npoints == 1 and len(data_json) == len(data_dtype):
+            converted_data.append(toTuple(0, data_json))
         else:
             converted_data = toTuple(np_shape_rank, data_json)
         data_json = converted_data
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index cc2f63c3..699a80bc 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -202,6 +202,11 @@ def testJsonToArray(self):
         except UnicodeEncodeError:
             pass  # expected
 
+        # UTF8 encode the data first
+        out = jsonToArray(shape, dt, data.encode('utf8'))
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data.encode('utf8'))
+
         # VLEN data
         dt = special_dtype(vlen=np.dtype("int32"))
         shape = [4, ]
@@ -298,46 +303,53 @@ def testJsonToArray(self):
         e1 = out[1].tolist()
         self.assertEqual(e1, (5, b"five"))
 
-        # compound with VLEN element
-
-        dt_str = special_dtype(vlen=str)
-        dt = np.dtype([("a", "i4"), ("b", dt_str)])
-        shape = [1, ]
-        data = [[6, "six"],]
+        data = [6, "six"]
+        shape = [1,]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, (1,))
-        e0 = out[0]
+        self.assertTrue(isinstance(out[0], np.void))
+        e1 = out[0].tolist()
+        self.assertEqual(e1, (6, b"six"))
 
-        e0 = out[0].tolist()
-        self.assertEqual(e0, (6, "six"))
+        data = [7, "seven"]
         shape = []
-        data = [6, "six",]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, ())
-        e0 = out[()]
-        self.assertEqual(len(e0), 2)
-        self.assertEqual(e0[0], 6)
-        self.assertEqual(e0[1], "six")
+        self.assertTrue(isinstance(out[()], np.void))
+        e1 = out[()].tolist()
+        self.assertEqual(e1, (7, b"seven"))
+
+        # compound with VLEN element
+
+        dt_str = special_dtype(vlen=str)
+        dt = np.dtype([("a", "i4"), ("b", dt_str)])
+        shape = [2, ]
+        data = [[4, "four"], [5, "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (2,))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (4, "four"))
 
-        # one element compound
         shape = [1, ]
-        data = [[6, "six"],]
+        data = [6, "six"]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, (1,))
         e0 = out[0].tolist()
         self.assertEqual(e0, (6, "six"))
 
-        # scalar compound
         shape = []
-        data = [6, "six"]
+        data = [7, "seven",]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, ())
-        e0 = out[()].tolist()
-        self.assertEqual(e0, (6, "six"))
+        e0 = out[()]
+        self.assertEqual(len(e0), 2)
+        self.assertEqual(e0[0], 7)
+        self.assertEqual(e0[1], "seven")
 
         # compound type with array field
         dt = np.dtype([("a", ("i4", 3)), ("b", "S5")])

From c6d77f877957fc1c16714ada9433b1fa9909b038 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 17 Apr 2025 19:42:57 +0200
Subject: [PATCH 035/129] support jsontoarray for all byte strings

---
 src/h5json/array_util.py     | 10 ++++----
 test/unit/array_util_test.py | 44 +++++++++++++++++++++++++-----------
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 73ec40cb..bb416423 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -61,7 +61,7 @@ def bytesArrayToList(data):
     return out
 
 
-def toTuple(rank, data):
+def toTuple(rank, data, encoding=None):
     """
     Convert a list to a tuple, recursively.
     Example. [[1,2],[3,4]] -> ((1,2),(3,4))
@@ -72,6 +72,8 @@ def toTuple(rank, data):
         else:
             return tuple(toTuple(rank - 1, x) for x in data)
     else:
+        if encoding:
+            data = data.encode(encoding, "surrogateesacpe")
         return data
 
 
@@ -153,9 +155,9 @@ def fillVlenArray(rank, data, arr, index):
         try:
             arr = np.array(data_json, dtype=data_dtype)
         except UnicodeEncodeError:
-            # Unable to encode data
-            # TBD: look into using surrogate encoding here
-            raise
+            # Unable to encode data, encode as utf8 with surrogate escaping
+            data_json = toTuple(np_shape_rank, data_json, encoding="utf8")
+            arr = np.array(data_json, dtype=data_dtype)
     # raise an exception of the array shape doesn't match the selection shape
     # allow if the array is a scalar and the selection shape is one element,
     # numpy is ok with this
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index 699a80bc..13692625 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -116,6 +116,21 @@ def testJsonToArray(self):
         self.assertEqual(out.shape, ())
         self.assertEqual(out[()], 42)
 
+        dt = np.dtype("S10")  # fixed size string
+        shape = [5, ]
+        data = ["parting", "is", "such", "sweet", "sorrow"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, ))
+        self.assertEqual(out[4], b'sorrow')
+
+        shape = ()  # scalar
+        data = "a string"
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        self.assertEqual(out[()], b'a string')
+
         # VLEN Scalar str
         dt = special_dtype(vlen=str)
         data = "I'm a string!"
@@ -179,34 +194,37 @@ def testJsonToArray(self):
         self.assertEqual(out.dtype.kind, "O")
         self.assertEqual(out[2], "three")
 
-        # test ascii chars >127
+        # test utf8 strings
         dt = np.dtype("S26")
         shape = []
-        data = "extended ascii char 241: " + chr(241)
-        try:
-            jsonToArray(shape, dt, data)
-            self.assertTrue(False)
-        except ValueError:
-            pass  # expected
+        data = "eight: \u516b"
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data.encode("utf8"))
 
         dt = special_dtype(vlen=str)
-        out = jsonToArray(shape, dt, data)  # vlen str should be ok
+        out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out[()], data)
 
         dt = np.dtype("S12")
         data = "eight: \u516b"
-        try:
-            jsonToArray(shape, dt, data)
-            self.assertTrue(False)
-        except UnicodeEncodeError:
-            pass  # expected
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data.encode("utf8"))
 
         # UTF8 encode the data first
         out = jsonToArray(shape, dt, data.encode('utf8'))
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out[()], data.encode('utf8'))
 
+        # one-element array
+        shape = [1,]
+        dt = np.dtype("S12")
+        data = "eight: \u516b"
+        out = jsonToArray(shape, dt, data)
+        self.assertEqual(out[0], b'eight: \xe5\x85\xab')
+
         # VLEN data
         dt = special_dtype(vlen=np.dtype("int32"))
         shape = [4, ]

From cb3419afa0989fe17dc816a56f669cdf76cad2d8 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 18 Apr 2025 22:53:55 +0200
Subject: [PATCH 036/129] fix errors in jsonToArray function

---
 src/h5json/array_util.py     | 47 +++++++--------------------
 test/unit/array_util_test.py | 63 +++++++++++++++++++++++++-----------
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index bb416423..ed3ba979 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -107,19 +107,6 @@ def jsonToArray(data_shape, data_dtype, data_json):
     """
     Return numpy array from the given json array.
     """
-    # print(f"jsonToArray: data_shape: {data_shape}, data_dtype: {data_dtype} data_json: {data_json}")
-    def fillVlenArray(rank, data, arr, index):
-        # print(f"fillVlenArray rank: {rank} data: {data} arr: {arr} index: {index}")
-        if arr.shape == ():
-            arr[()] = data
-        else:
-            for i in range(len(data)):
-                if rank > 1:
-                    index = fillVlenArray(rank - 1, data[i], arr, index)
-                else:
-                    arr[index] = data[i]
-                    index += 1
-        return index
 
     if data_json is None:
         return np.array([]).astype(data_dtype)
@@ -131,33 +118,23 @@ def fillVlenArray(rank, data, arr, index):
     # need some special conversion for compound types --
     # each element must be a tuple, but the JSON decoder
     # gives us a list instead.
-    if len(data_dtype) > 1 and not isinstance(data_json, (list, tuple)):
+    if len(data_dtype) > 0 and not isinstance(data_json, (list, tuple)):
         raise TypeError("expected list data for compound data type")
     npoints = getNumElements(data_shape)
     np_shape_rank = len(data_shape)
 
     if type(data_json) in (list, tuple):
-        converted_data = []
-        if np_shape_rank > 0 and npoints == 1 and len(data_json) == len(data_dtype):
-            converted_data.append(toTuple(0, data_json))
-        else:
-            converted_data = toTuple(np_shape_rank, data_json)
-        data_json = converted_data
+        data_json = toTuple(np_shape_rank, data_json)
 
-    if isVlen(data_dtype):
-        if np_shape_rank == 0 and npoints == 1:
-            arr_shape = ()
-        else:
-            arr_shape = (npoints,)
-        arr = np.zeros(arr_shape, dtype=data_dtype)
-        fillVlenArray(np_shape_rank, data_json, arr, 0)
-    else:
-        try:
-            arr = np.array(data_json, dtype=data_dtype)
-        except UnicodeEncodeError:
-            # Unable to encode data, encode as utf8 with surrogate escaping
-            data_json = toTuple(np_shape_rank, data_json, encoding="utf8")
-            arr = np.array(data_json, dtype=data_dtype)
+    arr = np.zeros(data_shape, dtype=data_dtype)
+
+    try:
+        # arr = np.array(data_json, dtype=data_dtype)
+        arr[...] = data_json
+    except UnicodeEncodeError:
+        # Unable to encode data, encode as utf8 with surrogate escaping
+        data_json = toTuple(np_shape_rank, data_json, encoding="utf8")
+        arr[...] = data_json
     # raise an exception of the array shape doesn't match the selection shape
     # allow if the array is a scalar and the selection shape is one element,
     # numpy is ok with this
@@ -165,8 +142,6 @@ def fillVlenArray(rank, data, arr, index):
         msg = "Input data doesn't match selection number of elements"
         msg += f" Expected {npoints}, but received: {arr.size}"
         raise ValueError(msg)
-    if arr.shape != data_shape:
-        arr = arr.reshape(data_shape)  # reshape to match selection
 
     return arr
 
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index 13692625..21a5849b 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -82,6 +82,27 @@ def testToTuple(self):
         out = toTuple(1, data3d)  # treat input a 1d array of compound type of compound types
         self.assertEqual([((0, 0.0), (1, 0.1)), ((2, 0.2), (3, 0.3))], out)
 
+    def testToTupleStrData(self):
+        data = "a string!"
+        out = toTuple(0, data)
+        self.assertEqual(data, out)
+
+        data = ["a string!"]
+        out = toTuple(1, data)
+        self.assertEqual(data, out)
+
+        data = ["a string2"]
+        out = toTuple(1, data)
+        self.assertEqual(data, out)
+
+        data = [["partA", "partB", "partC"],]
+        out = toTuple(1, data)
+        self.assertEqual([("partA", "partB", "partC"), ], out)
+
+        data = [[[4, 8, 12], "four"], [[5, 10, 15], "five"]]
+        out = toTuple(1, data)
+        self.assertEqual([((4, 8, 12), 'four'), ((5, 10, 15), 'five')], out)
+
     def testGetNumElements(self):
         shape = (4,)
         nelements = getNumElements(shape)
@@ -98,7 +119,6 @@ def testGetNumElements(self):
     def testJsonToArray(self):
 
         # simple integer
-
         dt = np.dtype("i4")
         shape = [4, ]
         data = [0, 2, 4, 6]
@@ -151,6 +171,14 @@ def testJsonToArray(self):
         val = out[0]
         self.assertEqual(val, data)
 
+        # VLEN multi element
+        shape = [5, ]
+        data = ["parting", "is", "such", "sweet", "sorrow"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, ))
+        self.assertEqual(out[4], 'sorrow')
+
         # VLEN ascii
         dt = special_dtype(vlen=bytes)
         data = [b"one", b"two", b"three", b"four", b"five"]
@@ -167,22 +195,6 @@ def testJsonToArray(self):
         self.assertEqual(out[2], b"three")
         self.assertEqual(out[3], b"four")
 
-        # VLEN str
-        dt = special_dtype(vlen=str)
-        data = [
-            [b"part 1 - section A", b"part 1 - section B"],
-            [b"part 2 - section A", b"part 2 - section B"],
-        ]
-        shape = [2,]
-        out = jsonToArray(shape, dt, data)
-        self.assertTrue(isinstance(out, np.ndarray))
-        self.assertTrue("vlen" in out.dtype.metadata)
-        self.assertEqual(out.dtype.metadata["vlen"], str)
-        self.assertEqual(out.dtype.kind, "O")
-        self.assertEqual(out.shape, (2,))
-        self.assertEqual(out[0], tuple(data[0]))
-        self.assertEqual(out[1], tuple(data[1]))
-
         # VLEN unicode
         dt = special_dtype(vlen=bytes)
         data = ["one", "two", "three", "four", "five"]
@@ -207,6 +219,12 @@ def testJsonToArray(self):
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out[()], data)
 
+        data = ["I'm an UTF-8 null terminated string",]
+        shape = [1,]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[0], data[0])
+
         dt = np.dtype("S12")
         data = "eight: \u516b"
         out = jsonToArray(shape, dt, data)
@@ -223,9 +241,16 @@ def testJsonToArray(self):
         dt = np.dtype("S12")
         data = "eight: \u516b"
         out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out[0], b'eight: \xe5\x85\xab')
 
         # VLEN data
+        shape = []
+        dt = special_dtype(vlen=np.dtype("S10"))
+        data = ["foo", "bar"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
         dt = special_dtype(vlen=np.dtype("int32"))
         shape = [4, ]
         data = [
@@ -321,7 +346,7 @@ def testJsonToArray(self):
         e1 = out[1].tolist()
         self.assertEqual(e1, (5, b"five"))
 
-        data = [6, "six"]
+        data = [[6, "six"],]
         shape = [1,]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
@@ -352,7 +377,7 @@ def testJsonToArray(self):
         self.assertEqual(e0, (4, "four"))
 
         shape = [1, ]
-        data = [6, "six"]
+        data = [[6, "six"],]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
         self.assertEqual(out.shape, (1,))

From 15133347da056714f6626aacdc761acd594817c9 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 21 Apr 2025 16:10:59 +0200
Subject: [PATCH 037/129] added extra jsonToArray test

---
 src/h5json/array_util.py     | 3 +++
 test/unit/array_util_test.py | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index ed3ba979..eed15af6 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -108,6 +108,8 @@ def jsonToArray(data_shape, data_dtype, data_json):
     Return numpy array from the given json array.
     """
 
+    print(f"jsonToArray - data_shape: {data_shape} dtype: {data_dtype} data: {data_json}")
+
     if data_json is None:
         return np.array([]).astype(data_dtype)
 
@@ -141,6 +143,7 @@ def jsonToArray(data_shape, data_dtype, data_json):
     if arr.size != npoints:
         msg = "Input data doesn't match selection number of elements"
         msg += f" Expected {npoints}, but received: {arr.size}"
+        print(msg)
         raise ValueError(msg)
 
     return arr
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index 21a5849b..fc8167bf 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -364,6 +364,15 @@ def testJsonToArray(self):
         e1 = out[()].tolist()
         self.assertEqual(e1, (7, b"seven"))
 
+        data = [8, "eight"],
+        shape = [1,]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e1 = out[0].tolist()
+        self.assertEqual(e1, (8, b"eight"))
+
         # compound with VLEN element
 
         dt_str = special_dtype(vlen=str)

From 289bacbbd0bb4e66729dd60f2205ac2ee4b198d1 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 21 Apr 2025 16:49:48 +0200
Subject: [PATCH 038/129] support setting single element compounds with a list

---
 src/h5json/array_util.py     | 7 ++++++-
 test/unit/array_util_test.py | 9 +++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index eed15af6..f68391cd 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -131,12 +131,17 @@ def jsonToArray(data_shape, data_dtype, data_json):
     arr = np.zeros(data_shape, dtype=data_dtype)
 
     try:
-        # arr = np.array(data_json, dtype=data_dtype)
         arr[...] = data_json
     except UnicodeEncodeError:
         # Unable to encode data, encode as utf8 with surrogate escaping
         data_json = toTuple(np_shape_rank, data_json, encoding="utf8")
         arr[...] = data_json
+    except ValueError:
+        if npoints == 1:
+            # try setting the first and only element
+            arr[0] = tuple(data_json)
+        else:
+            raise
     # raise an exception of the array shape doesn't match the selection shape
     # allow if the array is a scalar and the selection shape is one element,
     # numpy is ok with this
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index fc8167bf..e9b1acd1 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -373,6 +373,15 @@ def testJsonToArray(self):
         e1 = out[0].tolist()
         self.assertEqual(e1, (8, b"eight"))
 
+        dt = np.dtype([("a", "i4"), ("b", "f4")])
+        shape = [1, ]
+        data = [42, 0.42]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1, ))
+        e1 = out[0]
+        self.assertEqual(e1[0], 42)
+
         # compound with VLEN element
 
         dt_str = special_dtype(vlen=str)

From 135d88f7c0a1fd33ac01370339a94132aa04bc50 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 21 Apr 2025 18:23:46 +0200
Subject: [PATCH 039/129] handle assigning sequence to multi-dim array

---
 src/h5json/array_util.py     | 43 ++++++++++++++++++++++++------------
 test/unit/array_util_test.py | 30 +++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index f68391cd..87d24da6 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -110,6 +110,16 @@ def jsonToArray(data_shape, data_dtype, data_json):
 
     print(f"jsonToArray - data_shape: {data_shape} dtype: {data_dtype} data: {data_json}")
 
+    def get_array(data, rank, dtype):
+        # helper function to create an array with encoding if needed
+        try:
+            arr = np.array(data, dtype=dtype)
+        except UnicodeEncodeError:
+            # Unable to encode data, encode as utf8 with surrogate escaping
+            data = toTuple(rank, data, encoding="utf8")
+            arr = np.array(data, dtype=dtype)
+        return arr
+
     if data_json is None:
         return np.array([]).astype(data_dtype)
 
@@ -127,21 +137,16 @@ def jsonToArray(data_shape, data_dtype, data_json):
 
     if type(data_json) in (list, tuple):
         data_json = toTuple(np_shape_rank, data_json)
+        print("data_json after toTuple:", data_json)
 
-    arr = np.zeros(data_shape, dtype=data_dtype)
-
-    try:
+    if isVlen(data_dtype):
+        # for vlen data we need to initialize of zero numpy array to ensure the right shape
+        arr = np.zeros(data_shape, dtype=data_dtype)
+        print("made vlen arr:", arr)
         arr[...] = data_json
-    except UnicodeEncodeError:
-        # Unable to encode data, encode as utf8 with surrogate escaping
-        data_json = toTuple(np_shape_rank, data_json, encoding="utf8")
-        arr[...] = data_json
-    except ValueError:
-        if npoints == 1:
-            # try setting the first and only element
-            arr[0] = tuple(data_json)
-        else:
-            raise
+    else:
+        arr = get_array(data_json, np_shape_rank, data_dtype)
+
     # raise an exception of the array shape doesn't match the selection shape
     # allow if the array is a scalar and the selection shape is one element,
     # numpy is ok with this
@@ -149,7 +154,17 @@ def jsonToArray(data_shape, data_dtype, data_json):
         msg = "Input data doesn't match selection number of elements"
         msg += f" Expected {npoints}, but received: {arr.size}"
         print(msg)
-        raise ValueError(msg)
+        # try adding an extra dimension to data_json
+        # for cases where e.g. compound types are not getting interpreted correctly
+        data_json = toTuple(np_shape_rank, [data_json, ])
+        arr = get_array(data_json, np_shape_rank, data_dtype)
+        if arr.size != npoints:
+            # still no good, raise error
+            raise ValueError(msg)
+
+    if arr.shape != tuple(data_shape):
+        print("reshaping to:", data_shape)
+        arr = arr.reshape(tuple(data_shape))
 
     return arr
 
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index e9b1acd1..b413d2e6 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -136,6 +136,36 @@ def testJsonToArray(self):
         self.assertEqual(out.shape, ())
         self.assertEqual(out[()], 42)
 
+        shape = (1, )  # one element
+        data = 42
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1, ))
+        self.assertEqual(out[0], 42)
+
+        shape = (10, )  # multi-1D
+        data = list(range(10))
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (10, ))
+        self.assertEqual(out[5], 5)
+
+        shape = (5, 4)  # multi-2D
+        data = []
+        for i in range(5):
+            data.append([42, ] * 4)
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, 4))
+        self.assertEqual(out[2, 3], 42)
+
+        shape = (5, 4)  # multi-2D, reshape input data
+        data = [42, ] * 20
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, 4))
+        self.assertEqual(out[2, 3], 42)
+
         dt = np.dtype("S10")  # fixed size string
         shape = [5, ]
         data = ["parting", "is", "such", "sweet", "sorrow"]

From 13ea473a41f06da32343dc0d2cb401c54d2bdfb2 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 21 Apr 2025 19:15:24 +0200
Subject: [PATCH 040/129] clean up debug print messages

---
 src/h5json/array_util.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 87d24da6..f47512fc 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -108,7 +108,7 @@ def jsonToArray(data_shape, data_dtype, data_json):
     Return numpy array from the given json array.
     """
 
-    print(f"jsonToArray - data_shape: {data_shape} dtype: {data_dtype} data: {data_json}")
+    # print(f"jsonToArray - data_shape: {data_shape} dtype: {data_dtype} data: {data_json}")
 
     def get_array(data, rank, dtype):
         # helper function to create an array with encoding if needed
@@ -137,12 +137,10 @@ def get_array(data, rank, dtype):
 
     if type(data_json) in (list, tuple):
         data_json = toTuple(np_shape_rank, data_json)
-        print("data_json after toTuple:", data_json)
 
     if isVlen(data_dtype):
         # for vlen data we need to initialize of zero numpy array to ensure the right shape
         arr = np.zeros(data_shape, dtype=data_dtype)
-        print("made vlen arr:", arr)
         arr[...] = data_json
     else:
         arr = get_array(data_json, np_shape_rank, data_dtype)
@@ -153,7 +151,6 @@ def get_array(data, rank, dtype):
     if arr.size != npoints:
         msg = "Input data doesn't match selection number of elements"
         msg += f" Expected {npoints}, but received: {arr.size}"
-        print(msg)
         # try adding an extra dimension to data_json
         # for cases where e.g. compound types are not getting interpreted correctly
         data_json = toTuple(np_shape_rank, [data_json, ])
@@ -163,7 +160,6 @@ def get_array(data, rank, dtype):
             raise ValueError(msg)
 
     if arr.shape != tuple(data_shape):
-        print("reshaping to:", data_shape)
         arr = arr.reshape(tuple(data_shape))
 
     return arr

From 3b87203523a65e2a3fca48b17e659f6ac20c748d Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 9 May 2025 14:17:11 +0200
Subject: [PATCH 041/129] fix jsonToArray for single element compoound values

---
 src/h5json/array_util.py     | 9 ++++++++-
 test/unit/array_util_test.py | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index f47512fc..cb39cd55 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -143,7 +143,14 @@ def get_array(data, rank, dtype):
         arr = np.zeros(data_shape, dtype=data_dtype)
         arr[...] = data_json
     else:
-        arr = get_array(data_json, np_shape_rank, data_dtype)
+        try:
+            arr = get_array(data_json, np_shape_rank, data_dtype)
+        except ValueError:
+            if npoints <= 1 and isinstance(data_json, list):
+                # try converting data to a tuple
+                arr = get_array(tuple(data_json), np_shape_rank, data_dtype)
+            else:
+                raise
 
     # raise an exception of the array shape doesn't match the selection shape
     # allow if the array is a scalar and the selection shape is one element,
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index b413d2e6..1ede343d 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -394,7 +394,7 @@ def testJsonToArray(self):
         e1 = out[()].tolist()
         self.assertEqual(e1, (7, b"seven"))
 
-        data = [8, "eight"],
+        data = [8, "eight"]
         shape = [1,]
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))

From ef390ec8c663bfe07fa62dbbcdd80aef47ace16e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 30 May 2025 13:46:46 +0200
Subject: [PATCH 042/129] resturcture soruce tree

---
 pyproject.toml                                    | 4 ++--
 src/h5json/{reader => h5pystore}/h5py_reader.py   | 2 +-
 src/h5json/{writer => h5pystore}/h5py_writer.py   | 2 +-
 src/h5json/{reader => }/h5reader.py               | 0
 src/h5json/h5tojson/h5tojson.py                   | 4 ++--
 src/h5json/{writer => }/h5writer.py               | 0
 src/h5json/{reader => jsonstore}/__init__.py      | 0
 src/h5json/{reader => jsonstore}/h5json_reader.py | 2 +-
 src/h5json/{writer => jsonstore}/h5json_writer.py | 2 +-
 src/h5json/jsontoh5/jsontoh5.py                   | 4 ++--
 test/unit/h5json_reader_test.py                   | 2 +-
 test/unit/h5json_writer_test.py                   | 2 +-
 test/unit/h5py_reader_test.py                     | 2 +-
 test/unit/h5py_writer_test.py                     | 4 ++--
 14 files changed, 15 insertions(+), 15 deletions(-)
 rename src/h5json/{reader => h5pystore}/h5py_reader.py (99%)
 rename src/h5json/{writer => h5pystore}/h5py_writer.py (99%)
 rename src/h5json/{reader => }/h5reader.py (100%)
 rename src/h5json/{writer => }/h5writer.py (100%)
 rename src/h5json/{reader => jsonstore}/__init__.py (100%)
 rename src/h5json/{reader => jsonstore}/h5json_reader.py (99%)
 rename src/h5json/{writer => jsonstore}/h5json_writer.py (99%)

diff --git a/pyproject.toml b/pyproject.toml
index b45d1203..26997ae8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,8 +51,8 @@ build-backend = "setuptools.build_meta"
 package-dir = { "" = "src" }
 packages = [
     "h5json",
-    "h5json.reader",
-    "h5json.writer",
+    "h5json.jsonstore",
+    "h5json.h5pystore",
     "h5json.h5tojson",
     "h5json.jsontoh5",
     "h5json.schema",
diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
similarity index 99%
rename from src/h5json/reader/h5py_reader.py
rename to src/h5json/h5pystore/h5py_reader.py
index 7042a259..3510b328 100644
--- a/src/h5json/reader/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -20,7 +20,7 @@
 from .. import filters
 
 from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
-from .h5reader import H5Reader
+from ..h5reader import H5Reader
 
 
 class H5pyReader(H5Reader):
diff --git a/src/h5json/writer/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
similarity index 99%
rename from src/h5json/writer/h5py_writer.py
rename to src/h5json/h5pystore/h5py_writer.py
index 2d281338..f2487826 100644
--- a/src/h5json/writer/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -19,7 +19,7 @@
 from ..array_util import jsonToArray
 from .. import selections
 from .. import filters
-from .h5writer import H5Writer
+from ..h5writer import H5Writer
 
 
 class H5pyWriter(H5Writer):
diff --git a/src/h5json/reader/h5reader.py b/src/h5json/h5reader.py
similarity index 100%
rename from src/h5json/reader/h5reader.py
rename to src/h5json/h5reader.py
diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/h5tojson/h5tojson.py
index a2259dae..b479cdd4 100755
--- a/src/h5json/h5tojson/h5tojson.py
+++ b/src/h5json/h5tojson/h5tojson.py
@@ -15,8 +15,8 @@
 import logging.handlers
 
 from h5json import Hdf5db
-from h5json.writer.h5json_writer import H5JsonWriter
-from h5json.reader.h5py_reader import H5pyReader
+from h5json.jsonstore.h5json_writer import H5JsonWriter
+from h5json.h5pystore.h5py_reader import H5pyReader
 
 
 def main():
diff --git a/src/h5json/writer/h5writer.py b/src/h5json/h5writer.py
similarity index 100%
rename from src/h5json/writer/h5writer.py
rename to src/h5json/h5writer.py
diff --git a/src/h5json/reader/__init__.py b/src/h5json/jsonstore/__init__.py
similarity index 100%
rename from src/h5json/reader/__init__.py
rename to src/h5json/jsonstore/__init__.py
diff --git a/src/h5json/reader/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
similarity index 99%
rename from src/h5json/reader/h5json_reader.py
rename to src/h5json/jsonstore/h5json_reader.py
index 455b185c..4c4eef90 100644
--- a/src/h5json/reader/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -17,7 +17,7 @@
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray
 from .. import selections
-from .h5reader import H5Reader
+from ..h5reader import H5Reader
 
 
 class H5JsonReader(H5Reader):
diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
similarity index 99%
rename from src/h5json/writer/h5json_writer.py
rename to src/h5json/jsonstore/h5json_writer.py
index 759f0aa2..4a94ad02 100644
--- a/src/h5json/writer/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -12,7 +12,7 @@
 
 import json
 
-from .h5writer import H5Writer
+from ..h5writer import H5Writer
 from ..objid import getUuidFromId, getCollectionForId
 from ..array_util import bytesArrayToList
 from .. import selections
diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/jsontoh5/jsontoh5.py
index fb58abb7..d572e58e 100755
--- a/src/h5json/jsontoh5/jsontoh5.py
+++ b/src/h5json/jsontoh5/jsontoh5.py
@@ -15,8 +15,8 @@
 import logging.handlers
 
 from h5json import Hdf5db
-from h5json.writer.h5py_writer import H5pyWriter
-from h5json.reader.h5json_reader import H5JsonReader
+from h5json.h5pystore.h5py_writer import H5pyWriter
+from h5json.jsonstore.h5json_reader import H5JsonReader
 
 
 def main():
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index 1c44e13c..f49a86a8 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -13,7 +13,7 @@
 import logging
 import numpy as np
 from h5json import Hdf5db
-from h5json.reader.h5json_reader import H5JsonReader
+from h5json.jsonstore.h5json_reader import H5JsonReader
 from h5json import selections
 
 
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index e68314d7..0f1fb59a 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -14,7 +14,7 @@
 import logging
 import numpy as np
 from h5json import Hdf5db
-from h5json.writer.h5json_writer import H5JsonWriter
+from h5json.jsonstore.h5json_writer import H5JsonWriter
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
 
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index ef42a29d..45de125e 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -13,7 +13,7 @@
 
 import logging
 from h5json import Hdf5db
-from h5json.reader.h5py_reader import H5pyReader
+from h5json.h5pystore.h5py_reader import H5pyReader
 
 
 class H5pyReaderTest(unittest.TestCase):
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index a103873b..f70acb59 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -15,8 +15,8 @@
 import h5py
 import numpy as np
 from h5json import Hdf5db
-from h5json.reader.h5json_reader import H5JsonReader
-from h5json.writer.h5py_writer import H5pyWriter
+from h5json.jsonstore.h5json_reader import H5JsonReader
+from h5json.h5pystore.h5py_writer import H5pyWriter
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
 

From 8b426258d79e4813ada53cf37a782a2bedcb9c6c Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 30 May 2025 20:00:41 +0200
Subject: [PATCH 043/129] added proptype hsdsreader

---
 pyproject.toml                        |   1 +
 src/h5json/config.py                  | 213 +++++++++++++
 src/h5json/h5pystore/__init__.py      |   0
 src/h5json/h5pystore/h5py_reader.py   |   2 +-
 src/h5json/h5reader.py                |   2 +-
 src/h5json/hdf5db.py                  |   2 +-
 src/h5json/jsonstore/h5json_reader.py |   3 +-
 src/h5json/openid.py                  | 438 ++++++++++++++++++++++++++
 test/unit/hsds_reader_test.py         | 114 +++++++
 9 files changed, 770 insertions(+), 5 deletions(-)
 create mode 100755 src/h5json/config.py
 create mode 100644 src/h5json/h5pystore/__init__.py
 create mode 100644 src/h5json/openid.py
 create mode 100644 test/unit/hsds_reader_test.py

diff --git a/pyproject.toml b/pyproject.toml
index 26997ae8..879e7ffb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ packages = [
     "h5json",
     "h5json.jsonstore",
     "h5json.h5pystore",
+    "h5json.hsdsstore",
     "h5json.h5tojson",
     "h5json.jsontoh5",
     "h5json.schema",
diff --git a/src/h5json/config.py b/src/h5json/config.py
new file mode 100755
index 00000000..b7602ffd
--- /dev/null
+++ b/src/h5json/config.py
@@ -0,0 +1,213 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import os
+import json
+
+
+class Config:
+    """
+    User Config state
+    """
+    _cfg = {}  # global state
+
+    def __init__(self, config_file=None, **kwargs):
+        if Config._cfg:
+            return  # already initialized
+        if config_file:
+            self._config_file = config_file
+        elif os.path.isfile(".hscfg"):
+            self._config_file = ".hscfg"
+        else:
+            self._config_file = os.path.expanduser("~/.hscfg")
+        # process config file if found
+        if os.path.isfile(self._config_file):
+            line_number = 0
+            with open(self._config_file) as f:
+                for line in f:
+                    line_number += 1
+                    s = line.strip()
+                    if not s:
+                        continue
+                    if s[0] == '#':
+                        # comment line
+                        continue
+                    fields = s.split('=')
+                    if len(fields) < 2:
+                        print(f"config file: {self._config_file} line: {line_number} is not valid")
+                        continue
+                    k = fields[0].strip()
+                    v = fields[1].strip()
+                    if k == "complex_names":
+                        self.complex_names = v
+                    elif k == "bool_names":
+                        self.bool_names = v
+                    elif k == "track_order":
+                        self.track_order = v
+                    else:
+                        Config._cfg[k] = v
+
+        # add standard keys if not already picked up
+        for k in ("hs_endpoint", "hs_username", "hs_password", "hs_api_key"):
+            if k not in Config._cfg:
+                Config._cfg[k] = ""
+
+        # override any config values with environment variable if found
+        for k in Config._cfg.keys():
+            if k.upper() in os.environ:
+                Config._cfg[k] = os.environ[k.upper()]
+
+        # update any values that are passed in to the constructor
+        for k in kwargs.keys():
+            Config._cfg[k] = kwargs[k]
+
+        # finally, set defaults for any expected keys that are not already set
+        for k in ("hs_endpoint", "hs_username", "hs_endpoint"):
+            if k not in Config._cfg:
+                Config._cfg[k] = None
+        if "bool_names" not in Config._cfg:
+            Config._cfg["bool_names"] = (b"FALSE", b"TRUE")
+        if "complex_names" not in Config._cfg:
+            Config._cfg["complex_names"] = ("r", "i")
+        if "track_order" not in Config._cfg:
+            Config._cfg["track_order"] = False
+
+    def __getitem__(self, name):
+        """ Get a config item  """
+        if name not in Config._cfg:
+            if name.upper() in os.environ:
+                Config._cfg[name] = os.environ[name.upper()]
+            else:
+                return None
+        return Config._cfg[name]
+
+    def get(self, name, default):
+        """ return config value for name or default if None """
+        val = self.__getitem__(name)
+        if val is None:
+            return default
+        else:
+            return default
+
+    def __setitem__(self, name, obj):
+        """ set config item """
+        Config._cfg[name] = obj
+
+    def __delitem__(self, name):
+        """ Delete option. """
+        del Config._cfg[name]
+
+    def __len__(self):
+        return len(Config._cfg)
+
+    def __iter__(self):
+        """ Iterate over config names """
+        keys = Config._cfg.keys()
+        for key in keys:
+            yield key
+
+    def __contains__(self, name):
+        return name in Config._cfg
+
+    def __repr__(self):
+        return json.dumps(Config._cfg)
+
+    def keys(self):
+        return Config._cfg.keys()
+
+    @property
+    def hs_endpoint(self):
+        return Config._cfg.get("hs_endpoint")
+
+    @property
+    def hs_username(self):
+        return Config._cfg.get("hs_username")
+
+    @property
+    def hs_password(self):
+        return Config._cfg.get("hs_password")
+
+    @property
+    def hs_api_key(self):
+        return Config._cfg.get("hs_api_key")
+
+    @property
+    def bool_names(self):
+        if "bool_names" in Config._cfg:
+            names = Config._cfg["bool_names"]
+        else:
+            names = (b"FALSE", b"TRUE")
+        return names
+
+    @bool_names.setter
+    def bool_names(self, value):
+        if isinstance(value, str):
+            names = value.split(())
+            if len(names) < 2:
+                raise ValueError("bool_names must have two items")
+            elif len(names) == 2:
+                pass
+            else:
+                names = names[:2]  # just use the first two items
+        elif len(value) != 2:
+            raise ValueError("expected two-element list for bool_names")
+        else:
+            names = value
+        Config._cfg["bool_names"] = tuple(names)
+
+    @property
+    def complex_names(self):
+        if "complex_names" in Config._cfg:
+            names = Config._cfg["complex_names"]
+        else:
+            names = ("r", "i")
+        return names
+
+    @complex_names.setter
+    def complex_names(self, value):
+        if isinstance(value, str):
+            names = value.split()
+            if len(names) < 2:
+                raise ValueError("complex_names must have two items")
+            elif len(names) == 2:
+                pass
+            else:
+                names = names[:2]  # just use the first two items
+        elif len(value) != 2:
+            raise ValueError("complex_names must have two values")
+        else:
+            names = value
+
+        Config._cfg["complex_names"] = tuple(names)
+
+    @property
+    def track_order(self):
+        if "track_order" in Config._cfg:
+            track = Config._cfg["track_order"]
+        else:
+            track = False
+        return track
+
+    @track_order.setter
+    def track_order(self, value):
+        if isinstance(value, str):
+            tokens = value.split()
+            if len(tokens) == 0:
+                track = False
+            else:
+                track = bool(tokens[0])  # strip any comments
+        else:
+            track = bool(value)
+        Config._cfg["track_order"] = track
+
+
+def get_config(config_file=None, **kwargs):
+    return Config(config_file=config_file, **kwargs)
diff --git a/src/h5json/h5pystore/__init__.py b/src/h5json/h5pystore/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index 3510b328..dab44078 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -465,7 +465,7 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
 
         return obj_json
 
-    def getDatasetValues(self, dset_id, sel=None):
+    def getDatasetValues(self, dset_id, sel=None, dtype=None):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index 377bc3f9..541bb262 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -50,7 +50,7 @@ def getAttribute(self, obj_id, name, includeData=True):
         pass
 
     @abstractmethod
-    def getDatasetValues(self, obj_id, sel=None):
+    def getDatasetValues(self, obj_id, sel=None, dtype=None):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index c632d93c..6ee8aaa2 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -472,7 +472,7 @@ def getDatasetValues(self, dset_id, sel):
 
         dtype = self.getDtype(dset_json)
         if self.reader:
-            arr = self.reader.getDatasetValues(dset_id, sel)
+            arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
         else:
             # TBD: Initialize with fill value if non-zero
             arr = np.zeros(sel.shape, dtype=dtype)
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
index 4c4eef90..78df4567 100644
--- a/src/h5json/jsonstore/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -165,7 +165,7 @@ def getDtype(self, obj_json):
         dtype = createDataType(type_item)
         return dtype
 
-    def getDatasetValues(self, obj_id, sel=None):
+    def getDatasetValues(self, obj_id, sel=None, dtype=None):
         """
         Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
@@ -191,7 +191,6 @@ def getDatasetValues(self, obj_id, sel=None):
         else:
             dims = shape_json["dims"]
 
-        dtype = self.getDtype(json_obj)
         arr = jsonToArray(dims, dtype, json_value)
         if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
             pass  # just return the entire array
diff --git a/src/h5json/openid.py b/src/h5json/openid.py
new file mode 100644
index 00000000..bb59af54
--- /dev/null
+++ b/src/h5json/openid.py
@@ -0,0 +1,438 @@
+import os
+import sys
+import json
+import requests
+import time
+from abc import ABC, abstractmethod
+from datetime import datetime
+
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+# Azure
+try:
+    import adal
+except ModuleNotFoundError:
+    pass  # change this to the eprint below to see the import error
+    # eprint()"Unable to import azure auth packages")
+
+# Google
+try:
+    from google_auth_oauthlib.flow import InstalledAppFlow as GoogleInstalledAppFlow
+    from google.auth.transport.requests import Request as GoogleRequest
+    from google.oauth2.credentials import Credentials as GoogleCredentials
+    from google.oauth2 import id_token as GoogleIDToken
+except ModuleNotFoundError:
+    pass  # change this to the eprint below to see the import error
+    # eprint("Unable to import google auth packages")
+
+
+from . import config as hsconfig
+
+
+class OpenIDHandler(ABC):
+
+    def __init__(self, endpoint, use_token_cache=True, username=None, password=None):
+        """Initialize the token."""
+
+        # Location of the token cache.
+        self._token_cache_file = os.path.expanduser('~/.hstokencfg')
+        self._endpoint = endpoint
+        self._username = username
+        self._password = password
+
+        # The _token attribute should be a dict with at least the following keys:
+        #
+        # accessToken - The OpenID token to send.
+        # refreshToken - The refresh token (optional).
+        # expiresOn - The unix timestamp when the token expires (optional).
+
+        if not use_token_cache or not os.path.isfile(self._token_cache_file):
+            self._token = None
+        else:
+            if username:
+                file_key = username + '@' + endpoint
+            else:
+                file_key = endpoint
+            with open(self._token_cache_file, 'r') as token_file:
+                self._token = json.load(token_file).get(file_key, None)
+
+    @abstractmethod
+    def acquire(self):
+        """Acquire a new token from the provider."""
+        pass
+
+    @abstractmethod
+    def refresh(self):
+        """Refresh an existing token with the provider."""
+        pass
+
+    @property
+    def username(self):
+        """ Return username if known """
+        return self._username
+
+    @property
+    def expired(self):
+        """Return if the token is expired."""
+        t = self._token
+        # add some buffer to account for clock skew
+        return t is not None and 'expiresOn' in t and time.time() + 10.0 >= t['expiresOn']
+
+    @property
+    def token(self):
+        """Return the token if valid, otherwise get a new one."""
+
+        if self.expired:
+            self.refresh()
+            if self._token:
+                self.write_token_cache()
+
+        if self._token is None:
+            self.acquire()
+            self.write_token_cache()
+
+        return self._token['accessToken']
+
+    def write_token_cache(self):
+        """Write the token to a file cache."""
+
+        cache_exists = os.path.isfile(self._token_cache_file)
+
+        if self._username:
+            file_key = self._username + '@' + self._endpoint
+        else:
+            file_key = self._endpoint
+
+        # Create a new cache file.
+        if not cache_exists and self._token is not None:
+            with open(self._token_cache_file, 'w') as token_file:
+                json.dump({file_key: self._token}, token_file)
+
+        # Update an exisiting cache file.
+        elif cache_exists:
+            with open(self._token_cache_file, 'r+') as token_file:
+                cache = json.loads(token_file.read())
+
+                # Store valid tokens.
+                if self._token is not None:
+                    cache[file_key] = self._token
+
+                # Delete invalid tokens.
+                elif file_key in cache:
+                    del cache[file_key]
+
+                token_file.seek(0)
+                token_file.truncate(0)
+                json.dump(cache, token_file)
+
+
+class AzureOpenID(OpenIDHandler):
+
+    AUTHORITY_URI = 'https://login.microsoftonline.com'  # login endpoint for AD auth
+
+    def __init__(self, endpoint, config=None):
+        """Store configuration."""
+
+        # Configuration manager
+        hs_config = hsconfig.get_config()
+
+        # Config is a dictionary.
+        if isinstance(config, dict):
+            self.config = config
+
+        # Maybe client_secrets are in environment variables?
+        else:
+
+            self.config = {
+                'AD_APP_ID': hs_config.get("hs_ad_app_id", None),
+                'AD_TENANT_ID': hs_config.get("hs_ad_tenant_id", None),
+                'AD_RESOURCE_ID': hs_config.get("hs_ad_resource_id", None),
+                'AD_CLIENT_SECRET': hs_config.get("hs_ad_client_secret", None)
+            }
+
+        if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']:
+            use_token_cache = False
+        else:
+            use_token_cache = True
+
+        super().__init__(endpoint, use_token_cache=use_token_cache)
+
+    def write_token_cache(self):
+        if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']:
+            pass  # don't use token cache for unattended authentication
+        else:
+            super().write_token_cache()
+
+    def acquire(self):
+        """Acquire a new Azure token."""
+
+        if "adal" not in sys.modules:
+            msg = "adal module not found, run: pip install -e . '.[azure]'"
+            raise ModuleNotFoundError(msg)
+
+        app_id = self.config["AD_APP_ID"]
+        resource_id = self.config["AD_RESOURCE_ID"]
+        tenant_id = self.config["AD_TENANT_ID"]
+        client_secret = self.config.get("AD_CLIENT_SECRET", None)
+        authority_uri = self.AUTHORITY_URI + '/' + tenant_id
+
+        # Try to get a token using different oauth flows.
+        context = adal.AuthenticationContext(authority_uri, enable_pii=True, api_version=None)
+
+        try:
+            if client_secret is not None:
+                code = context.acquire_token_with_client_credentials(resource_id, app_id, client_secret)
+            else:
+                code = context.acquire_user_code(resource_id, app_id)
+
+        except Exception as e:
+            eprint(f"unable to process AD token: {e}")
+            self._token = None
+            self.write_token_cache()
+            raise
+
+        if "message" in code:
+            eprint(code["message"])
+            mgmt_token = context.acquire_token_with_device_code(resource_id, code, app_id)
+
+        elif "accessToken" in code:
+            mgmt_token = code
+
+        else:
+            eprint("Could not authenticate with AD")
+
+        # Only store some fields.
+        self._token = {
+            'accessToken': mgmt_token['accessToken'],
+            'refreshToken': mgmt_token.get('refreshToken', None),
+            'tenantId': mgmt_token.get('tenantId', tenant_id),
+            'clientId': mgmt_token.get('_clientId', app_id),
+            'resource': mgmt_token.get('resource', resource_id)
+        }
+
+        # Parse time to timestamp.
+        if 'expiresOn' in mgmt_token:
+            expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f')
+            self._token['expiresOn'] = expire_dt.timestamp()
+
+    def refresh(self):
+        """Try to renew an Azure token."""
+
+        try:
+
+            # This will work for device code flow, but not with client
+            # credentials. If we have the secret, we can just request a new
+            # token anyways.
+
+            authority_uri = self.AUTHORITY_URI + '/' + self._token['tenantId']
+            context = adal.AuthenticationContext(authority_uri, api_version=None)
+            mgmt_token = context.acquire_token_with_refresh_token(self._token['refreshToken'],
+                                                                  self._token['clientId'],
+                                                                  self._token['resource'],
+                                                                  None)
+
+            # New token does not have all the metadata.
+            self._token['accessToken'] = mgmt_token['accessToken']
+            self._token['refreshToken'] = mgmt_token['refreshToken']
+
+            # Parse time to timestamp.
+            if 'expiresOn' in mgmt_token:
+                expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f')
+                self._token['expiresOn'] = expire_dt.timestamp()
+
+        except Exception:
+            self._token = None
+
+
+class GoogleOpenID(OpenIDHandler):
+
+    def __init__(self, endpoint, config=None, scopes=None):
+        """Store configuration."""
+
+        if "google.oauth2" not in sys.modules:
+            msg = "google.oauth2 module not found, run: pip install -e . '.[google]'"
+            raise ModuleNotFoundError(msg)
+
+        # Configuration manager
+        hs_config = hsconfig.get_config()
+
+        if scopes is None:
+            scopes = hs_config.get('hs_google_scopes', 'openid').split()
+        self.scopes = scopes
+
+        # Config is a client_secrets dictionary.
+        if isinstance(config, dict):
+            self.config = config
+
+        # Config points to a client_secrets.json file.
+        elif isinstance(config, str) and os.path.isfile(config):
+            with open(config, 'r') as f:
+                self.config = json.loads(f.read())
+
+        # Maybe client_secrets are in environment variables?
+        else:
+            self.config = {
+                'installed': {
+                    'project_id': hs_config.get('hs_google_project_id', None),
+                    'client_id': hs_config.get('hs_google_client_id', None),
+                    'client_secret': hs_config.get('hs_google_client_secret', None),
+                    'auth_uri': 'https://accounts.google.com/o/oauth2/auth',
+                    'token_uri': 'https://oauth2.googleapis.com/token',
+                    'auth_provider_x509_cert_url': 'https://www.googleapis.com/oauth2/v1/certs',
+                    'redirect_uris': ['urn:ietf:wg:oauth:2.0:oob', 'http://localhost']
+                }
+            }
+
+        super().__init__(endpoint)
+
+    def _parse(self, creds):
+        """Parse credentials."""
+
+        # NOTE: In Google OpenID, if a client is set up for InstalledAppFlow
+        # then the client_secret is not actually treated as a secret. Acquire
+        # will ALWAYS prompt for user input before granting a token.
+
+        token = {
+            'accessToken': creds.id_token,
+            'refreshToken': creds.refresh_token,
+            'tokenUri': creds.token_uri,
+            'clientId': creds.client_id,
+            'clientSecret': creds.client_secret,
+            'scopes': creds.scopes
+        }
+
+        # The expiry field that is in creds is for the OAuth token, not the
+        # OpenID token. We need to validate the OpenID tokenn to get the exp.
+        idinfo = GoogleIDToken.verify_oauth2_token(creds.id_token, GoogleRequest())
+        if 'exp' in idinfo:
+            token['expiresOn'] = idinfo['exp']
+
+        return token
+
+    def acquire(self):
+        """Acquire a new Google token."""
+
+        flow = GoogleInstalledAppFlow.from_client_config(self.config,
+                                                         scopes=self.scopes)
+        creds = flow.run_console()
+        self._token = self._parse(creds)
+
+    def refresh(self):
+        """Try to renew a token."""
+
+        try:
+
+            token = self._token
+            creds = GoogleCredentials(token=None,
+                                      refresh_token=token['refreshToken'],
+                                      scopes=token['scopes'],
+                                      token_uri=token['tokenUri'],
+                                      client_id=token['clientId'],
+                                      client_secret=token['clientSecret'])
+
+            creds.refresh(GoogleRequest())
+            self._token = self._parse(creds)
+
+        except Exception:
+            self._token = None
+
+
+class KeycloakOpenID(OpenIDHandler):
+
+    def __init__(self, endpoint, config=None, scopes=None, username=None, password=None):
+        """Store configuration."""
+
+        # Configuration manager
+        hs_config = hsconfig.get_config()
+
+        if scopes is None:
+            scopes = hs_config.get('hs_keycloak_scopes', 'openid').split()
+        self.scopes = scopes
+
+        # Config is a client_secrets dictionary.
+        if isinstance(config, dict):
+            self.config = config
+
+        # Config points to a client_secrets.json file.
+        elif isinstance(config, str) and os.path.isfile(config):
+            with open(config, 'r') as f:
+                self.config = json.loads(f.read())
+
+        # Maybe configs are in environment variables?
+        else:
+            self.config = {
+                'keycloak_client_id': hs_config.get('hs_keycloak_client_id', None),
+                'keycloak_client_secret': hs_config.get('hs_keycloak_client_secret', None),
+                'keycloak_realm': hs_config.get('hs_keycloak_realm', None),
+                'keycloak_uri': hs_config.get('hs_keycloak_uri', None)
+            }
+
+        super().__init__(endpoint, username=username, password=password)
+
+    def _getKeycloakUrl(self):
+        if not self.config['keycloak_uri']:
+            raise KeyError("keycloak_uri not set")
+        if not self.config['keycloak_realm']:
+            raise KeyError("Keycloak realm not set")
+        if not self.config['keycloak_client_id']:
+            raise KeyError("keycloak client_id not set")
+
+        url = self.config['keycloak_uri']
+        url += "/realms/"
+        url += self.config['keycloak_realm']
+        url += "/protocol/openid-connect/token"
+
+        return url
+
+    def _parse(self, creds):
+        """Parse credentials."""
+
+        # validate json returned by keycloak
+        if "token_type" not in creds:
+            raise IOError("Unexpected Keycloak JWT, no token_type")
+        if creds["token_type"].lower() != "bearer":
+            raise IOError("Unexpected Keycloak JWT, expected Bearer token")
+
+        token = {}
+        if "access_token" not in creds:
+            raise IOError("Unexpected Keycloak JWT, no access_token")
+        token["accessToken"] = creds["access_token"]
+        if "refesh_token" in creds:
+            token["refreshToken"] = creds["refresh_token"]
+        if "expires_in" in creds:
+            now = time.time()
+            token['expiresOn'] = now + creds["expires_in"]
+
+        # TBD: client_secret
+        # TBD: scopes
+        # TBD: client_id
+
+        return token
+
+    def acquire(self):
+        """Acquire a new Keycloak token."""
+        keycloak_url = self._getKeycloakUrl()
+
+        headers = {"Content-Type": "application/x-www-form-urlencoded"}
+        body = {}
+        body["username"] = self._username
+        body["password"] = self._password
+        body["grant_type"] = "password"
+        body["client_id"] = self.config.get("keycloak_client_id")
+        rsp = requests.post(keycloak_url, data=body, headers=headers)
+
+        if rsp.status_code not in (200, 201):
+            print(f"POST error: {rsp.status_code}")
+            raise IOError(f"Keycloak response: {rsp.status_code}")
+
+        creds = rsp.json()  # TBD: catch json format errors?
+        self._token = self._parse(creds)
+
+    def refresh(self):
+        """Try to renew a token."""
+        # TBD
+        # unclear if refresh is supported without a client secret
+        self._token = None
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
new file mode 100644
index 00000000..cbc7f8bb
--- /dev/null
+++ b/test/unit/hsds_reader_test.py
@@ -0,0 +1,114 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+import numpy as np
+from h5json import Hdf5db
+from h5json.hsdsstore.hsds_reader import HSDSReader
+from h5json import selections
+
+
+def get_endpoint():
+    return "http://hsds.hdf.test:5101"
+
+
+def get_username():
+    return "test_user1"
+
+
+def get_password():
+    return "test"
+
+
+class HSDSReaderTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(HSDSReaderTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.DEBUG)
+        handler = logging.FileHandler("./hsds_reader_test.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+
+    def testSimple(self):
+        filepath = "/home/test_user1/test/tall.h5"
+        kwargs = {"app_logger": self.log}
+        with Hdf5db(**kwargs) as db:
+            kwargs["username"] = get_username()
+            kwargs["password"] = get_password()
+            kwargs["endpoint"] = get_endpoint()
+            hsds_reader = HSDSReader(filepath, **kwargs)
+            db.reader = hsds_reader
+            root_id = db.getObjectIdByPath("/")
+            root_json = db.getObjectById(root_id)
+
+            root_attrs = root_json["attributes"]
+            self.assertEqual(len(root_attrs), 2)
+            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+            root_links = root_json["links"]
+            self.assertEqual(len(root_links), 2)
+            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+            g1_link = root_links["g1"]
+            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+            g1_id = g1_link["id"]
+            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            dset_json = db.getObjectById(dset111_id)
+            dset_type = dset_json["type"]
+            self.assertEqual(dset_type["class"], "H5T_INTEGER")
+            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 2)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+            dset_shape = dset_json["shape"]
+            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(dset_shape["dims"], [10, 10])
+            sel_all = selections.select((10, 10), ...)
+            arr = db.getDatasetValues(dset111_id, sel_all)
+            self.assertTrue(isinstance(arr, np.ndarray))
+            self.assertEqual(arr.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    v = arr[i, j]
+                    self.assertEqual(v, i * j)
+
+            # try adding an attribute
+            db.createAttribute(dset111_id, "attr3", value=42)
+            dset_json = db.getObjectById(dset111_id)
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 3)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+            attr3_json = dset_attrs["attr3"]
+            attr3_shape = attr3_json["shape"]
+            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+            attr3_type = attr3_json["type"]
+            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+            attr3_value = attr3_json["value"]
+            self.assertEqual(attr3_value, 42)
+
+            db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()

From 638ab00940c99f03f883e8b0992fa030704567cc Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 2 Jun 2025 20:38:53 +0200
Subject: [PATCH 044/129] fix flake8 error

---
 src/h5json/openid.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/h5json/openid.py b/src/h5json/openid.py
index bb59af54..af38d94a 100644
--- a/src/h5json/openid.py
+++ b/src/h5json/openid.py
@@ -6,6 +6,8 @@
 from abc import ABC, abstractmethod
 from datetime import datetime
 
+from . import config as hsconfig
+
 
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
@@ -29,9 +31,6 @@ def eprint(*args, **kwargs):
     # eprint("Unable to import google auth packages")
 
 
-from . import config as hsconfig
-
-
 class OpenIDHandler(ABC):
 
     def __init__(self, endpoint, use_token_cache=True, username=None, password=None):

From 7e17e7b76cf102cce3f04e927629437e705b224a Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 3 Jun 2025 11:43:35 +0200
Subject: [PATCH 045/129] added missing hsds_reaader files

---
 src/h5json/hsdsstore/hsds_reader.py | 281 ++++++++++
 src/h5json/hsdsstore/httpconn.py    | 791 ++++++++++++++++++++++++++++
 2 files changed, 1072 insertions(+)
 create mode 100644 src/h5json/hsdsstore/hsds_reader.py
 create mode 100644 src/h5json/hsdsstore/httpconn.py

diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
new file mode 100644
index 00000000..5740a29c
--- /dev/null
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -0,0 +1,281 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import json
+import logging
+
+from ..objid import getCollectionForId, getUuidFromId
+
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray
+from .. import selections
+from ..h5reader import H5Reader
+from .httpconn import HttpConn
+
+
+class HSDSReader(H5Reader):
+    """
+    This class can be used by HDF5DB to read content from an hdf5-json file
+    """
+
+    def __init__(
+        self,
+        domain_path,
+        app_logger=None,
+        endpoint=None,
+        username=None,
+        password=None,
+        bucket=None,
+        mode='r',
+        api_key=None,
+        use_session=True,
+        expire_time=0,
+        max_objects=0,
+        max_age=0,
+        retries=3,
+        timeout=30.0,
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        self.log.debug("HSDSReader init(")
+
+        kwargs = {}
+        self.log.debug(f"    domain_path: {domain_path}")
+        if endpoint:
+            self.log.debug(f"    endpoint: {endpoint}")
+            kwargs["endpoint"] = endpoint
+        if username:
+            self.log.debug(f"    username: {username}")
+            kwargs["username"] = username
+        if password:
+            self.log.debug(f"    password: {"*" * len(password)}")
+            kwargs["password"] = password
+        if bucket:
+            self.log.debug(f"    bucket: {bucket}")
+            kwargs["bucket"] = bucket
+        if mode:
+            self.log.debug(f"    mode: {mode}")
+            kwargs["mode"] = mode
+        if api_key:
+            self.log.debug(f"    apI_key: {"*" * len(api_key)}")
+            kwargs["api_key"] = api_key
+        if use_session:
+            self.log.debug(f"    use_session: {use_session}")
+            kwargs["user_session"] = use_session
+
+        if expire_time:
+            self.log.debug(f"    expire_time: {expire_time}")
+            kwargs["expire_time"] = expire_time
+        if max_objects:
+            self.log.debug(f"    max_objects: {max_objects}")
+            kwargs["max_objects"] = max_objects
+        if max_age:
+            self.log.debug(f"    max_age: {max_age}")
+            kwargs["max_age"] = max_age
+        if retries:
+            self.log.debug(f"    retries: {retries}")
+            kwargs["retries"] = retries
+        if timeout:
+            self.log.debug(f"    timeout: {timeout}")
+            kwargs["timeout"] = timeout
+
+        super().__init__(domain_path, app_logger=app_logger)
+
+        http_conn = HttpConn(domain_path, **kwargs)
+
+        hsds_info = http_conn.serverInfo()
+        self.log.debug(f"got hsds info: {hsds_info}")
+
+        # try to do a GET from the domain
+        req = "/"
+        params = {}
+        """
+        if max_objects is None or max_objects > 0:
+            # get object meta objects
+            # TBD: have hsds support a max limit of objects to return
+            params["getobjs"] = 1
+        params["include_attrs"] = 1
+        params["include_links"] = 1
+        """
+
+        rsp = http_conn.GET(req, params=params)
+
+        if rsp.status_code != 200:
+            # file must exist
+            http_conn.close()
+            raise IOError(rsp.status_code, rsp.reason)
+
+        domain_json = rsp.json()
+        self.log.debug(f"got domain_json: {domain_json}")
+
+        if "root" not in domain_json:
+            http_conn.close()
+            raise IOError(404, "Location is a folder, not a file")
+
+        root_uuid = domain_json["root"]
+
+        if mode in ("w", "w-", "x", "a"):
+            http_conn._mode = "r+"
+
+        """
+        if "domain_objs" in root_json:
+            domain_objs = root_json["domain_objs"]
+            objdb.load(domain_objs)
+        """
+
+        self._root_id = root_uuid
+        self._verboseInfo = None  # additional state we'll get when requested
+        self._verboseUpdated = None  # when the verbose data was fetched
+        self._lastScan = None  # when summary stats where last updated by server
+
+        if "limits" in domain_json:
+            self._limits = domain_json["limits"]
+        else:
+            self._limits = None
+        if "version" in domain_json:
+            self._version = domain_json["version"]
+        else:
+            self._version = None
+
+        self._http_conn = http_conn
+        self._domain_json = domain_json
+
+        """
+        # parse the json file
+        h5json = json.loads(text)
+
+        self._h5json = h5json
+
+        if "root" not in h5json:
+            raise Exception("no root key in input file")
+        self._root_id = "g-" + h5json["root"]
+        """
+
+    def close(self):
+        pass
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_values=False):
+        """ return object with given id """
+
+        collection = getCollectionForId(obj_id)
+
+        req = f"/{collection}/{obj_id}"
+        self.log.debug("sending req: {req}")
+
+        params = {}
+        if include_attrs:
+            params["include_attrs"] = 1
+        if include_links:
+            params["include_links"] = 1
+
+        rsp = self._http_conn.GET(req, params=params)
+
+        if rsp.status_code != 200:
+            raise IOError(rsp.status_code, rsp.reason)
+
+        obj_json = rsp.json()
+        if "hrefs" in obj_json:
+            # don't need these
+            del obj_json["hrefs"]
+
+        self.log.debug(f"got json for id: {obj_id}: {obj_json}")
+        return obj_json
+
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        self.log.debug(f"getAttribute({obj_id}), [{name}], include_data={includeData})")
+        collection = getCollectionForId(obj_id)
+        req = f"/{collection}/{obj_id}/attributes/{name}"
+
+        params = {}
+        params["IncludeData"] = 1 if includeData else 0
+
+        rsp = self._http_conn.GET(req, params=params)
+
+        if rsp.status_code in (404, 410):
+            self.log.warning(f"attribute {name} not found")
+            return None
+
+        if rsp.status_code != 200:
+            self.log.error(f"GET {req} failed with status_code: {rsp.status_code}")
+            raise IOError(rsp.status_code, rsp.reason)
+        attr_json = rsp.json()
+
+        if "hrefs" in attr_json:
+            del attr_json["hrefs"]
+
+        return attr_json
+
+    def getDtype(self, obj_json):
+        """ Return the dtype for the type given by obj_json """
+        if "type" not in obj_json:
+            raise KeyError("no type item found")
+        type_item = obj_json["type"]
+        if isinstance(type_item, str) and type_item.startswith("datatypes/"):
+            # this is a reference to a committed type
+            ctype_id = "t-" + getUuidFromId(type_item)
+            ctype_json = self.getObjectById(ctype_id)
+            if "type" not in ctype_json:
+                raise KeyError(f"Unexpected datatype: {ctype_json}")
+            # Use the ctype's item json
+            type_item = ctype_json["type"]
+        dtype = createDataType(type_item)
+        return dtype
+
+    def getDatasetValues(self, dset_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        self.log.debug(f"getDatasetValues({dset_id}), sel={sel}")
+        collection = getCollectionForId(dset_id)
+        if collection != "datasets":
+            msg = f"unexpected id: {dset_id} for getDatasetValues"
+            self.log.warning(msg)
+            return ValueError(msg)
+
+        params = {}
+        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+            pass  # just return the entire array
+        elif isinstance(sel, selections.SimpleSelection):
+            params["select"] = sel.getQueryParam()
+        else:
+            raise NotImplementedError("selection type not supported")
+
+        req = f"/{collection}/{dset_id}/value"
+        rsp = self._http_conn.GET(req, params=params)
+        if rsp.status_code != 200:
+            self.log.error(f"GET {req} failed with status_code: {rsp.status_code}")
+            raise IOError(rsp.status_code, rsp.reason)
+
+        rsp_json = rsp.json()
+        if "value" not in rsp_json:
+            self.log.warning(f"value key not found for {dset_id}")
+            return None
+
+        self.log.debug(f"got rsp: {rsp_json}")
+        json_value = rsp_json["value"]
+
+        arr = jsonToArray(sel.mshape, dtype, json_value)
+
+        return arr
diff --git a/src/h5json/hsdsstore/httpconn.py b/src/h5json/hsdsstore/httpconn.py
new file mode 100644
index 00000000..7a686dff
--- /dev/null
+++ b/src/h5json/hsdsstore/httpconn.py
@@ -0,0 +1,791 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+from __future__ import absolute_import
+
+import os
+import sys
+import multiprocessing
+
+import base64
+import requests
+import requests_unixsocket
+from requests import ConnectionError
+from requests.adapters import HTTPAdapter, Retry
+import json
+import logging
+
+from .. import openid
+from .. import config
+
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+DEFAULT_TIMEOUT = (
+    10,
+    1000,
+)  # #20  # 180  # seconds - allow time for hsds service to bounce
+
+"""
+def verifyCert(self):
+    # default to validate CERT for https requests, unless
+    # the H5PYD_VERIFY_CERT environment variable is set and True
+    #
+    # TBD: set default to True once the signing authority of data.hdfgroup.org is
+    # recognized
+    if "H5PYD_VERIFY_CERT" in os.environ:
+        verify_cert = os.environ["H5PYD_VERIFY_CERT"].upper()
+        if verify_cert.startswith('F'):
+            return False
+    return True
+"""
+
+
+def getAzureApiKey():
+    """construct API key for Active Directory if configured"""
+    # TBD: GoogleID?
+
+    api_key = None
+
+    # if Azure AD ids are set, pass them to HttpConn via api_key dict
+    cfg = config.get_config()  # pulls in state from a .hscfg file (if found).
+
+    ad_app_id = None  # Azure AD HSDS Server id
+    if "HS_AD_APP_ID" in os.environ:
+        ad_app_id = os.environ["HS_AD_APP_ID"]
+    elif "hs_ad_app_id" in cfg:
+        ad_app_id = cfg["hs_ad_app_id"]
+    ad_tenant_id = None  # Azure AD tenant id
+    if "HS_AD_TENANT_ID" in os.environ:
+        ad_tenant_id = os.environ["HS_AD_TENANT_ID"]
+    elif "hs_ad_tenant_id" in cfg:
+        ad_tenant_id = cfg["hs_ad_tenant_id"]
+
+    ad_resource_id = None  # Azure AD resource id
+    if "HS_AD_RESOURCE_ID" in os.environ:
+        ad_resource_id = os.environ["HS_AD_RESOURCE_ID"]
+    elif "hs_ad_resource_id" in cfg:
+        ad_resource_id = cfg["hs_ad_resource_id"]
+
+    ad_client_secret = None  # Azure client secret
+    if "HS_AD_CLIENT_SECRET" in os.environ:
+        ad_client_secret = os.environ["HS_AD_CLIENT_SECRET"]
+    elif "hs_ad_client_secret" in cfg:
+        ad_client_secret = cfg["hs_ad_client_secret"]
+
+    if ad_app_id and ad_tenant_id and ad_resource_id:
+        # contruct dict to pass to HttpConn
+        api_key = {
+            "AD_APP_ID": ad_app_id,
+            "AD_TENANT_ID": ad_tenant_id,
+            "AD_RESOURCE_ID": ad_resource_id,
+            "openid_provider": "azure",
+        }
+        # optional config
+        if ad_client_secret:
+            api_key["AD_CLIENT_SECRET"] = ad_client_secret
+    return api_key  # None if AAD not configured
+
+
+def getKeycloakApiKey():
+    # check for keycloak next
+    cfg = config.get_config()  # pulls in state from a .hscfg file (if found).
+    api_key = None
+    # check to see if we are configured for keycloak authentication
+    if "HS_KEYCLOAK_URI" in os.environ:
+        keycloak_uri = os.environ["HS_KEYCLOAK_URI"]
+    elif "hs_keycloak_uri" in cfg:
+        keycloak_uri = cfg["hs_keycloak_uri"]
+    else:
+        keycloak_uri = None
+    if "HS_KEYCLOAK_CLIENT_ID" in os.environ:
+        keycloak_client_id = os.environ["HS_KEYCLOAK_CLIENT_ID"]
+    elif "hs_keycloak_client_id" in cfg:
+        keycloak_client_id = cfg["hs_keycloak_client_id"]
+    else:
+        keycloak_client_id = None
+    if "HS_KEYCLOAK_REALM" in os.environ:
+        keycloak_realm = cfg["HS_KEYCLOAK_REALM"]
+    elif "hs_keycloak_realm" in cfg:
+        keycloak_realm = cfg["hs_keycloak_realm"]
+    else:
+        keycloak_realm = None
+
+    if keycloak_uri and keycloak_client_id and keycloak_uri:
+        api_key = {
+            "keycloak_uri": keycloak_uri,
+            "keycloak_client_id": keycloak_client_id,
+            "keycloak_realm": keycloak_realm,
+            "openid_provider": "keycloak",
+        }
+    return api_key
+
+
+class HttpResponse:
+    """ wrapper for http request responses """
+    def __init__(self, rsp, logger=None):
+        self._rsp = rsp
+        self._logger = logger
+        if logger is None:
+            self.log = logging
+        else:
+            self.log = logging.getLogger(logger)
+        self._text = None
+
+    @property
+    def status_code(self):
+        """ return response status code """
+        return self._rsp.status_code
+
+    @property
+    def reason(self):
+        """ return response reason """
+        return self._rsp.reason
+
+    @property
+    def content_type(self):
+        """ return content type """
+        rsp = self._rsp
+        if 'Content-Type' in rsp.headers:
+            content_type = rsp.headers['Content-Type']
+        else:
+            content_type = ""
+        return content_type
+
+    @property
+    def content_length(self):
+        """ Return length of response if available """
+        if 'Content-Length' in self._rsp.headers:
+            content_length = self._rsp.headers['Content-Length']
+        else:
+            content_length = None
+        return content_length
+
+    @property
+    def is_binary(self):
+        """ return True if the response indicates binary data """
+
+        if self.content_type == "application/octet-stream":
+            return True
+        else:
+            return False
+
+    @property
+    def is_json(self):
+        """ return true if response indicates json """
+
+        if self.content_type.startswith("application/json"):
+            return True
+        else:
+            return False
+
+    @property
+    def text(self):
+        """ getresponse content as bytes """
+
+        if not self._text:
+            rsp = self._rsp
+            if not self.is_binary:
+                # hex encoded response?
+                # this is returned by API Gateway for lambda responses
+                self._text = bytes.fromhex(rsp.text)
+            else:
+                if self.content_length:
+                    self.log.debug(f"got binary response, {self.content_length} bytes")
+                else:
+                    self.log.debug("got binary response, content_length unknown")
+
+                HTTP_CHUNK_SIZE = 4096
+                http_chunks = []
+                downloaded_bytes = 0
+                for http_chunk in rsp.iter_content(chunk_size=HTTP_CHUNK_SIZE):
+                    if http_chunk:  # filter out keep alive chunks
+                        self.log.debug(f"got http_chunk - {len(http_chunk)} bytes")
+                        downloaded_bytes += len(http_chunk)
+                        http_chunks.append(http_chunk)
+                if len(http_chunks) == 0:
+                    raise IOError("no data returned")
+                if len(http_chunks) == 1:
+                    # can return first and only chunk as response
+                    self._text = http_chunks[0]
+                else:
+                    msg = f"retrieved {len(http_chunks)} http_chunks "
+                    msg += f" {downloaded_bytes} total bytes"
+                    self.log.info(msg)
+                    self._text = bytearray(downloaded_bytes)
+                    index = 0
+                    for http_chunk in http_chunks:
+                        self._text[index:(index + len(http_chunk))] = http_chunk
+                        index += len(http_chunk)
+
+        return self._text
+
+    def json(self):
+        """ Return json from response"""
+
+        rsp = self._rsp
+
+        if not self.is_json:
+            raise IOError("response is not json")
+
+        rsp_json = json.loads(rsp.text)
+        self.log.debug(f"rsp_json - {len(rsp.text)} bytes")
+        return rsp_json
+
+
+class HttpConn:
+    """
+    Some utility methods based on equivalents in base class.
+    """
+
+    def __init__(
+        self,
+        domain_name,
+        endpoint=None,
+        username=None,
+        password=None,
+        bucket=None,
+        api_key=None,
+        mode="a",
+        use_session=True,
+        expire_time=1.0,
+        max_objects=None,
+        max_age=1.0,
+        logger=None,
+        retries=3,
+        timeout=DEFAULT_TIMEOUT,
+        **kwds,
+    ):
+        self._domain = domain_name
+        self._mode = mode
+        self._domain_json = None
+        self._use_session = use_session
+        self._retries = retries
+        self._timeout = timeout
+        self._api_key = api_key
+        self._s = None  # Sessions
+        self._server_info = None
+        self._external_refs = []
+
+        self._logger = logger
+        if logger is None:
+            self.log = logging
+        else:
+            self.log = logging.getLogger(logger)
+        msg = f"HttpConn.init(domain: {domain_name} use_session: {use_session} "
+        msg += f"expire_time: {expire_time:6.2f} sec retries: {retries}"
+        self.log.debug(msg)
+
+        if self._timeout != DEFAULT_TIMEOUT:
+            self.log.info(f"HttpConn.init - timeout = {self._timeout}")
+        if endpoint is None:
+            if "HS_ENDPOINT" in os.environ:
+                endpoint = os.environ["HS_ENDPOINT"]
+
+        if not endpoint:
+            msg = "no endpoint set"
+            raise ValueError(msg)
+
+        self._endpoint = endpoint
+
+        if username is None:
+            if "HS_USERNAME" in os.environ:
+                username = os.environ["HS_USERNAME"]
+        if isinstance(username, str) and (not username or username.upper() == "NONE"):
+            username = None
+        self._username = username
+
+        if password is None:
+            if "HS_PASSWORD" in os.environ:
+                password = os.environ["HS_PASSWORD"]
+        if isinstance(password, str) and (not password or password.upper() == "NONE"):
+            password = None
+        self._password = password
+
+        if bucket is None:
+            if "HS_BUCKET" in os.environ:
+                bucket = os.environ["HS_BUCKET"]
+            if isinstance(bucket, str) and (not bucket or bucket.upper() == "NONE"):
+                bucket = None
+        self._bucket = bucket
+
+        if api_key is None and "HS_API_KEY" in os.environ:
+            api_key = os.environ["HS_API_KEY"]
+        if isinstance(api_key, str) and (not api_key or api_key.upper() == "NONE"):
+            api_key = None
+        if not api_key:
+            api_key = getAzureApiKey()
+        if not api_key:
+            api_key = getKeycloakApiKey()
+
+        # Convert api_key to OpenIDHandler
+        if isinstance(api_key, dict):
+            # Maintain Azure-defualt backwards compatibility, but allow
+            # both environment variable and kwarg override.
+            provider = api_key.get("openid_provider", "azure")
+            if provider == "azure":
+                self.log.debug("creating OpenIDHandler for Azure")
+                self._api_key = openid.AzureOpenID(endpoint, api_key)
+            elif provider == "google":
+                self.log.debug("creating OpenIDHandler for Google")
+
+                config = api_key.get("client_secret", None)
+                scopes = api_key.get("scopes", None)
+                self._api_key = openid.GoogleOpenID(
+                    endpoint, config=config, scopes=scopes
+                )
+            elif provider == "keycloak":
+                self.log.debug("creating OpenIDHandler for Keycloak")
+
+                # for Keycloak, pass in username and password
+                self._api_key = openid.KeycloakOpenID(
+                    endpoint, config=api_key, username=username, password=password
+                )
+            else:
+                self.log.error(f"Unknown openid provider: {provider}")
+
+    def __del__(self):
+        if self._s:
+            self.log.debug("close session")
+            self._s.close()
+            self._s = None
+
+    def getHeaders(self, username=None, password=None, headers=None):
+
+        if headers is None:
+            headers = {}
+
+        # This should be the default - but explicitly set anyway
+        if "Accept-Encoding" not in headers:
+            headers['Accept-Encoding'] = "deflate, gzip"
+
+        elif "Authorization" in headers:
+            return headers  # already have auth key
+        if username is None:
+            username = self._username
+        if password is None:
+            password = self._password
+
+        if self._api_key:
+            self.log.debug("using api key")
+            # use OpenId handler to get a bearer token
+            token = ""
+
+            # Get a token, possibly refreshing if needed.
+            if isinstance(self._api_key, openid.OpenIDHandler):
+                token = self._api_key.token
+
+            # Token was provided as a string.
+            elif isinstance(self._api_key, str):
+                token = self._api_key
+
+            if token:
+                auth_string = b"Bearer " + token.encode("ascii")
+                headers["Authorization"] = auth_string
+        elif username is not None and password is not None:
+            self.log.debug(f"use basic auth with username: {username}")
+            auth_string = username + ":" + password
+            auth_string = auth_string.encode("utf-8")
+            auth_string = base64.b64encode(auth_string)
+            auth_string = b"Basic " + auth_string
+            headers["Authorization"] = auth_string
+        else:
+            self.log.debug("no auth header")
+            # no auth header
+            pass
+
+        return headers
+
+    def serverInfo(self):
+        if self._server_info:
+            return self._server_info
+
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+
+        # make an about request
+        rsp = self.GET("/about")
+        if rsp.status_code != 200:
+            raise IOError(rsp.status_code, rsp.reason)
+        server_info = rsp.json()
+        if server_info:
+            self._server_info = server_info
+        return server_info
+
+    def server_version(self):
+        server_info = self.serverInfo()
+        if "hsds_version" in server_info:
+            server_version = server_info["hsds_version"]
+        else:
+            # no standard way to get version for other implements...
+            server_version = None
+        return server_version
+
+    def verifyCert(self):
+        # default to validate CERT for https requests, unless
+        # the H5PYD_VERIFY_CERT environment variable is set and True
+        #
+        # TBD: set default to True once the signing authority of data.hdfgroup.org is
+        # recognized
+        if "H5PYD_VERIFY_CERT" in os.environ:
+            verify_cert = os.environ["H5PYD_VERIFY_CERT"].upper()
+            if verify_cert.startswith("F"):
+                return False
+        return True
+
+    def GET(self, req, format="json", params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+        # check that domain is defined (except for some specific requests)
+        if req not in ("/domains", "/about", "/info", "/") and self._domain is None:
+            raise IOError(f"no domain defined: req: {req}")
+
+        rsp = None
+
+        headers = self.getHeaders(headers=headers)
+
+        if params is None:
+            params = {}
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key and not isinstance(self._api_key, dict):
+            params["api_key"] = self._api_key
+        domain = params["domain"]
+        self.log.debug(f"GET: {req} [{domain}] bucket: {self._bucket}")
+
+        if format == "binary":
+            headers["accept"] = "application/octet-stream"
+
+        self.log.info(f"GET: {self._endpoint + req} [{params['domain']}] timeout: {self._timeout}")
+
+        for k in params:
+            if k != "domain":
+                v = params[k]
+                self.log.debug(f"GET params {k}:{v}")
+
+        try:
+            s = self.session
+            stream = True  # tbd  - config for no streaming?
+
+            rsp = s.get(
+                self._endpoint + req,
+                params=params,
+                headers=headers,
+                stream=stream,
+                timeout=self._timeout,
+                verify=self.verifyCert(),
+            )
+            self.log.info(f"status: {rsp.status_code}")
+        except ConnectionError as ce:
+            self.log.error(f"connection error: {ce}")
+            raise IOError("Connection Error")
+        except Exception as e:
+            self.log.error(f"got {type(e)} exception: {e}")
+            raise IOError("Unexpected exception")
+
+        return HttpResponse(rsp)
+
+    def PUT(self, req, body=None, format="json", params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+        if self._domain is None:
+            raise IOError("no domain defined")
+
+        if params:
+            self.log.info(f"PUT params: {params}")
+        else:
+            params = {}
+
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key:
+            params["api_key"] = self._api_key
+
+        # verify the file was open for modification
+        if self._mode == "r":
+            raise IOError("Unable to create group (No write intent on file)")
+
+        # try to do a PUT to the domain
+
+        headers = self.getHeaders(headers=headers)
+
+        if format == "binary":
+            headers["Content-Type"] = "application/octet-stream"
+            # binary write
+            data = body
+        else:
+            headers["Content-Type"] = "application/json"
+            data = json.dumps(body)
+
+        self.log.info(f"PUT: {req} format: {format} [{len(data)} bytes]")
+
+        try:
+            s = self.session
+            rsp = s.put(
+                self._endpoint + req,
+                data=data,
+                headers=headers,
+                params=params,
+                verify=self.verifyCert(),
+            )
+            self.log.info(f"status: {rsp.status_code}")
+        except ConnectionError as ce:
+            self.log.error(f"connection error: {ce}")
+            raise IOError("Connection Error")
+
+        if rsp.status_code == 201 and req == "/":
+            self.log.info("clearing domain_json cache")
+            self._domain_json = None
+        self.log.info(f"PUT returning: {rsp}")
+        return HttpResponse(rsp)
+
+    def POST(self, req, body=None, format="json", params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+        if self._domain is None:
+            raise IOError("no domain defined")
+
+        if params is None:
+            params = {}
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key:
+            params["api_key"] = self._api_key
+
+        # verify we have write intent (unless this is a dataset point selection)
+        if req.startswith("/datasets/") and req.endswith("/value"):
+            point_sel = True
+        else:
+            point_sel = False
+        if self._mode == "r" and not point_sel:
+            raise IOError("Unable perform request (No write intent on file)")
+
+        # try to do a POST to the domain
+
+        headers = self.getHeaders(headers=headers)
+
+        if isinstance(body, bytes):
+            headers["Content-Type"] = "application/octet-stream"
+            data = body
+        else:
+            # assume json
+            try:
+                data = json.dumps(body)
+            except TypeError:
+                msg = f"Unable to convert {body} to json"
+                self.log.error(msg)
+                raise IOError("JSON encoding error")
+        if format == "binary":
+            # recieve data as binary
+            headers["accept"] = "application/octet-stream"
+
+        self.log.info("POST: " + req)
+
+        try:
+            s = self.session
+            rsp = s.post(
+                self._endpoint + req,
+                data=data,
+                headers=headers,
+                params=params,
+                verify=self.verifyCert(),
+            )
+        except ConnectionError as ce:
+            self.log.warning(f"connection error: {ce}")
+            raise IOError(str(ce))
+
+        if rsp.status_code not in (200, 201):
+            self.log.error(f"POST error: {rsp.status_code}")
+
+        return HttpResponse(rsp)
+
+    def DELETE(self, req, params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+
+        if req not in ("/domains", "/") and self._domain is None:
+            raise IOError("no domain defined")
+        if params is None:
+            params = {}
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key:
+            params["api_key"] = self._api_key
+
+        # verify we have write intent
+        if self._mode == "r":
+            raise IOError("Unable perform request (No write intent on file)")
+
+        # try to do a DELETE of the resource
+
+        headers = self.getHeaders(headers=headers)
+
+        self.log.info("DEL: " + req)
+        try:
+            s = self.session
+            rsp = s.delete(
+                self._endpoint + req,
+                headers=headers,
+                params=params,
+                verify=self.verifyCert(),
+            )
+            self.log.info(f"status: {rsp.status_code}")
+        except ConnectionError as ce:
+            self.log.error(f"connection error: {ce}")
+            raise IOError("Connection Error")
+
+        if rsp.status_code == 200 and req == "/":
+            self.log.info("clearing domain_json cache")
+            self._domain_json = None
+
+        return HttpResponse(rsp)
+
+    @property
+    def session(self):
+        # create a session object to re-use http connection when possible
+        s = requests
+        retries = self._retries
+        backoff_factor = 1
+        status_forcelist = (500, 502, 503, 504)
+
+        if self._use_session:
+            if self._s is None:
+                if self._endpoint.startswith("http+unix://"):
+                    self.log.debug(f"create unixsocket session: {self._endpoint}")
+                    s = requests_unixsocket.Session()
+                else:
+                    # regular request session
+                    s = requests.Session()
+
+                retry = Retry(
+                    total=retries,
+                    read=retries,
+                    connect=retries,
+                    backoff_factor=backoff_factor,
+                    status_forcelist=status_forcelist,
+                )
+
+                s.mount(
+                    "http://",
+                    HTTPAdapter(max_retries=retry, pool_connections=16, pool_maxsize=16),
+                )
+                s.mount(
+                    "https://",
+                    HTTPAdapter(max_retries=retry, pool_connections=16, pool_maxsize=16),
+                )
+                self._s = s
+            else:
+                s = self._s
+        return s
+
+    def add_external_ref(self, fid):
+        # this is used by the group class to keep references to external links open
+        if fid.__class__.__name__ != "FileID":
+            raise TypeError("add_external_ref, expected FileID type")
+        self._external_refs.append(fid)
+
+    def close(self):
+        if self._s:
+            self._s.close()
+            self._s = None
+
+    @property
+    def domain(self):
+        return self._domain
+
+    @property
+    def username(self):
+        return self._username
+
+    @property
+    def endpoint(self):
+        return self._endpoint
+
+    @property
+    def password(self):
+        return self._password
+
+    @property
+    def mode(self):
+        return self._mode
+
+    @property
+    def domain_json(self):
+        if self._domain_json is None:
+            rsp = self.GET("/")
+            if rsp.status_code != 200:
+                raise IOError(rsp.reason)
+            # assume JSON
+            self._domain_json = rsp.json()
+        return self._domain_json
+
+    @property
+    def root_uuid(self):
+        domain_json = self.domain_json
+        if "root" not in domain_json:
+            raise IOError("Unexpected response")
+        root_uuid = domain_json["root"]
+        return root_uuid
+
+    @property
+    def compressors(self):
+        compressors = []
+        if "compressors" in self.domain_json:
+            compressors = self.domain_json["compressors"]
+        if not compressors:
+            compressors = [
+                "gzip",
+            ]
+        return compressors
+
+    @property
+    def modified(self):
+        """Last modified time of the domain as a datetime object."""
+        domain_json = self.domain_json
+        if "lastModified" not in domain_json:
+            raise IOError("Unexpected response")
+        last_modified = domain_json["lastModified"]
+        return last_modified
+
+    @property
+    def created(self):
+        """Creation time of the domain"""
+        domain_json = self.domain_json
+        if "created" not in domain_json:
+            raise IOError("Unexpected response")
+        created = domain_json["created"]
+        return created
+
+    @property
+    def owner(self):
+        """username of creator of domain"""
+        domain_json = self.domain_json
+        username = None
+        if "owner" in domain_json:
+            # currently this is only available for HSDS
+            username = domain_json["owner"]
+        return username
+
+    @property
+    def logging(self):
+        """return name of logging handler"""
+        return self.log

From e07bd8115b7ae27c46d1fa8bda91991077de222f Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 3 Jun 2025 13:21:05 +0200
Subject: [PATCH 046/129] fix flake8 error

---
 src/h5json/hsdsstore/hsds_reader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index 5740a29c..13f3b8ce 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -59,7 +59,7 @@ def __init__(
             self.log.debug(f"    username: {username}")
             kwargs["username"] = username
         if password:
-            self.log.debug(f"    password: {"*" * len(password)}")
+            self.log.debug(f"    password: {'*' * len(password)}")
             kwargs["password"] = password
         if bucket:
             self.log.debug(f"    bucket: {bucket}")
@@ -68,7 +68,7 @@ def __init__(
             self.log.debug(f"    mode: {mode}")
             kwargs["mode"] = mode
         if api_key:
-            self.log.debug(f"    apI_key: {"*" * len(api_key)}")
+            self.log.debug(f"    apI_key: {'*' * len(api_key)}")
             kwargs["api_key"] = api_key
         if use_session:
             self.log.debug(f"    use_session: {use_session}")

From 69baad8acec5e1e411f879ad78bdc670b40bd0b0 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 3 Jun 2025 13:34:02 +0200
Subject: [PATCH 047/129] fix import paths

---
 .github/workflows/ci.yml | 40 ++++++++++++++++++++++++++++++++++++++++
 src/h5json/hdf5db.py     |  4 ++--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4e1040ca..9108d474 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,11 +25,13 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+
       - name: Install dependencies
         shell: bash
         run: |
           python -m pip install --upgrade pip
           python -m pip install flake8 pytest
+
       - name: Lint with flake8
         shell: bash
         run: |
@@ -37,10 +39,48 @@ jobs:
           flake8 . --count  --select=E9,F63,F7,F82 --show-source --statistics
           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
           flake8 . --count --ignore=F401,W503,E203 --max-complexity=99 --max-line-length=127 --statistics
+
       - name: Install h5json
         shell: bash
         run: |
           pip install -e .
+
+      - name: Checkout HSDS 
+        uses: actions/checkout@v4
+        with:
+          repository: HDFGroup/hsds
+          path: ${{github.workspace}}/hsds
+
+      - name: Install HSDS
+        working-directory: ${{github.workspace}}/hsds
+        shell: bash
+        run: |
+          pip install -e .
+
+      - name: Start HSDS
+        shell: bash
+        working-directory: ${{github.workspace}}/hsds
+        run: |
+          mkdir hsds_root
+          mkdir hsds_root/hsds_bucket
+          cp admin/config/groups.default admin/config/groups.txt
+          cp admin/config/passwd.default admin/config/passwd.txt
+          hsds --root_dir hsds_root --host localhost --port 5101 --password_file admin/config/passwd.txt --logfile hs.log --loglevel DEBUG --config_dir=admin/config --count=4 &
+
+      - name: Wait for node startup
+        shell: bash
+        run: |
+          sleep 30
+      
+      - name: HSDS Setup
+        shell: bash
+        env:
+          ADMIN_PASSWORD: admin
+          ADMIN_USERNAME: admin
+        working-directory: ${{github.workspace}}/hsds
+        run: |
+          python tests/integ/setup_test.py
+
       - name: Run tests
         shell: bash
         run: |
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 6ee8aaa2..c00f91a1 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -18,8 +18,8 @@
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId
 from . import selections
 from .apiversion import _apiver
-from .reader.h5reader import H5Reader
-from .writer.h5writer import H5Writer
+from .h5reader import H5Reader
+from .h5writer import H5Writer
 
 
 class Hdf5db:

From 66b5b15fbac75ece472649cf7c104605e071ddf3 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 3 Jun 2025 16:44:32 +0200
Subject: [PATCH 048/129] use binary for dataset reads

---
 .github/workflows/ci.yml            |  3 ++
 src/h5json/hsdsstore/hsds_reader.py | 81 +++++++++++++++++++++--------
 src/h5json/hsdsstore/httpconn.py    | 41 ++++++++++-----
 test/unit/hsds_reader_test.py       | 25 ++++-----
 testall.py                          | 16 +++++-
 5 files changed, 115 insertions(+), 51 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9108d474..ba618d56 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -83,5 +83,8 @@ jobs:
 
       - name: Run tests
         shell: bash
+        HS_ENDPOINT: http://localhost:5101
+        HS_USERNAME: test_user1
+        HS_PASSWORD: test
         run: |
           python testall.py
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index 13f3b8ce..1c4eb28b 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -9,13 +9,12 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import json
 import logging
 
 from ..objid import getCollectionForId, getUuidFromId
 
 from ..hdf5dtype import createDataType
-from ..array_util import jsonToArray
+from ..array_util import jsonToArray, bytesToArray
 from .. import selections
 from ..h5reader import H5Reader
 from .httpconn import HttpConn
@@ -162,6 +161,10 @@ def __init__(
         self._root_id = "g-" + h5json["root"]
         """
 
+    @property
+    def http_conn(self):
+        return self._http_conn
+
     def close(self):
         pass
 
@@ -183,15 +186,17 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_
         if include_links:
             params["include_links"] = 1
 
-        rsp = self._http_conn.GET(req, params=params)
+        rsp = self.http_conn.GET(req, params=params)
 
         if rsp.status_code != 200:
             raise IOError(rsp.status_code, rsp.reason)
 
         obj_json = rsp.json()
-        if "hrefs" in obj_json:
-            # don't need these
-            del obj_json["hrefs"]
+        # remove any unneeded keys
+        redundant_keys = ("hrefs", "root", "domain", "bucket", "linkCount", "attributeCount")
+        for key in redundant_keys:
+            if key in obj_json:
+                del obj_json[key]
 
         self.log.debug(f"got json for id: {obj_id}: {obj_json}")
         return obj_json
@@ -208,7 +213,7 @@ def getAttribute(self, obj_id, name, includeData=True):
         params = {}
         params["IncludeData"] = 1 if includeData else 0
 
-        rsp = self._http_conn.GET(req, params=params)
+        rsp = self.http_conn.GET(req, params=params)
 
         if rsp.status_code in (404, 410):
             self.log.warning(f"attribute {name} not found")
@@ -254,28 +259,60 @@ def getDatasetValues(self, dset_id, sel=None, dtype=None):
             self.log.warning(msg)
             return ValueError(msg)
 
-        params = {}
         if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
-            pass  # just return the entire array
-        elif isinstance(sel, selections.SimpleSelection):
-            params["select"] = sel.getQueryParam()
+            query_param = None  # just return the entire array
+        elif isinstance(sel, (selections.SimpleSelection, selections.FancySelection)):
+            query_param = sel.getQueryParam()
         else:
-            raise NotImplementedError("selection type not supported")
+            raise NotImplementedError(f"selection type: {type(sel)} not supported")
+
+        mtype = dtype  # TBD - support read time dtype
+        mshape = sel.mshape
 
         req = f"/{collection}/{dset_id}/value"
-        rsp = self._http_conn.GET(req, params=params)
+        params = {}
+
+        if query_param:
+            params["select"] = query_param
+
+        if mtype.names != dtype.names:
+            params["fields"] = ":".join(mtype.names)
+
+        MAX_SELECT_QUERY_LEN = 100
+        if len(query_param) > MAX_SELECT_QUERY_LEN:
+            # use a post method to avoid possible long query strings
+            try:
+                rsp = self.http_conn.POST(req, body=params, format="binary")
+            except IOError as ioe:
+                self.log.info(f"got IOError: {ioe.errno}")
+                raise IOError(f"Error retrieving data: {ioe.errno}")
+        else:
+            # make a http GET
+            try:
+                rsp = self.http_conn.GET(req, params=params, format="binary")
+            except IOError as ioe:
+                self.log.info(f"got IOError: {ioe.errno}")
+                raise IOError(ioe.errno, "Error retrieving data")
+
         if rsp.status_code != 200:
-            self.log.error(f"GET {req} failed with status_code: {rsp.status_code}")
-            raise IOError(rsp.status_code, rsp.reason)
+            self.log.info(f"got http error: {rsp.status_code}")
+            raise IOError(rsp.status_code, "Error retrieving data")
 
-        rsp_json = rsp.json()
-        if "value" not in rsp_json:
-            self.log.warning(f"value key not found for {dset_id}")
-            return None
+        if rsp.is_binary:
+            # got binary response
+            self.log.info(f"binary response, {len(rsp.text)} bytes")
+            arr = bytesToArray(rsp.text, mtype, mshape)
+        else:
+            # got JSON response
+            # need some special conversion for compound types --
+            # each element must be a tuple, but the JSON decoder
+            # gives us a list instead.
+            self.log.info("json response")
 
-        self.log.debug(f"got rsp: {rsp_json}")
-        json_value = rsp_json["value"]
+            data = rsp.json()["value"]
+            # self.log.debug(data)
 
-        arr = jsonToArray(sel.mshape, dtype, json_value)
+            arr = jsonToArray(mshape, mtype, data)
+            self.log.debug(f"jsonToArray returned: {arr}")
 
         return arr
diff --git a/src/h5json/hsdsstore/httpconn.py b/src/h5json/hsdsstore/httpconn.py
index 7a686dff..14b3d54d 100644
--- a/src/h5json/hsdsstore/httpconn.py
+++ b/src/h5json/hsdsstore/httpconn.py
@@ -14,9 +14,9 @@
 
 import os
 import sys
-import multiprocessing
-
+import time
 import base64
+
 import requests
 import requests_unixsocket
 from requests import ConnectionError
@@ -289,7 +289,7 @@ def __init__(
 
         if self._timeout != DEFAULT_TIMEOUT:
             self.log.info(f"HttpConn.init - timeout = {self._timeout}")
-        if endpoint is None:
+        if not endpoint:
             if "HS_ENDPOINT" in os.environ:
                 endpoint = os.environ["HS_ENDPOINT"]
 
@@ -299,21 +299,21 @@ def __init__(
 
         self._endpoint = endpoint
 
-        if username is None:
+        if not username:
             if "HS_USERNAME" in os.environ:
                 username = os.environ["HS_USERNAME"]
         if isinstance(username, str) and (not username or username.upper() == "NONE"):
             username = None
         self._username = username
 
-        if password is None:
+        if not password:
             if "HS_PASSWORD" in os.environ:
                 password = os.environ["HS_PASSWORD"]
         if isinstance(password, str) and (not password or password.upper() == "NONE"):
             password = None
         self._password = password
 
-        if bucket is None:
+        if not bucket:
             if "HS_BUCKET" in os.environ:
                 bucket = os.environ["HS_BUCKET"]
             if isinstance(bucket, str) and (not bucket or bucket.upper() == "NONE"):
@@ -479,7 +479,7 @@ def GET(self, req, format="json", params=None, headers=None):
         try:
             s = self.session
             stream = True  # tbd  - config for no streaming?
-
+            ts = time.time()
             rsp = s.get(
                 self._endpoint + req,
                 params=params,
@@ -488,7 +488,8 @@ def GET(self, req, format="json", params=None, headers=None):
                 timeout=self._timeout,
                 verify=self.verifyCert(),
             )
-            self.log.info(f"status: {rsp.status_code}")
+            elapsed = time.time() - ts
+            self.log.info(f"status: GET {rsp.status_code}, elapsed: {elapsed:.4f}")
         except ConnectionError as ce:
             self.log.error(f"connection error: {ce}")
             raise IOError("Connection Error")
@@ -496,6 +497,9 @@ def GET(self, req, format="json", params=None, headers=None):
             self.log.error(f"got {type(e)} exception: {e}")
             raise IOError("Unexpected exception")
 
+        if rsp.status_code != 200:
+            self.log.warning(f"GET {req} returned status: {rsp.status_code}")
+
         return HttpResponse(rsp)
 
     def PUT(self, req, body=None, format="json", params=None, headers=None):
@@ -536,6 +540,7 @@ def PUT(self, req, body=None, format="json", params=None, headers=None):
 
         try:
             s = self.session
+            ts = time.time()
             rsp = s.put(
                 self._endpoint + req,
                 data=data,
@@ -543,7 +548,8 @@ def PUT(self, req, body=None, format="json", params=None, headers=None):
                 params=params,
                 verify=self.verifyCert(),
             )
-            self.log.info(f"status: {rsp.status_code}")
+            elapsed = time.time() - ts
+            self.log.info(f"status: PUT {rsp.status_code}, elapsed: {elapsed:.4f}")
         except ConnectionError as ce:
             self.log.error(f"connection error: {ce}")
             raise IOError("Connection Error")
@@ -551,7 +557,10 @@ def PUT(self, req, body=None, format="json", params=None, headers=None):
         if rsp.status_code == 201 and req == "/":
             self.log.info("clearing domain_json cache")
             self._domain_json = None
+        if rsp.status_code not in (200, 201):
+            self.log.warning(f"got status code: {rsp.status_code} for PUT {req}")
         self.log.info(f"PUT returning: {rsp}")
+
         return HttpResponse(rsp)
 
     def POST(self, req, body=None, format="json", params=None, headers=None):
@@ -593,13 +602,14 @@ def POST(self, req, body=None, format="json", params=None, headers=None):
                 self.log.error(msg)
                 raise IOError("JSON encoding error")
         if format == "binary":
-            # recieve data as binary
+            # receive data as binary
             headers["accept"] = "application/octet-stream"
 
         self.log.info("POST: " + req)
 
         try:
             s = self.session
+            ts = time.time()
             rsp = s.post(
                 self._endpoint + req,
                 data=data,
@@ -607,12 +617,14 @@ def POST(self, req, body=None, format="json", params=None, headers=None):
                 params=params,
                 verify=self.verifyCert(),
             )
+            elapsed = time.time() - ts
+            self.log.info(f"status: POST {rsp.status_code}, elapsed: {elapsed:.4f}")
         except ConnectionError as ce:
             self.log.warning(f"connection error: {ce}")
             raise IOError(str(ce))
 
         if rsp.status_code not in (200, 201):
-            self.log.error(f"POST error: {rsp.status_code}")
+            self.log.error(f"got status_code: {rsp.status_code} for DELETE: {req}")
 
         return HttpResponse(rsp)
 
@@ -636,12 +648,12 @@ def DELETE(self, req, params=None, headers=None):
             raise IOError("Unable perform request (No write intent on file)")
 
         # try to do a DELETE of the resource
-
         headers = self.getHeaders(headers=headers)
 
         self.log.info("DEL: " + req)
         try:
             s = self.session
+            ts = time.time()
             rsp = s.delete(
                 self._endpoint + req,
                 headers=headers,
@@ -649,6 +661,8 @@ def DELETE(self, req, params=None, headers=None):
                 verify=self.verifyCert(),
             )
             self.log.info(f"status: {rsp.status_code}")
+            elapsed = time.time() - ts
+            self.log.info(f"status: DELETE {rsp.status_code}, elapsed: {elapsed:.4f}")
         except ConnectionError as ce:
             self.log.error(f"connection error: {ce}")
             raise IOError("Connection Error")
@@ -657,6 +671,9 @@ def DELETE(self, req, params=None, headers=None):
             self.log.info("clearing domain_json cache")
             self._domain_json = None
 
+        if rsp.status_code != 200:
+            self.log.warning(f"got status_code: {rsp.status_code} for DELETE {req}")
+
         return HttpResponse(rsp)
 
     @property
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
index cbc7f8bb..72cf6017 100644
--- a/test/unit/hsds_reader_test.py
+++ b/test/unit/hsds_reader_test.py
@@ -17,18 +17,6 @@
 from h5json import selections
 
 
-def get_endpoint():
-    return "http://hsds.hdf.test:5101"
-
-
-def get_username():
-    return "test_user1"
-
-
-def get_password():
-    return "test"
-
-
 class HSDSReaderTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(HSDSReaderTest, self).__init__(*args, **kwargs)
@@ -52,9 +40,6 @@ def testSimple(self):
         filepath = "/home/test_user1/test/tall.h5"
         kwargs = {"app_logger": self.log}
         with Hdf5db(**kwargs) as db:
-            kwargs["username"] = get_username()
-            kwargs["password"] = get_password()
-            kwargs["endpoint"] = get_endpoint()
             hsds_reader = HSDSReader(filepath, **kwargs)
             db.reader = hsds_reader
             root_id = db.getObjectIdByPath("/")
@@ -81,6 +66,16 @@ def testSimple(self):
             dset_shape = dset_json["shape"]
             self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
             self.assertEqual(dset_shape["dims"], [10, 10])
+
+            # got the 5th row of the dataset
+            sel_row = selections.select((10, 10), (5, slice(0, 10)))
+            row = db.getDatasetValues(dset111_id, sel_row)
+            self.assertTrue(isinstance(row, np.ndarray))
+            self.assertEqual(row.shape, (10,))
+            for i in range(10):
+                v = row[i]
+                self.assertEqual(v, i * 5)
+
             sel_all = selections.select((10, 10), ...)
             arr = db.getDatasetValues(dset111_id, sel_all)
             self.assertTrue(isinstance(arr, np.ndarray))
diff --git a/testall.py b/testall.py
index 5ca1934c..45e06106 100755
--- a/testall.py
+++ b/testall.py
@@ -15,7 +15,7 @@
 import shutil
 import h5py
 
-unit_tests = (
+unit_tests = [
     "array_util_test",
     "objid_test",
     "hdf5dtype_test",
@@ -24,7 +24,19 @@
     "h5json_writer_test",
     "h5py_reader_test",
     "h5py_writer_test",
-)
+]
+
+use_hsds = True
+for key in ("HS_ENDPOINT", "HS_USERNAME", "HS_PASSWORD"):
+    if key not in os.environ:
+        use_hsds = False
+        print(f"not including HSDS tests, no {key} environment set")
+        break
+
+if use_hsds:
+    unit_tests.append("hsds_reader_test")
+unit_tests = tuple(unit_tests)
+
 integ_tests = ("h5tojson_test", "jsontoh5_test")
 
 # verify the hdf5 lib version is recent

From 0090d56a0f3e126b57372a4d5eee583071eac5cc Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 10 Jun 2025 17:00:41 +0200
Subject: [PATCH 049/129] add hsds_writer class

---
 src/h5json/h5pystore/h5py_reader.py   |  29 ++-
 src/h5json/h5pystore/h5py_writer.py   |  82 +++++---
 src/h5json/h5reader.py                |  30 +++
 src/h5json/h5writer.py                |  30 ++-
 src/h5json/hdf5db.py                  |  86 +++++++--
 src/h5json/hsdsstore/hsds_reader.py   |  43 ++---
 src/h5json/hsdsstore/hsds_writer.py   | 264 ++++++++++++++++++++++++++
 src/h5json/jsonstore/h5json_reader.py |  17 +-
 src/h5json/jsonstore/h5json_writer.py |  24 ++-
 test/unit/h5py_reader_test.py         |   1 +
 test/unit/h5py_writer_test.py         |  62 +++---
 11 files changed, 564 insertions(+), 104 deletions(-)
 create mode 100644 src/h5json/hsdsstore/hsds_writer.py

diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index dab44078..034566c6 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -148,20 +148,45 @@ def __init__(
             self.log = app_logger
         else:
             self.log = logging.getLogger()
+        if not h5py.is_hdf5(filepath):
+            self.log.warn(f"File: {filepath} is not an HDF5 file")
+            raise IOError("not an HDF5 file")
         super().__init__(filepath, app_logger=app_logger)
-        f = h5py.File(self._filepath)
+        self._f = None
+        self._root_id = None
+        
+
+    def open(self):
+        if self._f:
+            return  # already open
+        if self._id_map:
+            return  # objects already loaded
+        if not self._root_id:
+            # get the root id from db if available
+            if self.db.root_id:
+                self.log.info("H5pyReader: got root_id from db")
+                self._root_id = self.db.root_id
+            else:
+                self.log.info("H5pyReader: creating root id")
+                self._root_id = createObjId(obj_type="groups")
+        
+        f = h5py.File(self.filepath)
         self._f = f
-        self._root_id = createObjId(obj_type="groups")
         self._id_map[self._root_id] = f
         addr = h5py.h5o.get_info(f.id).addr
         self._addr_map[addr] = self._root_id
         f.visititems(self.visit)
 
+        return self._root_id
+
     def close(self):
         if self._f:
             self._f.close()
             self._f = None
 
+    def isClosed(self):
+        return False if self._f else True
+
     def get_root_id(self):
         """ Return root id """
         return self._root_id
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index f2487826..16c69681 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -13,7 +13,7 @@
 import numpy as np
 import time
 
-from ..objid import getCollectionForId, isValidUuid, getUuidFromId, isObjId
+from ..objid import getCollectionForId, isValidUuid, createObjId
 from ..hdf5dtype import createDataType
 from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
 from ..array_util import jsonToArray
@@ -41,6 +41,7 @@ def __init__(
         else:
             self._init = True
         self._flush_time = 0.0
+        self._f = None  # h5py file handle
 
     def _copy_element(self, val, src_dt, tgt_dt, fout=None):
         """ convert the given dataset or attribute element to h5py equivalent """
@@ -390,40 +391,69 @@ def updateAttributes(self, obj_id, obj):
     def flush(self):
         """ Write dirty items """
 
-        if not self.db:
+        if self.closed:
             # no db set yet
             return False
+        if not self._f:
+            raise IOError("open not called")
+        
         self.log.info("h5py_writer.flush()")
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
-        mode = 'w' if self._init else 'a'
-        with h5py.File(self._filepath, mode=mode) as f:
-            if self.db.new_objects or self._init:
-                root_json = self.db.getObjectById(root_id)
-
-                if "links" in root_json:
-                    root_links = root_json["links"]
-                    self._createObjects(f, root_links, visited=set((root_id,)))
-            # update attributes, dataset values
-            for obj_id in self._id_map:
-                if self.db.is_dirty(obj_id) or self._init:
-                    h5path = self._id_map[obj_id]
-                    obj = f[h5path]
-                    self.updateAttributes(obj_id, obj)
-                    collection = getCollectionForId(obj_id)
-                    if collection == "datasets":
-                        if self._init:
-                            self.initializeDatasetValues(obj_id, obj)
-                        else:
-                            self.updateDatasetValues(obj_id, obj)
-            # mark time write is complete
-            # updates before this time will not need to be written
-            # TBD: possible race condition with multithreading
-            self._flush_time = time.time()
+        
+        if self.db.new_objects or self._init:
+            root_json = self.db.getObjectById(root_id)
+
+            if "links" in root_json:
+                root_links = root_json["links"]
+                self._createObjects(self._f, root_links, visited=set((root_id,)))
+        # update attributes, dataset values
+        for obj_id in self._id_map:
+            if self.db.is_dirty(obj_id) or self._init:
+                h5path = self._id_map[obj_id]
+                obj = self._f[h5path]
+                self.updateAttributes(obj_id, obj)
+                collection = getCollectionForId(obj_id)
+                if collection == "datasets":
+                    if self._init:
+                        self.initializeDatasetValues(obj_id, obj)
+                    else:
+                        self.updateDatasetValues(obj_id, obj)
+        # mark time write is complete
+        # updates before this time will not need to be written
+        # TBD: possible race condition with multithreading
+        self._flush_time = time.time()
 
         self._init = False  # done with init after first flush
         return True  # all objects written successfully
+    
+    def open(self):
+        """ open HDF5 file """
+        self.log.debug("h5pyWriter open")
+        if self.db is None:
+            # no db set yet
+            self.log.warning("no self.db db_ref")
+            raise ValueError("no db")
+        mode = 'w' if self._init else 'a'
+        self.log.info(f"creating h5py file: {self._filepath} mode: {mode}")
+        self._f = h5py.File(self._filepath, mode=mode) 
+        if self.db.root_id:
+            self._root_id = self.db.root_id
+        else:
+            self._root_id = createObjId(obj_type="groups")
+        return self._root_id
+
 
     def close(self):
         """ close storage handle """
+        self.log.debug("h5py_writer.close()")
+        if not self._f:
+            # no open on file
+            return
         self.flush()
+        self._f.close()
+        self._f = None
+
+    def isClosed(self):
+        """ return closed status """
+        return False if self._f else True
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index 541bb262..fbc53491 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -10,6 +10,7 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 from abc import ABC, abstractmethod
+import weakref
 
 import logging
 
@@ -31,6 +32,25 @@ def __init__(
         else:
             self.log = logging.getLogger()
 
+    def set_db(self, db):
+        self._db_ref = weakref.ref(db)
+
+    @property
+    def db(self):
+        if not self._db_ref:
+            raise ValueError("db not available")
+        return self._db_ref()
+    
+    @property
+    def filepath(self):
+        """ return filepath """
+        return self._filepath
+    
+    @property
+    def closed(self):
+        """ return True if the reader handle is closed (or never opened) """
+        return self.isClosed()
+
     @abstractmethod
     def get_root_id(self):
         """ Return root id """
@@ -58,7 +78,17 @@ def getDatasetValues(self, obj_id, sel=None, dtype=None):
         """
         pass
 
+    @abstractmethod
+    def open(self):
+        """ Open data source for reading """
+        pass
+
     @abstractmethod
     def close(self):
         """ close any open handles to the storage """
         pass
+
+    @abstractmethod
+    def isClosed(self):
+        """ return True if handle is closed """
+        pass
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index aaab2e51..bc52523d 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -39,12 +39,35 @@ def __init__(
 
     def set_db(self, db):
         self._db_ref = weakref.ref(db)
+        self.log.debug("writer set db ref")
+
+    @property
+    def filepath(self):
+        return self._filepath
+    
+    @property
+    def closed(self):
+        return self.isClosed()
 
     @property
     def db(self):
         if not self._db_ref:
-            raise ValueError("db not available")
+            self.log.debug("db not available")
+            return None
         return self._db_ref()
+    
+    @property
+    def append(self):
+        return self._append
+    
+    #property
+    def no_data(self):
+        return self._no_data
+    
+    @abstractmethod
+    def open(self):
+        """ open storage handle, return root_id"""
+        return None
 
     @abstractmethod
     def flush(self):
@@ -55,3 +78,8 @@ def flush(self):
     def close(self):
         """ close storage handle """
         pass
+
+    @abstractmethod
+    def isClosed(self):
+        """ return True if handle is closed """
+        pass
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index c00f91a1..b28ebbc6 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -48,26 +48,32 @@ def __init__(
 
         self._db = {}
 
-        self._reader = h5_reader
-        self._writer = h5_writer
+        self._new_objects = set()  # set of for newly created objects
+        self._dirty_objects = set()  # set of modified objects
+        self._deleted_objects = set() # set of deleted objects
 
-        self._new_objects = set()  # set of obj_id's
-        self._dirty_objects = set()  # set of obj_id's
+        self._root_id = None
 
-        if self._reader:
-            root_id = self._reader.get_root_id()
-            group_json = self._reader.getObjectById(root_id)
+        if h5_reader:
+            self._reader = h5_reader
+            self._reader.set_db(self)
         else:
-            root_id = createObjId(obj_type="groups")
-            # create a root group
-            group_json = {"links": {}, "attributes": {}, "cpl": {}}
-            group_json["created"] = time.time()
+            self._reader = None
 
-        if self._writer:
+        if h5_writer:
+            self._writer = h5_writer
             self._writer.set_db(self)
+        else:
+            self._writer = None
+            
+        #root_id = createObjId(obj_type="groups")
+        # create a root group
+        #group_json = {"links": {}, "attributes": {}, "cpl": {}}
+        #group_json["created"] = time.time()
 
-        self._db[root_id] = group_json
-        self._root_id = root_id
+         
+        #self._db[root_id] = group_json
+        # self._root_id = root_id
 
     @property
     def db(self):
@@ -86,6 +92,9 @@ def reader(self, value: H5Reader):
             self.flush()
         if self._reader:
             self._reader.close()
+        self._reader = value
+        self._reader.set_db(self)
+        """
         root_id = value.get_root_id()
         if not root_id:
             raise ValueError(f"reader {type(value)} unable to return root_id")
@@ -95,6 +104,7 @@ def reader(self, value: H5Reader):
         self._reader = value
         self._db[root_id] = group_json
         self._root_id = root_id
+        """
 
     @property
     def writer(self):
@@ -108,6 +118,7 @@ def writer(self, value: H5Writer):
             self._writer.close()
         self._writer = value
         if self._writer:
+            self.log.debug("writer set_db")
             self._writer.set_db(self)
 
     @property
@@ -132,6 +143,10 @@ def new_objects(self):
     @property
     def dirty_objects(self):
         return self._dirty_objects
+    
+    @property
+    def deleted_objects(self):
+        return self._deleted_objects
 
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
@@ -161,6 +176,31 @@ def flush(self):
         self._new_objects = set()
         self._dirty_objects = set()
 
+    def open(self):
+        """ open reader and writer if set """
+        if self.root_id:
+            self.log.warning("root id already set, multiple db.open calls")
+            return self.root_id
+        
+        if self.writer and self.writer.append:
+            # append mode for the writer, open writer and get the root id
+            self._root_id = self.writer.open()
+        elif self.reader:
+            self._root_id = self.reader.open()
+        else:
+            # no root id set by writer or reader, initialize now
+            self._root_id = createObjId(obj_type="groups")
+            if self.writer:
+                # open writer in create mode now that we have a root id
+                self.writer.open()
+            
+            # create a root group just as a memory object
+            group_json = {"links": {}, "attributes": {}, "cpl": {}}
+            group_json["created"] = time.time()
+            self._db[self._root_id] = group_json
+
+        return self._root_id
+
     def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
@@ -172,6 +212,10 @@ def close(self):
         self._root_id = None
         self._db = {}
 
+    @property
+    def closed(self):
+        return False if self.root_id else True
+
     def __enter__(self):
         """ called on package init """
         self.log.info("Hdf5db __enter")
@@ -180,6 +224,7 @@ def __enter__(self):
     def __exit__(self, type, value, traceback):
         """ called on package exit """
         self.log.info("Hdf5db __exit")
+        print("__exit__")
         self.close()
 
     def getObjectById(self, obj_id):
@@ -190,6 +235,7 @@ def getObjectById(self, obj_id):
                 obj_json = self.reader.getObjectById(obj_id)
                 self.db[obj_id] = obj_json
             else:
+                print("keyerror - self.db:", self.db)
                 raise KeyError(f"obj_id: {obj_id} not found")
         obj_json = self.db[obj_id]
 
@@ -199,6 +245,9 @@ def getObjectIdByPath(self, h5path, parent_id=None):
         """ Return id for the given link path starting from parent_id if set,
         otherwise the root_id """
 
+        if self.closed:
+            self.open()  # initiate db
+
         if h5path == "/":
             return self.root_id  # just return root id
 
@@ -551,6 +600,8 @@ def deleteObject(self, obj_id):
         if obj_id in self._dirty_objects:
             self._dirty_objects.remove(obj_id)
 
+        self._deleted_objects.add(obj_id)
+
     def getLinks(self, grp_id):
         """ Get the links for the given group """
         grp_json = self.getObjectById(grp_id)
@@ -621,7 +672,8 @@ def deleteLink(self, grp_id, name):
 
     def createGroup(self, cpl=None):
         """ Create a new group """
-
+        if self.closed:
+            raise ValueError("db is closed")
         grp_id = createObjId("groups", root_id=self.root_id)
         group_json = {"attributes": {}, "links": {}}
         if cpl:
@@ -638,6 +690,8 @@ def createCommittedType(self, datatype, cpl=None):
         createCommittedType - creates new named datatype
         Returns item
         """
+        if self.closed:
+            raise ValueError("db is closed")
         self.log.info("createCommittedType")
         if cpl is None:
             cpl = {}
@@ -667,6 +721,8 @@ def createDataset(
         createDataset - creates new dataset given shape and datatype
         Returns obj_id
         """
+        if self.closed:
+            raise ValueError("db is closed")
         type_json = getTypeItem(dtype)
         if shape == "H5S_NULL":
             shape_json = {"class": "H5S_NULL"}
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index 1c4eb28b..b4de31d2 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -33,7 +33,6 @@ def __init__(
         username=None,
         password=None,
         bucket=None,
-        mode='r',
         api_key=None,
         use_session=True,
         expire_time=0,
@@ -63,9 +62,6 @@ def __init__(
         if bucket:
             self.log.debug(f"    bucket: {bucket}")
             kwargs["bucket"] = bucket
-        if mode:
-            self.log.debug(f"    mode: {mode}")
-            kwargs["mode"] = mode
         if api_key:
             self.log.debug(f"    apI_key: {'*' * len(api_key)}")
             kwargs["api_key"] = api_key
@@ -88,10 +84,17 @@ def __init__(
         if timeout:
             self.log.debug(f"    timeout: {timeout}")
             kwargs["timeout"] = timeout
+        # save these for when we create the connection
+        self._http_kwargs = kwargs
 
         super().__init__(domain_path, app_logger=app_logger)
 
-        http_conn = HttpConn(domain_path, **kwargs)
+    def open(self):
+        if self._http_conn:
+            return  # open already called
+        
+        kwargs = self._http_kwargs
+        http_conn = HttpConn(self.filepath, **kwargs)
 
         hsds_info = http_conn.serverInfo()
         self.log.debug(f"got hsds info: {hsds_info}")
@@ -122,22 +125,14 @@ def __init__(
             http_conn.close()
             raise IOError(404, "Location is a folder, not a file")
 
-        root_uuid = domain_json["root"]
-
-        if mode in ("w", "w-", "x", "a"):
-            http_conn._mode = "r+"
+        root_id = domain_json["root"]
+        self._root_id = root_id
 
         """
         if "domain_objs" in root_json:
             domain_objs = root_json["domain_objs"]
             objdb.load(domain_objs)
-        """
-
-        self._root_id = root_uuid
-        self._verboseInfo = None  # additional state we'll get when requested
-        self._verboseUpdated = None  # when the verbose data was fetched
-        self._lastScan = None  # when summary stats where last updated by server
-
+        """ 
         if "limits" in domain_json:
             self._limits = domain_json["limits"]
         else:
@@ -150,23 +145,19 @@ def __init__(
         self._http_conn = http_conn
         self._domain_json = domain_json
 
-        """
-        # parse the json file
-        h5json = json.loads(text)
-
-        self._h5json = h5json
+        return self._root_id
 
-        if "root" not in h5json:
-            raise Exception("no root key in input file")
-        self._root_id = "g-" + h5json["root"]
-        """
 
     @property
     def http_conn(self):
         return self._http_conn
 
     def close(self):
-        pass
+        if self._http_conn:
+            self._http_conn.close()
+
+    def isClosed(self):
+        return False is self._http_conn else True
 
     def get_root_id(self):
         """ Return root id """
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
new file mode 100644
index 00000000..8144e085
--- /dev/null
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -0,0 +1,264 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import logging
+
+from ..objid import getCollectionForId, getUuidFromId
+
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray, bytesToArray
+from .. import selections
+from ..h5writer import H5Writer
+from .httpconn import HttpConn
+
+
+class HSDSWriter(H5Writer):
+    """
+    This class can be used by HDF5DB to read content from an hdf5-json file
+    """
+
+    def __init__(
+        self,
+        domain_path,
+        append=False,
+        no_data=False,
+        app_logger=None,
+        endpoint=None,
+        username=None,
+        password=None,
+        bucket=None,
+        api_key=None,
+        use_session=True,
+        expire_time=0,
+        max_objects=0,
+        max_age=0,
+        retries=3,
+        timeout=30.0,
+        track_order=False,
+        owner=None,
+        linked_domain=None
+
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        if append:
+            self._init = False
+        else:
+            self._init = True
+
+        if no_data:
+            self._no_data = True
+        else:
+            self._no_data = False
+
+        self.log.debug("HSDSWriter init")
+
+        kwargs = {}
+        self.log.debug(f"    domain_path: {domain_path}")
+        self.log.debug(f"    append: {append}")
+        if endpoint:
+            self.log.debug(f"    endpoint: {endpoint}")
+            kwargs["endpoint"] = endpoint
+        if username:
+            self.log.debug(f"    username: {username}")
+            kwargs["username"] = username
+        if password:
+            self.log.debug(f"    password: {'*' * len(password)}")
+            kwargs["password"] = password
+        if bucket:
+            self.log.debug(f"    bucket: {bucket}")
+            kwargs["bucket"] = bucket
+        if api_key:
+            self.log.debug(f"    apI_key: {'*' * len(api_key)}")
+            kwargs["api_key"] = api_key
+        if use_session:
+            self.log.debug(f"    use_session: {use_session}")
+            kwargs["user_session"] = use_session
+        if expire_time:
+            self.log.debug(f"    expire_time: {expire_time}")
+            kwargs["expire_time"] = expire_time
+        if max_objects:
+            self.log.debug(f"    max_objects: {max_objects}")
+            kwargs["max_objects"] = max_objects
+        if max_age:
+            self.log.debug(f"    max_age: {max_age}")
+            kwargs["max_age"] = max_age
+        if retries:
+            self.log.debug(f"    retries: {retries}")
+            kwargs["retries"] = retries
+        if timeout:
+            self.log.debug(f"    timeout: {timeout}")
+            kwargs["timeout"] = timeout
+        self._http_kwargs = kwargs  # save for when we create the connection
+
+        super().__init__(domain_path, app_logger=app_logger)
+
+        self._http_conn = None
+        self._root_id = None
+        self._append = append
+        self._owner = owner
+        self._track_order = track_order
+        self._linked_domain = linked_domain
+        self._domain_json = None
+
+    def open(self):
+        """ setup domain for writing """
+
+        if self._http_conn:
+            http_conn = self._http_conn
+        else:
+            kwargs = self._http_kwargs
+            http_conn = HttpConn(self.filepath, **kwargs)
+            if self._append:
+                http_conn._mode = "a"
+            self._http_conn = http_conn
+            hsds_info = http_conn.serverInfo()
+            self.log.debug(f"got hsds info: {hsds_info}")
+
+        if not self._domain_json:
+            # haven't fetched the domain json yet, do it now
+
+            # try to do a GET from the domain
+            req = "/"
+            params = {}
+            """
+            if max_objects is None or max_objects > 0:
+                # get object meta objects
+                # TBD: have hsds support a max limit of objects to return
+                params["getobjs"] = 1
+                params["include_attrs"] = 1
+                params["include_links"] = 1
+            """
+        
+            domain_json = None
+            rsp = http_conn.GET(req, params=params)
+
+            if rsp.status_code not in (200, 404, 410):
+                msg = f"Got status code: {rsp.status_code} on initial domain get"
+                self.log.warning(msg)
+                raise IOError(msg)
+
+            if rsp.status_code == 200:
+                if self._append:
+                    # domain exists already
+                    domain_json = rsp.json()
+                    if "root" not in domain_json:
+                        # this a folder not a domain
+                        self.log.warning(f"folder: {self.filepath} has no root property")
+                        http_conn.close()
+                        raise IOError(404, "Location is a folder, not a file")
+                else:
+                    # not append - delete existing domain
+                    self.log.info(f"sending delete request for {self.filepath}")
+                    delete_rsp = http_conn.DELETE(req, params=params)
+                    if delete_rsp.status_code not in (200, 410):
+                        # failed to delete
+                        http_conn.close()
+                        raise IOError(rsp.status_code, rsp.reason)
+                        
+            if not domain_json:
+                # domain doesn't exist, create it
+                body = {}
+                if self.db.root_id:
+                    # initialize domain using the db's root_id
+                    body["root_id"] = self.db.root_id
+                if self._owner:
+                    body["owner"] = self._owner
+                if self._linked_domain:
+                    body["linked_domain"] = linked_domain
+                if self._track_order:
+                    create_props = {"CreateOrder": 1}
+                    group_body = {"creationProperties": create_props}
+                    body["group"] = group_body
+                rsp = http_conn.PUT(req, params=params, body=body)
+                if rsp.status_code != 201:
+                    http_conn.close()
+                    raise IOError(rsp.status_code, rsp.reason)
+                domain_json = rsp.json()
+                self.log.info(f"got rsp on PUT domain: {domain_json}")
+                if "root" not in domain_json:
+                    http_conn.close()
+                    raise IOError(404, "Unexpected error")
+
+            self.log.debug(f"got domain_json: {domain_json}")
+
+            if "root" not in domain_json:
+                http_conn.close()
+                raise IOError(404, "Location is a folder, not a file")
+
+            root_id = domain_json["root"]
+
+            self._root_id = root_id
+       
+            if "limits" in domain_json:
+                self._limits = domain_json["limits"]
+            else:
+                self._limits = None
+            if "version" in domain_json:
+                self._version = domain_json["version"]
+            else:
+                self._version = None
+
+            self._domain_json = domain_json
+
+        return self._root_id
+
+
+    @property
+    def http_conn(self):
+        return self._http_conn
+    
+    def flush(self):
+        """ Write dirty items """
+
+        if not self.db:
+            # no db set yet
+            return False
+        self.log.info("hsds_writer.flush()")
+        self.log.debug(f"    new object count: {len(self.db.new_objects)}")
+        self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
+        self.log.debug(f"    deleted object count: {len(self.db.deleted_objects)}")
+
+        #root_id = self.db.root_id
+        if self._init:
+            # initialize all existing objects
+            self.log.debug("flush -- init is true")
+            for obj_id in self.db:
+                self.log.debug(f"init: {obj_id}")
+            self._init = False
+        elif self.db.new_objects:
+            for obj_id in self.db.new_objects:
+                self.log.debug(f"new obj id: {obj_id}")
+
+        for obj_id in self.db.dirty_objects:
+            self.log.debug(f"dirty object id: {obj_id}")
+
+        for obj_id in self.db.deleted_objects:
+            self.log.debug(f"deleted object: {obj_id}")
+        
+        return True  # all objects written successfully
+
+    def close(self):
+        # over-ride of H5Writer method
+        self.flush()
+        self.http_conn.close()
+        self._http_conn = None
+
+    def isClosed(self):
+        """ return closed status """
+        return False if self._http_conn else True
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
index 78df4567..2013332a 100644
--- a/src/h5json/jsonstore/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -36,8 +36,14 @@ def __init__(
             self.log = logging.getLogger()
 
         super().__init__(filepath, app_logger=app_logger)
+        self._root_id = None
+        self._h5json = None
 
-        with open(filepath) as f:
+    def open(self):
+        if self._h5json:
+            return  # already read JSON file
+
+        with open(self.filepath) as f:
             text = f.read()
 
         # parse the json file
@@ -47,11 +53,20 @@ def __init__(
 
         if "root" not in h5json:
             raise Exception("no root key in input file")
+        
         self._root_id = "g-" + h5json["root"]
+        if self.db.root_id and self.db.root_id != self._root_id:
+            self.log.warning("h5json root id doesn't match db root id")
+            raise IOError("root id mismatch")
+
+        return self._root_id
 
     def close(self):
         pass
 
+    def isClosed(self):
+        return False if self._h5json else False
+
     def get_root_id(self):
         """ Return root id """
         return self._root_id
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index 4a94ad02..c8da27ec 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -13,7 +13,7 @@
 import json
 
 from ..h5writer import H5Writer
-from ..objid import getUuidFromId, getCollectionForId
+from ..objid import getUuidFromId, getCollectionForId, createObjId
 from ..array_util import bytesArrayToList
 from .. import selections
 
@@ -32,21 +32,41 @@ def __init__(
         app_logger=None
     ):
         super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
+        if append:
+            raise ValueError("H5JsonWriter does not support append mode")
         self.alias_db = {}
         self.json = {}
-        self._root_uuid = None
+        self._root_id = None
 
     def flush(self):
         """ Write dirty items """
         # json writer doesn't support incremental updates, so we'll wait
         # for close to write out database
+        if not self._root_id:
+            msg = "flush called prior to open"
+            self.log.warning(msg)
+            raise IOError(msg)
+        
         self.log.info("flush")
         return False
+    
+    def open(self):
+        """ file open """
+        # no incremental updates with h5json writer, so just fetch the root_id here
+        if self.db.root_id:
+            self._root_id = self.db.root_id
+        else:
+            self._root_id = createObjId(obj_type="groups")
+        return self._root_id
 
     def close(self):
         """ close storage handle """
         self.dumpFile()
 
+    def isClosed(self):
+        """ return closed status """
+        return False if self._root_id else True
+
     def getAliasList(self, obj_id):
         """ return list of alias """
         if obj_id not in self.alias_db:
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index 45de125e..7c11c4f5 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -40,6 +40,7 @@ def testSimple(self):
         kwargs = {"app_logger": self.log}
         with Hdf5db(h5_reader=H5pyReader(filepath, **kwargs), **kwargs) as db:
             root_id = db.getObjectIdByPath("/")
+            print("got root_id:", root_id)
             root_json = db.getObjectById(root_id)
 
             root_attrs = root_json["attributes"]
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index f70acb59..3b4a14ab 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -35,7 +35,7 @@ def __init__(self, *args, **kwargs):
         self.log.setLevel(logging.DEBUG)
         # create logger
 
-        handler = logging.FileHandler("./hdf5dbtest.log")
+        handler = logging.FileHandler("./h5pywriterbtest.log")
         # add handler to logger
         self.log.addHandler(handler)
 
@@ -71,28 +71,27 @@ def testSimple(self):
             db.createSoftLink(g2_id, "slink", "somewhere")
             db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
             db.createCustomLink(g2_id, "cust", {"foo": "bar"})
-            db.flush()
-            with h5py.File(filepath) as f:
-                self.assertTrue("attr1", f.attrs)
-                self.assertTrue("attr2", f.attrs)
-                self.assertTrue("g1" in f)
-                g1 = f["g1"]
-                self.assertTrue("a1" in g1.attrs)
-                self.assertTrue("g1.1" in g1)
-                g11 = g1["g1.1"]
-                self.assertTrue("dset1.1.1" in g11)
-                dset = g11["dset1.1.1"]
-                self.assertEqual(dset.shape, (10, 10))
-                for i in range(10):
-                    for j in range(10):
-                        self.assertEqual(dset[i, j], i * j)
-                self.assertTrue("g2" in f)
-                g2 = f["g2"]
-                self.assertTrue("extlink" in g2)
-                self.assertTrue("slink" in g2)
 
+        # open file with h5py and verify changes
+        with h5py.File(filepath) as f:
+            self.assertTrue("attr1", f.attrs)
+            self.assertTrue("attr2", f.attrs)
+            self.assertTrue("g1" in f)
+            g1 = f["g1"]
+            self.assertTrue("a1" in g1.attrs)
+            self.assertTrue("g1.1" in g1)
+            g11 = g1["g1.1"]
+            self.assertTrue("dset1.1.1" in g11)
+            dset = g11["dset1.1.1"]
+            self.assertEqual(dset.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    self.assertEqual(dset[i, j], i * j)
+            self.assertTrue("g2" in f)
+            g2 = f["g2"]
+            self.assertTrue("extlink" in g2)
+            self.assertTrue("slink" in g2)
             db.createAttribute(g1_id, "a2", "bye-bye")
-            db.flush()
 
             with h5py.File(filepath) as f:
                 g1 = f["g1"]
@@ -114,16 +113,16 @@ def testSimple(self):
             db.setDatasetValues(dset_111_id, sel, arr)
             db.flush()
 
-            with h5py.File(filepath) as f:
-                dset = f["/g1/g1.1/dset1.1.1"]
-                for i in range(10):
-                    for j in range(10):
-                        if i == 4 and j == 4:
-                            # this is the one element that was updated
-                            expected = 42
-                        else:
-                            expected = i * j
-                        self.assertEqual(dset[i, j], expected)
+        with h5py.File(filepath) as f:
+            dset = f["/g1/g1.1/dset1.1.1"]
+            for i in range(10):
+                for j in range(10):
+                    if i == 4 and j == 4:
+                        # this is the one element that was updated
+                        expected = 42
+                    else:
+                        expected = i * j
+                    self.assertEqual(dset[i, j], expected)
 
     def testNullSpaceAttribute(self):
 
@@ -487,6 +486,7 @@ def testReaderWithUpdate(self):
             with h5py.File(file_out) as f:
                 self.assertTrue("/g1/g1.1/dset1.1.1" in f)
                 dset111 = f["/g1/g1.1/dset1.1.1"]
+                print("dset111 attrs:", list(dset111.attrs.keys()))
                 self.assertEqual(len(dset111.attrs), 2)
 
             dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")

From 9d59f8c1e17aed2a5e7b3796190237390b99953d Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 11 Jun 2025 16:17:57 +0200
Subject: [PATCH 050/129] fix db re-open logic

---
 src/h5json/h5pystore/h5py_writer.py | 11 ++--
 src/h5json/hdf5db.py                | 43 +++++++-------
 test/unit/h5py_writer_test.py       | 88 +++++++++++++++--------------
 3 files changed, 76 insertions(+), 66 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 16c69681..49acb4eb 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -384,20 +384,22 @@ def updateAttributes(self, obj_id, obj):
         for name in attrs:
             attr_json = attrs[name]
             if "created" in attr_json and attr_json["created"] < self._flush_time:
-                # ttribute should be saved already
+                # attribute should be saved already
                 continue
             self.createAttribute(obj, name, attr_json)
 
     def flush(self):
         """ Write dirty items """
-
         if self.closed:
             # no db set yet
+            self.log.warning("h5py_writer - flush called but no db")
             return False
         if not self._f:
+            self.log.warning("h5py_writer file not open")
             raise IOError("open not called")
         
         self.log.info("h5py_writer.flush()")
+        
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
         
@@ -434,9 +436,10 @@ def open(self):
             # no db set yet
             self.log.warning("no self.db db_ref")
             raise ValueError("no db")
-        mode = 'w' if self._init else 'a'
+        mode = 'a' if self._append else 'w'
         self.log.info(f"creating h5py file: {self._filepath} mode: {mode}")
-        self._f = h5py.File(self._filepath, mode=mode) 
+        self._f = h5py.File(self._filepath, mode=mode)
+        self._append = True  # switch to append mode for next file open 
         if self.db.root_id:
             self._root_id = self.db.root_id
         else:
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index b28ebbc6..340adc31 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -165,9 +165,9 @@ def make_dirty(self, obj_id):
 
     def flush(self):
         """ write out any changes """
+        self.log.debug("db.flush()")
         if not self.writer:
             return  # nothing to do
-
         if not self.writer.flush():
             # flush not successful, don't clear dirty set
             return
@@ -179,25 +179,28 @@ def flush(self):
     def open(self):
         """ open reader and writer if set """
         if self.root_id:
-            self.log.warning("root id already set, multiple db.open calls")
-            return self.root_id
-        
-        if self.writer and self.writer.append:
-            # append mode for the writer, open writer and get the root id
-            self._root_id = self.writer.open()
-        elif self.reader:
-            self._root_id = self.reader.open()
-        else:
-            # no root id set by writer or reader, initialize now
-            self._root_id = createObjId(obj_type="groups")
+            self.log.debug("root id already set, re-open call")
             if self.writer:
-                # open writer in create mode now that we have a root id
                 self.writer.open()
+            if self.reader:
+                self.reader.open()
+        else:
+            if self.writer and self.writer.append:
+                # append mode for the writer, open writer and get the root id
+                self._root_id = self.writer.open()
+            elif self.reader:
+                self._root_id = self.reader.open()
+            else:
+                # no root id set by writer or reader, initialize now
+                self._root_id = createObjId(obj_type="groups")
+                if self.writer:
+                    # open writer in create mode now that we have a root id
+                    self.writer.open()
             
-            # create a root group just as a memory object
-            group_json = {"links": {}, "attributes": {}, "cpl": {}}
-            group_json["created"] = time.time()
-            self._db[self._root_id] = group_json
+                # create a root group just as a memory object
+                group_json = {"links": {}, "attributes": {}, "cpl": {}}
+                group_json["created"] = time.time()
+                self._db[self._root_id] = group_json
 
         return self._root_id
 
@@ -209,8 +212,8 @@ def close(self):
             self.writer.close()
         if self.reader:
             self.reader.close()
-        self._root_id = None
-        self._db = {}
+        #self._root_id = None
+        #self._db = {}
 
     @property
     def closed(self):
@@ -224,7 +227,6 @@ def __enter__(self):
     def __exit__(self, type, value, traceback):
         """ called on package exit """
         self.log.info("Hdf5db __exit")
-        print("__exit__")
         self.close()
 
     def getObjectById(self, obj_id):
@@ -235,7 +237,6 @@ def getObjectById(self, obj_id):
                 obj_json = self.reader.getObjectById(obj_id)
                 self.db[obj_id] = obj_json
             else:
-                print("keyerror - self.db:", self.db)
                 raise KeyError(f"obj_id: {obj_id} not found")
         obj_json = self.db[obj_id]
 
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 3b4a14ab..042f01df 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -47,30 +47,31 @@ def __init__(self, *args, **kwargs):
     def testSimple(self):
 
         filepath = "test/unit/out/h5py_writer_test_testSimple.h5"
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
-            db.createAttribute(root_id, "attr2", 42)
-            g1_id = db.createGroup()
-            db.createHardLink(root_id, "g1", g1_id)
-            db.createAttribute(g1_id, "a1", "hello")
-            g2_id = db.createGroup()
-            db.createHardLink(root_id, "g2", g2_id)
-
-            g1_1_id = db.createGroup()
-            db.createHardLink(g1_id, "g1.1", g1_1_id)
-            dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
-            arr = np.zeros((10, 10), dtype=np.int32)
-            for i in range(10):
-                for j in range(10):
-                    arr[i, j] = i * j
-            sel_all = selections.select((10, 10), ...)
-            db.setDatasetValues(dset_111_id, sel_all, arr)
-            db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
-            db.createSoftLink(g2_id, "slink", "somewhere")
-            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
-            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+        db.createAttribute(root_id, "attr2", 42)
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        db.createAttribute(g1_id, "a1", "hello")
+        g2_id = db.createGroup()
+        db.createHardLink(root_id, "g2", g2_id)
+
+        g1_1_id = db.createGroup()
+        db.createHardLink(g1_id, "g1.1", g1_1_id)
+        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+        db.createSoftLink(g2_id, "slink", "somewhere")
+        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        db.close()
 
         # open file with h5py and verify changes
         with h5py.File(filepath) as f:
@@ -91,27 +92,32 @@ def testSimple(self):
             g2 = f["g2"]
             self.assertTrue("extlink" in g2)
             self.assertTrue("slink" in g2)
-            db.createAttribute(g1_id, "a2", "bye-bye")
 
-            with h5py.File(filepath) as f:
-                g1 = f["g1"]
-                self.assertEqual(len(g1.attrs), 2)
-                self.assertTrue("a1" in g1.attrs)
-                self.assertTrue("a2" in g1.attrs)
+        db.open()
+        db.createAttribute(g1_id, "a2", "bye-bye")
+        db.close()
 
-            g21 = db.createGroup()
-            db.createHardLink(g2_id, "g2.1", g21)
-            db.flush()
+        with h5py.File(filepath) as f:
+            g1 = f["g1"]
+            self.assertEqual(len(g1.attrs), 2)
+            self.assertTrue("a1" in g1.attrs)
+            self.assertTrue("a2" in g1.attrs)
 
-            with h5py.File(filepath) as f:
-                g2 = f["g2"]
-                self.assertTrue("g2.1" in g2)
+        db.open()
+        g21 = db.createGroup()
+        db.createHardLink(g2_id, "g2.1", g21)
+        db.close()
 
-            sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
-            arr = np.zeros((), dtype=np.int32)
-            arr[()] = 42
-            db.setDatasetValues(dset_111_id, sel, arr)
-            db.flush()
+        with h5py.File(filepath) as f:
+            g2 = f["g2"]
+            self.assertTrue("g2.1" in g2)
+
+        db.open()
+        sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
+        arr = np.zeros((), dtype=np.int32)
+        arr[()] = 42
+        db.setDatasetValues(dset_111_id, sel, arr)
+        db.close()
 
         with h5py.File(filepath) as f:
             dset = f["/g1/g1.1/dset1.1.1"]

From 4894e6d498f6e8b30b57af378609723459ae8750 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 16 Jun 2025 20:56:57 +0200
Subject: [PATCH 051/129] support for h5py_writer

---
 src/h5json/h5pystore/h5py_reader.py   |   3 +-
 src/h5json/h5pystore/h5py_writer.py   |  11 +-
 src/h5json/h5reader.py                |   4 +-
 src/h5json/h5writer.py                |  10 +-
 src/h5json/hdf5db.py                  |  34 +-
 src/h5json/jsonstore/h5json_reader.py |   2 +-
 src/h5json/jsonstore/h5json_writer.py |   4 +-
 test/unit/h5py_writer_test.py         | 594 +++++++++++++-------------
 8 files changed, 342 insertions(+), 320 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index 034566c6..bc4b5820 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -154,7 +154,6 @@ def __init__(
         super().__init__(filepath, app_logger=app_logger)
         self._f = None
         self._root_id = None
-        
 
     def open(self):
         if self._f:
@@ -169,7 +168,7 @@ def open(self):
             else:
                 self.log.info("H5pyReader: creating root id")
                 self._root_id = createObjId(obj_type="groups")
-        
+
         f = h5py.File(self.filepath)
         self._f = f
         self._id_map[self._root_id] = f
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 49acb4eb..b4f81658 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -397,12 +397,12 @@ def flush(self):
         if not self._f:
             self.log.warning("h5py_writer file not open")
             raise IOError("open not called")
-        
+
         self.log.info("h5py_writer.flush()")
-        
+
         root_id = self.db.root_id
         self._id_map[root_id] = "/"
-        
+
         if self.db.new_objects or self._init:
             root_json = self.db.getObjectById(root_id)
 
@@ -428,7 +428,7 @@ def flush(self):
 
         self._init = False  # done with init after first flush
         return True  # all objects written successfully
-    
+
     def open(self):
         """ open HDF5 file """
         self.log.debug("h5pyWriter open")
@@ -439,14 +439,13 @@ def open(self):
         mode = 'a' if self._append else 'w'
         self.log.info(f"creating h5py file: {self._filepath} mode: {mode}")
         self._f = h5py.File(self._filepath, mode=mode)
-        self._append = True  # switch to append mode for next file open 
+        self._append = True  # switch to append mode for next file open
         if self.db.root_id:
             self._root_id = self.db.root_id
         else:
             self._root_id = createObjId(obj_type="groups")
         return self._root_id
 
-
     def close(self):
         """ close storage handle """
         self.log.debug("h5py_writer.close()")
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index fbc53491..3bf49ca7 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -40,12 +40,12 @@ def db(self):
         if not self._db_ref:
             raise ValueError("db not available")
         return self._db_ref()
-    
+
     @property
     def filepath(self):
         """ return filepath """
         return self._filepath
-    
+
     @property
     def closed(self):
         """ return True if the reader handle is closed (or never opened) """
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index bc52523d..3dfb8da8 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -44,7 +44,7 @@ def set_db(self, db):
     @property
     def filepath(self):
         return self._filepath
-    
+
     @property
     def closed(self):
         return self.isClosed()
@@ -55,15 +55,15 @@ def db(self):
             self.log.debug("db not available")
             return None
         return self._db_ref()
-    
+
     @property
     def append(self):
         return self._append
-    
-    #property
+
+    @property
     def no_data(self):
         return self._no_data
-    
+
     @abstractmethod
     def open(self):
         """ open storage handle, return root_id"""
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 340adc31..581399f6 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -50,7 +50,7 @@ def __init__(
 
         self._new_objects = set()  # set of for newly created objects
         self._dirty_objects = set()  # set of modified objects
-        self._deleted_objects = set() # set of deleted objects
+        self._deleted_objects = set()  # set of deleted objects
 
         self._root_id = None
 
@@ -65,15 +65,6 @@ def __init__(
             self._writer.set_db(self)
         else:
             self._writer = None
-            
-        #root_id = createObjId(obj_type="groups")
-        # create a root group
-        #group_json = {"links": {}, "attributes": {}, "cpl": {}}
-        #group_json["created"] = time.time()
-
-         
-        #self._db[root_id] = group_json
-        # self._root_id = root_id
 
     @property
     def db(self):
@@ -143,7 +134,7 @@ def new_objects(self):
     @property
     def dirty_objects(self):
         return self._dirty_objects
-    
+
     @property
     def deleted_objects(self):
         return self._deleted_objects
@@ -178,6 +169,7 @@ def flush(self):
 
     def open(self):
         """ open reader and writer if set """
+        self.log.debug("db.open()")
         if self.root_id:
             self.log.debug("root id already set, re-open call")
             if self.writer:
@@ -185,23 +177,39 @@ def open(self):
             if self.reader:
                 self.reader.open()
         else:
+            self.log.debug("db.open, getting root_id")
+
             if self.writer and self.writer.append:
                 # append mode for the writer, open writer and get the root id
+                self.log.debug("db.open, write append, getting root_id from writer")
                 self._root_id = self.writer.open()
+                if self.reader:
+                    reader_root_id = self.reader.open()
+                    if reader_root_id != self._root_id:
+                        # TBD: need someway to reconcile if both reader and writer have
+                        # an potentiated idea on what there root id is
+                        self.log.warn("reader root_id does not match writer root_id")
             elif self.reader:
+                self.log.debug("db.open, getting root_id from reader")
                 self._root_id = self.reader.open()
+                if self.writer:
+                    writer_root_id = self.writer.open()
+                    if writer_root_id != self._root_id:
+                        # TBD: same as above, need to deal with inconsistent root ids
+                        self.log.warning("writer root_id does not match reader root_id")
             else:
                 # no root id set by writer or reader, initialize now
                 self._root_id = createObjId(obj_type="groups")
                 if self.writer:
                     # open writer in create mode now that we have a root id
                     self.writer.open()
-            
+
                 # create a root group just as a memory object
                 group_json = {"links": {}, "attributes": {}, "cpl": {}}
                 group_json["created"] = time.time()
                 self._db[self._root_id] = group_json
 
+        self.log.debug(f"db.open() - returning root_id: {self._root_id}")
         return self._root_id
 
     def close(self):
@@ -212,8 +220,6 @@ def close(self):
             self.writer.close()
         if self.reader:
             self.reader.close()
-        #self._root_id = None
-        #self._db = {}
 
     @property
     def closed(self):
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
index 2013332a..40f8e5e4 100644
--- a/src/h5json/jsonstore/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -53,7 +53,7 @@ def open(self):
 
         if "root" not in h5json:
             raise Exception("no root key in input file")
-        
+
         self._root_id = "g-" + h5json["root"]
         if self.db.root_id and self.db.root_id != self._root_id:
             self.log.warning("h5json root id doesn't match db root id")
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index c8da27ec..709f34fd 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -46,10 +46,10 @@ def flush(self):
             msg = "flush called prior to open"
             self.log.warning(msg)
             raise IOError(msg)
-        
+
         self.log.info("flush")
         return False
-    
+
     def open(self):
         """ file open """
         # no incremental updates with h5json writer, so just fetch the root_id here
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 042f01df..b0889b3d 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -12,6 +12,8 @@
 import unittest
 import time
 import logging
+import os
+
 import h5py
 import numpy as np
 from h5json import Hdf5db
@@ -47,9 +49,13 @@ def __init__(self, *args, **kwargs):
     def testSimple(self):
 
         filepath = "test/unit/out/h5py_writer_test_testSimple.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+
         db = Hdf5db(app_logger=self.log)
         db.writer = H5pyWriter(filepath, no_data=False)
         root_id = db.open()
+        self.assertEqual(db.getObjectIdByPath("/"), root_id)
         db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
         db.createAttribute(root_id, "attr2", 42)
         g1_id = db.createGroup()
@@ -133,19 +139,21 @@ def testSimple(self):
     def testNullSpaceAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testNullSpaceAttribute.h5"
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
-            item = db.getAttribute(root_id, "A1")
-            self.assertTrue("shape" in item)
-            shape_item = item["shape"]
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_NULL")
-            self.assertTrue(item["created"] > time.time() - 1.0)
-            value = db.getAttributeValue(root_id, "A1")
-            self.assertEqual(value, None)
-            db.flush()
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in item)
+        shape_item = item["shape"]
+        self.assertTrue("class" in shape_item)
+        self.assertEqual(shape_item["class"], "H5S_NULL")
+        self.assertTrue(item["created"] > time.time() - 1.0)
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(value, None)
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -154,28 +162,30 @@ def testNullSpaceAttribute(self):
     def testScalarAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testNullScalarAttribute.h5"
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-            dims = ()
-            value = 42
-            print("test create attribute A1")
-            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
-            self.assertEqual(item["value"], 42)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dims = ()
+        value = 42
+        db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+        self.assertEqual(item["value"], 42)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        shape = item["shape"]
+        self.assertEqual(shape["class"], "H5S_SCALAR")
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -186,22 +196,25 @@ def testScalarAttribute(self):
     def testFixedStringAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testFixedStringAttribute.h5"
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-            value = "Hello, world!"
-            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["length"], 13)
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        value = "Hello, world!"
+        db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["length"], 13)
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -212,28 +225,29 @@ def testFixedStringAttribute(self):
     def testVlenAsciiAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testVlenAsciiAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
         value = b"Hello, world!"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-
-            dt = special_dtype(vlen=bytes)
-
-            # write the attribute
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            # read it back
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dt = special_dtype(vlen=bytes)
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -244,28 +258,29 @@ def testVlenAsciiAttribute(self):
     def testVlenUtf8Attribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testVlenUtf8Attribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
         value = "one: \u4e00"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-
-            dt = special_dtype(vlen=str)
-
-            # write the attribute
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            # read it back
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
-            self.assertEqual(item["value"], value)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dt = special_dtype(vlen=str)
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+        self.assertEqual(item["value"], value)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -276,22 +291,25 @@ def testVlenUtf8Attribute(self):
     def testIntAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testIntAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
         value = [2, 3, 5, 7, 11]
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "A1", value, dtype=np.int16)
-            item = db.getAttribute(root_id, "A1")
-            self.assertEqual(item["value"], [2, 3, 5, 7, 11])
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            item_shape = item["shape"]
-            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(item_shape["dims"], [5,])
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        db.createAttribute(root_id, "A1", value, dtype=np.int16)
+        item = db.getAttribute(root_id, "A1")
+        self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(item_shape["dims"], [5,])
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -304,27 +322,26 @@ def testIntAttribute(self):
     def testCreateReferenceAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testCreateReferenceAttribute.h5"
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-
-            dset_id = db.createDataset(shape=(), dtype=np.int32)
-            db.createHardLink(root_id, "DS1", dset_id)
-
-            dt = special_dtype(ref=Reference)
-
-            ds1_ref = "datasets/" + dset_id
-            value = [ds1_ref,]
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            attr = db.getAttribute(root_id, "A1")
-            self.assertTrue("shape" in attr)
-
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_REFERENCE")
-            self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
-            attr_value = db.getAttributeValue(root_id, "A1")
-            self.assertEqual(len(attr_value), 1)
-            self.assertEqual(attr_value[0], ds1_ref.encode('ascii'))
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
+        dt = special_dtype(ref=Reference)
+        ds1_ref = "datasets/" + dset_id
+        value = [ds1_ref,]
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        attr = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in attr)
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+        self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+        attr_value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(len(attr_value), 1)
+        self.assertEqual(attr_value[0], ds1_ref.encode('ascii'))
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("A1" in f.attrs)
@@ -336,37 +353,35 @@ def testCreateReferenceAttribute(self):
     def testCreateVlenReferenceAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testVlenReferenceAttribute.h5"
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape=(), dtype=np.int32)
-            db.createHardLink(root_id, "DS1", dset_id)
-            grp_id = db.createGroup()
-            db.createHardLink(root_id, "G1", grp_id)
-
-            dt_base = special_dtype(ref=Reference)
-            dt = special_dtype(vlen=dt_base)
-
-            ds1_ref = "datasets/" + dset_id
-            grp_ref = "groups/" + grp_id
-            ref_arr = np.zeros((2,), dtype=dt_base)
-            ref_arr[0] = ds1_ref
-            ref_arr[1] = grp_ref
-            vlen_arr = np.zeros((), dtype=dt)
-            vlen_arr[()] = ref_arr
-
-            db.createAttribute(root_id, "A1", vlen_arr)
-            item = db.getAttribute(root_id, "A1")
-
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_VLEN")
-            self.assertEqual(item_type["size"], "H5T_VARIABLE")
-            base_type = item_type["base"]
-            self.assertEqual(base_type["class"], "H5T_REFERENCE")
-            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
-
-            item_shape = item["shape"]
-            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
+        grp_id = db.createGroup()
+        db.createHardLink(root_id, "G1", grp_id)
+        dt_base = special_dtype(ref=Reference)
+        dt = special_dtype(vlen=dt_base)
+        ds1_ref = "datasets/" + dset_id
+        grp_ref = "groups/" + grp_id
+        ref_arr = np.zeros((2,), dtype=dt_base)
+        ref_arr[0] = ds1_ref
+        ref_arr[1] = grp_ref
+        vlen_arr = np.zeros((), dtype=dt)
+        vlen_arr[()] = ref_arr
+        db.createAttribute(root_id, "A1", vlen_arr)
+        item = db.getAttribute(root_id, "A1")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_VLEN")
+        self.assertEqual(item_type["size"], "H5T_VARIABLE")
+        base_type = item_type["base"]
+        self.assertEqual(base_type["class"], "H5T_REFERENCE")
+        self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SCALAR")
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("DS1" in f)
@@ -383,35 +398,33 @@ def testCreateVlenReferenceAttribute(self):
     def testCommittedType(self):
 
         filepath = "test/unit/out/h5py_writer_test_testCommittedType.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
         dt = np.dtype("S15")
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-
-            ctype_id = db.createCommittedType(dt)
-            db.createHardLink(root_id, "ctype", ctype_id)
-            item = db.getObjectById(ctype_id)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            db.createHardLink(root_id, "T1", ctype_id)
-
-            item_type = item["type"]
-
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 15)
-
-            # create an attribute using the committed type
-            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertEqual(attr["value"], "hello world!")
-
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_STRING")
-            self.assertEqual(attr_type["length"], 15)
-            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.createHardLink(root_id, "T1", ctype_id)
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item_type["length"], 15)
+        # create an attribute using the committed type
+        db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], "hello world!")
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_STRING")
+        self.assertEqual(attr_type["length"], 15)
+        self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("T1" in f)
@@ -426,44 +439,41 @@ def testCommittedType(self):
     def testCommittedCompoundType(self):
 
         filepath = "test/unit/out/h5py_writer_test_testCommittedCompoundType.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5pyWriter(filepath, no_data=False)
-            root_id = db.getObjectIdByPath("/")
-
-            dt_str = special_dtype(vlen=str)
-            fields = []
-            fields.append(("field_1", np.dtype(">i8")))
-            fields.append(("field_2", np.dtype(">f8")))
-            fields.append(("field_3", np.dtype("S15")))
-            fields.append(("field_4", dt_str))
-            dt = np.dtype(fields)
-
-            ctype_id = db.createCommittedType(dt)
-            db.createHardLink(root_id, "ctype", ctype_id)
-            item = db.getObjectById(ctype_id)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            db.createHardLink(root_id, "T1", ctype_id)
-
-            item_type = item["type"]
-
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            fields = item_type["fields"]
-            self.assertEqual(len(fields), 4)
-
-            # create an attribute using the committed type
-            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
-            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertEqual(attr["value"], list(attr_value))
-            attr_shape = attr["shape"]
-            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
-
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
-            arr = db.getAttributeValue(root_id, "A1")
-            self.assertTrue(isinstance(arr, np.ndarray))
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dt_str = special_dtype(vlen=str)
+        fields = []
+        fields.append(("field_1", np.dtype(">i8")))
+        fields.append(("field_2", np.dtype(">f8")))
+        fields.append(("field_3", np.dtype("S15")))
+        fields.append(("field_4", dt_str))
+        dt = np.dtype(fields)
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.createHardLink(root_id, "T1", ctype_id)
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_COMPOUND")
+        fields = item_type["fields"]
+        self.assertEqual(len(fields), 4)
+        # create an attribute using the committed type
+        attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+        db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], list(attr_value))
+        attr_shape = attr["shape"]
+        self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+        arr = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(arr, np.ndarray))
+        db.close()
 
         with h5py.File(filepath) as f:
             self.assertTrue("T1" in f)
@@ -483,74 +493,82 @@ def testReaderWithUpdate(self):
 
         file_in = "data/json/tall.json"
         file_out = "test/unit/out/h5py_writer_test_testReaderWithUpdate.h5"
+        if os.path.isfile(file_out):
+            os.remove(file_out)  # cleanup any previous run
+
+        db = Hdf5db(app_logger=self.log)
+        db.reader = H5JsonReader(file_in)
+        db.writer = H5pyWriter(file_out)
+        db.open()
+        # close should create everything the json reader read to the output file
+        db.close()
+
+        with h5py.File(file_out) as f:
+            self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+            dset111 = f["/g1/g1.1/dset1.1.1"]
+            self.assertEqual(len(dset111.attrs), 2)
+
+        db.open()
+        dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+        db.createAttribute(dset111_id, "attr3", "hello")
+        db.close()
+
+        with h5py.File(file_out) as f:
+            self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+            dset111 = f["/g1/g1.1/dset1.1.1"]
+            self.assertEqual(len(dset111.attrs), 3)
+            self.assertEqual(dset111.attrs["attr3"], b"hello")
+
+        db.open()
+        db.createAttribute(dset111_id, "attr3", "bye-bye")
+        db.close()
+
+        with h5py.File(file_out) as f:
+            self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+            dset111 = f["/g1/g1.1/dset1.1.1"]
+            self.assertEqual(len(dset111.attrs), 3)
+            self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
+            g1 = f["g1"]
+
+        db.open()
+        # create a new group
+        g13_id = db.createGroup()
+        g1_id = db.getObjectIdByPath("/g1")
+        db.createHardLink(g1_id, "g1.3", g13_id)
+        db.close()
+
+        with h5py.File(file_out) as f:
+            g1 = f["g1"]
+            self.assertEqual(len(g1), 3)
+            self.assertTrue("g1.3" in g1)
+
+        db.open()
+        # create a new dataset
+        dset_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        db.createHardLink(g1_id, "DS1", dset_id)
+        db.close()
+
+        with h5py.File(file_out) as f:
+            g1 = f["g1"]
+            self.assertTrue("DS1" in g1)
+            ds1 = g1["DS1"]
+            self.assertEqual(ds1.shape, (10, 10))
+
+        db.open()
+        arr = np.asarray(range(10), dtype=np.int32)
+        sel = selections.select((10, 10), (slice(5, 6), slice(0, 10)))
+        db.setDatasetValues(dset_id, sel, arr)
+        db.close()
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.reader = H5JsonReader(file_in)
-            db.writer = H5pyWriter(file_out, no_data=False)
-            db.flush()
-
-            with h5py.File(file_out) as f:
-                self.assertTrue("/g1/g1.1/dset1.1.1" in f)
-                dset111 = f["/g1/g1.1/dset1.1.1"]
-                print("dset111 attrs:", list(dset111.attrs.keys()))
-                self.assertEqual(len(dset111.attrs), 2)
-
-            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
-            db.createAttribute(dset111_id, "attr3", "hello")
-            db.flush()
-
-            with h5py.File(file_out) as f:
-                self.assertTrue("/g1/g1.1/dset1.1.1" in f)
-                dset111 = f["/g1/g1.1/dset1.1.1"]
-                self.assertEqual(len(dset111.attrs), 3)
-                self.assertEqual(dset111.attrs["attr3"], b"hello")
-
-            db.createAttribute(dset111_id, "attr3", "bye-bye")
-            db.flush()
-
-            with h5py.File(file_out) as f:
-                self.assertTrue("/g1/g1.1/dset1.1.1" in f)
-                dset111 = f["/g1/g1.1/dset1.1.1"]
-                self.assertEqual(len(dset111.attrs), 3)
-                self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
-                g1 = f["g1"]
-
-            # create a new group
-            g13_id = db.createGroup()
-            g1_id = db.getObjectIdByPath("/g1")
-            db.createHardLink(g1_id, "g1.3", g13_id)
-            db.flush()
-
-            with h5py.File(file_out) as f:
-                g1 = f["g1"]
-                self.assertEqual(len(g1), 3)
-                self.assertTrue("g1.3" in g1)
-
-            # create a new dataset
-            dset_id = db.createDataset(shape=(10, 10), dtype=np.int32)
-            db.createHardLink(g1_id, "DS1", dset_id)
-            db.flush()
-
-            with h5py.File(file_out) as f:
-                g1 = f["g1"]
-                self.assertTrue("DS1" in g1)
-                ds1 = g1["DS1"]
-                self.assertEqual(ds1.shape, (10, 10))
-
-            arr = np.asarray(range(10), dtype=np.int32)
-            sel = selections.select((10, 10), (slice(5, 6), slice(0, 10)))
-            db.setDatasetValues(dset_id, sel, arr)
-            db.flush()
-
-            with h5py.File(file_out) as f:
-                ds1 = f["/g1/DS1"]
-                data = ds1[:, :]
-                for i in range(10):
-                    for j in range(10):
-                        if i == 5:
-                            self.assertEqual(data[i, j], j)
-                        else:
-                            self.assertEqual(data[i, j], 0)
+        with h5py.File(file_out) as f:
+            ds1 = f["/g1/DS1"]
+            data = ds1[:, :]
+            for i in range(10):
+                for j in range(10):
+                    if i == 5:
+                        self.assertEqual(data[i, j], j)
+                    else:
+                        self.assertEqual(data[i, j], 0)
 
 
 if __name__ == "__main__":

From 8324a46c1753372b08179b2a5d81024193fd6497 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 18 Jun 2025 20:43:30 +0100
Subject: [PATCH 052/129] fix jsontoh5 and h5tojson for new db interface

---
 src/h5json/h5pystore/h5py_writer.py   |  2 +-
 src/h5json/h5tojson/h5tojson.py       | 27 +++++++++----------------
 src/h5json/jsonstore/h5json_writer.py |  2 +-
 src/h5json/jsontoh5/jsontoh5.py       | 29 ++++++++++-----------------
 4 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index b4f81658..9bea57b0 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -416,7 +416,7 @@ def flush(self):
                 obj = self._f[h5path]
                 self.updateAttributes(obj_id, obj)
                 collection = getCollectionForId(obj_id)
-                if collection == "datasets":
+                if collection == "datasets" and not self.no_data:
                     if self._init:
                         self.initializeDatasetValues(obj_id, obj)
                     else:
diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/h5tojson/h5tojson.py
index b479cdd4..284de84c 100755
--- a/src/h5json/h5tojson/h5tojson.py
+++ b/src/h5json/h5tojson/h5tojson.py
@@ -12,7 +12,6 @@
 import sys
 import os.path as op
 import logging
-import logging.handlers
 
 from h5json import Hdf5db
 from h5json.jsonstore.h5json_writer import H5JsonWriter
@@ -33,28 +32,22 @@ def main():
             filename = sys.argv[i]
 
     # create logger
-    log = logging.getLogger("h5tojson")
-    # log.setLevel(logging.WARN)
-    log.setLevel(logging.INFO)
-    # add log handler
-    handler = logging.FileHandler("./h5tojson.log")
-
-    # add handler to logger
-    log.addHandler(handler)
+    logfname = "h5tojson.log"
+    loglevel = logging.DEBUG
+    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+    log = logging.getLogger()
 
+    # check that the input file exists
     if not op.isfile(filename):
         sys.exit(f"Cannot find file: {filename}")
 
     log.info(f"h5tojson {filename}")
 
-    kwargs = {"app_logger": log}
-    reader = H5pyReader(filename, **kwargs)
-    writer = H5JsonWriter(None, no_data=no_data, **kwargs)
-    kwargs["h5_reader"] = reader
-    kwargs["h5_writer"] = writer
-
-    with Hdf5db(**kwargs) as db:
-        db.flush()
+    db = Hdf5db(app_logger=log)
+    db.reader = H5pyReader(filename, app_logger=log)
+    db.writer = H5JsonWriter(None, no_data=no_data, app_logger=log)
+    db.open()  # read HDF5 data into db
+    db.close()  # close will trigger write to json file
 
 
 if __name__ == "__main__":
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index 709f34fd..92d3499a 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -227,7 +227,7 @@ def dumpDataset(self, obj_id):
         if attributes:
             response["attributes"] = attributes
 
-        if not self._no_data:
+        if not self.no_data:
             if num_elements > 0:
                 sel_all = selections.select(dims, ...)
                 arr = self.db.getDatasetValues(obj_id, sel_all)
diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/jsontoh5/jsontoh5.py
index d572e58e..28f5e002 100755
--- a/src/h5json/jsontoh5/jsontoh5.py
+++ b/src/h5json/jsontoh5/jsontoh5.py
@@ -12,7 +12,6 @@
 import sys
 import os.path as op
 import logging
-import logging.handlers
 
 from h5json import Hdf5db
 from h5json.h5pystore.h5py_writer import H5pyWriter
@@ -36,29 +35,23 @@ def main():
             hdf5_filename = sys.argv[i]
 
     # create logger
-    log = logging.getLogger("h5json")
-    # log.setLevel(logging.WARN)
-    log.setLevel(logging.INFO)
-    # add log handler
-    handler = logging.FileHandler("./jsontoh5.log")
-
-    # add handler to logger
-    log.addHandler(handler)
+    logfname = "jsontoh5.log"
+    loglevel = logging.DEBUG
+    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+    log = logging.getLogger()
 
+    # check that the input file exists
     if not op.isfile(json_filename):
         sys.exit(f"Cannot find file: {json_filename}")
 
     log.info(f"jsontoh5 {json_filename} to {hdf5_filename}")
 
-    kwargs = {"app_logger": log}
-
-    h5_reader = H5JsonReader(json_filename, **kwargs)
-    h5_writer = H5pyWriter(hdf5_filename, no_data=no_data, **kwargs)
-    kwargs["h5_reader"] = h5_reader
-    kwargs["h5_writer"] = h5_writer
-
-    with Hdf5db(**kwargs) as db:
-        db.flush()
+    db = Hdf5db(app_logger=log)
+    db.reader = H5JsonReader(json_filename, app_logger=log)
+    db.writer = H5pyWriter(hdf5_filename, no_data=no_data, app_logger=log)
+    db.open()  # read json data
+    # close should create everything the json reader read to the output file
+    db.close()
 
 
 if __name__ == "__main__":

From 5c82129b56a4890ffaa4bf6f3e3695e0313e46c9 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 19 Jun 2025 19:49:34 +0100
Subject: [PATCH 053/129] update of hsds_writer

---
 src/h5json/h5pystore/h5py_writer.py |  1 +
 src/h5json/hsdsstore/hsds_reader.py | 11 ++--
 src/h5json/hsdsstore/hsds_writer.py | 80 ++++++++++++++++++++++++----
 test/unit/h5py_writer_test.py       | 13 +++++
 test/unit/hsds_writer_test.py       | 82 +++++++++++++++++++++++++++++
 5 files changed, 172 insertions(+), 15 deletions(-)
 create mode 100644 test/unit/hsds_writer_test.py

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 9bea57b0..14942c11 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -409,6 +409,7 @@ def flush(self):
             if "links" in root_json:
                 root_links = root_json["links"]
                 self._createObjects(self._f, root_links, visited=set((root_id,)))
+
         # update attributes, dataset values
         for obj_id in self._id_map:
             if self.db.is_dirty(obj_id) or self._init:
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index b4de31d2..55a8c022 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -86,13 +86,14 @@ def __init__(
             kwargs["timeout"] = timeout
         # save these for when we create the connection
         self._http_kwargs = kwargs
+        self._http_conn = None
 
         super().__init__(domain_path, app_logger=app_logger)
 
     def open(self):
         if self._http_conn:
             return  # open already called
-        
+
         kwargs = self._http_kwargs
         http_conn = HttpConn(self.filepath, **kwargs)
 
@@ -132,7 +133,7 @@ def open(self):
         if "domain_objs" in root_json:
             domain_objs = root_json["domain_objs"]
             objdb.load(domain_objs)
-        """ 
+        """
         if "limits" in domain_json:
             self._limits = domain_json["limits"]
         else:
@@ -147,7 +148,6 @@ def open(self):
 
         return self._root_id
 
-
     @property
     def http_conn(self):
         return self._http_conn
@@ -157,7 +157,10 @@ def close(self):
             self._http_conn.close()
 
     def isClosed(self):
-        return False is self._http_conn else True
+        if self._http_conn:
+            return False
+        else:
+            return True
 
     def get_root_id(self):
         """ Return root id """
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 8144e085..c4a7c397 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -10,6 +10,7 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import logging
+import time
 
 from ..objid import getCollectionForId, getUuidFromId
 
@@ -111,6 +112,7 @@ def __init__(
         self._track_order = track_order
         self._linked_domain = linked_domain
         self._domain_json = None
+        self._last_flush_time = 0
 
     def open(self):
         """ setup domain for writing """
@@ -140,7 +142,7 @@ def open(self):
                 params["include_attrs"] = 1
                 params["include_links"] = 1
             """
-        
+
             domain_json = None
             rsp = http_conn.GET(req, params=params)
 
@@ -166,7 +168,7 @@ def open(self):
                         # failed to delete
                         http_conn.close()
                         raise IOError(rsp.status_code, rsp.reason)
-                        
+
             if not domain_json:
                 # domain doesn't exist, create it
                 body = {}
@@ -176,7 +178,7 @@ def open(self):
                 if self._owner:
                     body["owner"] = self._owner
                 if self._linked_domain:
-                    body["linked_domain"] = linked_domain
+                    body["linked_domain"] = self._linked_domain
                 if self._track_order:
                     create_props = {"CreateOrder": 1}
                     group_body = {"creationProperties": create_props}
@@ -200,7 +202,7 @@ def open(self):
             root_id = domain_json["root"]
 
             self._root_id = root_id
-       
+
             if "limits" in domain_json:
                 self._limits = domain_json["limits"]
             else:
@@ -214,11 +216,66 @@ def open(self):
 
         return self._root_id
 
-
     @property
     def http_conn(self):
         return self._http_conn
-    
+
+    def createObjects(self, obj_ids):
+        MAX_OBJECTS_PER_REQUEST = 1
+        collections = ("groups", "datasets", "datatypes")
+        col_items = {}
+        for collection in collections:
+            col_items[collection] = []
+
+        for obj_id in obj_ids:
+            if obj_id == self._root_id:
+                continue  # this was created when the domain was
+            collection = getCollectionForId(obj_id)
+            obj_json = self.db.getObjectById(obj_id)
+            item = {"id": obj_id}
+            for key in ("links", "attributes"):
+                if key in obj_json:
+                    item[key] = obj_json[key]
+            items = col_items[collection]
+            items.append(item)
+            if len(items) == MAX_OBJECTS_PER_REQUEST:
+                print("items:", items)
+                post_rsp = self.http_conn.POST("/" + collection, items)
+                print("post_rsp.status_code:", post_rsp.status_code)
+                if post_rsp.is_json:
+                    print("post_rsp.json:", post_rsp.json())
+                items.clear()
+
+        # handle any remainder items
+        for collection in collections:
+            items = col_items[collection]
+            if items:
+                self.http_conn.POST("/" + collection, items)
+
+    def updateLinks(self, grp_ids):
+        """ update any modified links of the given objects """
+
+        print("updateLinks:", grp_ids)
+        body = {}  # body will hold a map of grp ids to link lists
+
+        for grp_id in grp_ids:
+            if getCollectionForId(grp_id) != "groups":
+                continue  # ignore datasets and datatypes
+            grp_json = self.db.getObjectById(grp_id)
+            grp_links = grp_json["links"]
+            print(f"grp_id {grp_id} links: {grp_links}")
+            for link_json in grp_links:
+                if "created" not in link_json:
+                    self.log.error(f"hsds_writer> expected created timestamp in link: {link_json}")
+                created = link_json["created"]
+                if created > self._last_flush_time:
+                    # new link, add to our list
+                    if grp_id not in body:
+                        body[grp_id] = {}
+
+        if body:
+            print("updateLinks, body:", body)
+
     def flush(self):
         """ Write dirty items """
 
@@ -230,30 +287,31 @@ def flush(self):
         self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
         self.log.debug(f"    deleted object count: {len(self.db.deleted_objects)}")
 
-        #root_id = self.db.root_id
         if self._init:
             # initialize all existing objects
-            self.log.debug("flush -- init is true")
+            self.log.debug(f"flush -- init is true, self.db: {self.db.db}")
             for obj_id in self.db:
                 self.log.debug(f"init: {obj_id}")
+            self.createObjects(self.db.db.keys())
             self._init = False
         elif self.db.new_objects:
             for obj_id in self.db.new_objects:
                 self.log.debug(f"new obj id: {obj_id}")
+            self.createObjects(self.db.new_objects)
 
         for obj_id in self.db.dirty_objects:
             self.log.debug(f"dirty object id: {obj_id}")
+            self.updateLinks(self.db.dirty_objects)
 
         for obj_id in self.db.deleted_objects:
             self.log.debug(f"deleted object: {obj_id}")
-        
+
+        self._last_flush_time = time.time()
         return True  # all objects written successfully
 
     def close(self):
         # over-ride of H5Writer method
         self.flush()
-        self.http_conn.close()
-        self._http_conn = None
 
     def isClosed(self):
         """ return closed status """
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index b0889b3d..3ff91bee 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -61,6 +61,19 @@ def testSimple(self):
         g1_id = db.createGroup()
         db.createHardLink(root_id, "g1", g1_id)
         db.createAttribute(g1_id, "a1", "hello")
+        db.close()
+
+        # open file with h5py and verify changes
+        with h5py.File(filepath) as f:
+            self.assertTrue("attr1", f.attrs)
+            self.assertTrue("attr2", f.attrs)
+            self.assertEqual(len(f), 1)
+            self.assertTrue("g1" in f)
+            g1 = f["g1"]
+            self.assertTrue("a1" in g1.attrs)
+            self.assertEqual(len(g1), 0)
+
+        db.open()
         g2_id = db.createGroup()
         db.createHardLink(root_id, "g2", g2_id)
 
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
new file mode 100644
index 00000000..a3ba9bea
--- /dev/null
+++ b/test/unit/hsds_writer_test.py
@@ -0,0 +1,82 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import time
+import logging
+import h5py
+import numpy as np
+from h5json import Hdf5db
+from h5json.hsdsstore.hsds_writer import HSDSWriter
+from h5json.hdf5dtype import special_dtype, Reference
+from h5json import selections
+
+
+class HSDSWriterTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(HSDSWriterTest, self).__init__(*args, **kwargs)
+        # main
+
+        # create logger
+        logfname = "hsds_writer_test.log"
+        loglevel = logging.DEBUG
+        logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+        self.log = logging.getLogger()
+        self.log.info("init!")
+
+    def testSimple(self):
+
+        filepath = "/home/test_user1/writer_test.h5"
+        db = Hdf5db(app_logger=self.log)
+        db.writer = HSDSWriter(filepath)
+        root_id = db.open()
+        print("root_id:", root_id)
+        db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+        db.createAttribute(root_id, "attr2", 42)
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        db.createAttribute(g1_id, "a1", "hello")
+        g2_id = db.createGroup()
+        db.createHardLink(root_id, "g2", g2_id)
+
+        g1_1_id = db.createGroup()
+        db.createHardLink(g1_id, "g1.1", g1_1_id)
+        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+        db.createSoftLink(g2_id, "slink", "somewhere")
+        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        db.flush()
+
+        db.createAttribute(g1_id, "a2", "bye-bye")
+        db.flush()
+
+        g21 = db.createGroup()
+        db.createHardLink(g2_id, "g2.1", g21)
+        db.flush()
+
+        sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
+        arr = np.zeros((), dtype=np.int32)
+        arr[()] = 42
+        db.setDatasetValues(dset_111_id, sel, arr)
+        db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()

From 286f239c6761e5e265b8942f1ba1df46985a7425 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 26 Jun 2025 16:35:18 +0100
Subject: [PATCH 054/129] multi-update for hsds-writer

---
 src/h5json/dset_util.py             |  14 ++
 src/h5json/hdf5db.py                |   4 +-
 src/h5json/hsdsstore/hsds_writer.py | 205 ++++++++++++++++++++++++----
 src/h5json/selections.py            |   2 +-
 test/unit/h5py_writer_test.py       |   5 +-
 test/unit/hsds_reader_test.py       | 119 ++++++++--------
 test/unit/hsds_writer_test.py       |  95 ++++++++++++-
 7 files changed, 353 insertions(+), 91 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 5b10323f..496734d3 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -11,6 +11,7 @@
 ##############################################################################
 
 import time
+import numpy as np
 
 
 def resize_dataset(dset_json, shape):
@@ -40,3 +41,16 @@ def resize_dataset(dset_json, shape):
 
     shape_json["dims"] = list(shape)
     dset_json["modified"] = time.time()
+
+
+def getNumElements(dset_json):
+    shape_json = dset_json["shape"]
+    shape_class = shape_json["class"]
+    if shape_class == "H5S_NULL":
+        num_elements = 0
+    elif shape_class == "H5S_SCALAR":
+        num_elements = 1
+    elif shape_class == "H5S_SIMPLE":
+        dims = shape_json["dims"]
+        num_elements = int(np.prod(dims))
+    return num_elements
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 581399f6..8d88d6ec 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -731,7 +731,9 @@ def createDataset(
         if self.closed:
             raise ValueError("db is closed")
         type_json = getTypeItem(dtype)
-        if shape == "H5S_NULL":
+        if shape is None:
+            raise ValueError("shape not set")
+        elif shape == "H5S_NULL":
             shape_json = {"class": "H5S_NULL"}
         elif shape == ():
             shape_json = {"class": "H5S_SCALAR"}
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index c4a7c397..7b022c34 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -14,8 +14,9 @@
 
 from ..objid import getCollectionForId, getUuidFromId
 
-from ..hdf5dtype import createDataType
-from ..array_util import jsonToArray, bytesToArray
+from ..hdf5dtype import createDataType, isVlen
+from ..array_util import jsonToArray, bytesToArray, arrayToBytes, bytesArrayToList
+from ..dset_util import getNumElements
 from .. import selections
 from ..h5writer import H5Writer
 from .httpconn import HttpConn
@@ -121,6 +122,7 @@ def open(self):
             http_conn = self._http_conn
         else:
             kwargs = self._http_kwargs
+            kwargs["retries"] = 1  # tbd: test setting
             http_conn = HttpConn(self.filepath, **kwargs)
             if self._append:
                 http_conn._mode = "a"
@@ -220,10 +222,36 @@ def open(self):
     def http_conn(self):
         return self._http_conn
 
+    def getDatasetSize(self, dset_id):
+        """ Return the size of the given dataset """
+
+        dset_json = self.db.getObjectById(dset_id)
+        num_elements = getNumElements(dset_json)
+        dtype = self.db.getDtype(dset_json)
+        if isVlen(dtype):
+            item_size = 1024  # random guess at size of variable length types
+        else:
+            item_size = dtype.itemsize
+        return num_elements * item_size
+
     def createObjects(self, obj_ids):
-        MAX_OBJECTS_PER_REQUEST = 1
+        """ create the objects referenced in obj_ids """
+
+        MAX_INIT_SIZE = 4096  # max size to include init values in dataset creation
+
+        def multiPost(items):
+            self.log.debug(f"hsds_writer> POST request {collection} for {len(items)} objects")
+            post_rsp = self.http_conn.POST("/" + collection, items)
+            self.log.debug(f"hsds_writer> POST post_rsp.status_code: {post_rsp.status_code}")
+            if post_rsp.is_json:
+                self.log.debug(f"hsds_writer> post_rsp.json: {post_rsp.json()}")
+            items.clear()
+
+        self.log.debug(f"hsds_writer> createObjects, {len(obj_ids)} objects")
+        MAX_OBJECTS_PER_REQUEST = 3
         collections = ("groups", "datasets", "datatypes")
         col_items = {}
+        dset_value_update_ids = set()
         for collection in collections:
             col_items[collection] = []
 
@@ -233,48 +261,169 @@ def createObjects(self, obj_ids):
             collection = getCollectionForId(obj_id)
             obj_json = self.db.getObjectById(obj_id)
             item = {"id": obj_id}
-            for key in ("links", "attributes"):
-                if key in obj_json:
+            self.log.debug(f"create id: {obj_id}")
+            for key in obj_json:  # ("links", "attributes"):
+                if key == "updates":
+                    # not part of the obj json
+                    continue
+                if key == "shape":
+                    # just send the dims, not the shape json
+                    shape_json = obj_json["shape"]
+                    if shape_json["class"] == "H5S_SIMPLE":
+                        dims = shape_json["dims"]
+                        item[key] = dims
+                else:
+                    # just copy the key value directly
                     item[key] = obj_json[key]
+
+            # initialize dataset values if provided and not too large
+            if "updates" in obj_json:
+                updates = obj_json["updates"]
+                if updates and len(updates) == 1 and self.getDatasetSize(obj_id) < MAX_INIT_SIZE:
+                    sel, arr = updates[0]
+                    if sel.select_type == selections.H5S_SELECT_ALL:
+                        value = bytesArrayToList(arr)
+                        item["value"] = value
+                        updates.clear()  # reset the update list
+                if updates:
+                    dset_value_update_ids.add(obj_id)  # will set dataset value below
+
+            # add to the list of new items for the given collection
             items = col_items[collection]
             items.append(item)
+
             if len(items) == MAX_OBJECTS_PER_REQUEST:
-                print("items:", items)
-                post_rsp = self.http_conn.POST("/" + collection, items)
-                print("post_rsp.status_code:", post_rsp.status_code)
-                if post_rsp.is_json:
-                    print("post_rsp.json:", post_rsp.json())
-                items.clear()
+                multiPost(items)
 
         # handle any remainder items
         for collection in collections:
             items = col_items[collection]
             if items:
-                self.http_conn.POST("/" + collection, items)
+                multiPost(items)
+
+        # write any initial dataset values
+        if dset_value_update_ids:
+            self.updateValues(dset_value_update_ids)
 
     def updateLinks(self, grp_ids):
         """ update any modified links of the given objects """
 
-        print("updateLinks:", grp_ids)
-        body = {}  # body will hold a map of grp ids to link lists
+        self.log.debug("hsds_writer> updateLinks")
+        items = {}  # dict which will hold a map of grp ids to links to create
+        count = 0
 
         for grp_id in grp_ids:
             if getCollectionForId(grp_id) != "groups":
                 continue  # ignore datasets and datatypes
             grp_json = self.db.getObjectById(grp_id)
             grp_links = grp_json["links"]
-            print(f"grp_id {grp_id} links: {grp_links}")
-            for link_json in grp_links:
+            for link_title in grp_links:
+                link_json = grp_links[link_title]
                 if "created" not in link_json:
                     self.log.error(f"hsds_writer> expected created timestamp in link: {link_json}")
                 created = link_json["created"]
                 if created > self._last_flush_time:
+                    self.log.debug(f"hsds_writer> {grp_id}: new link: {link_title}")
+                    count += 1
                     # new link, add to our list
-                    if grp_id not in body:
-                        body[grp_id] = {}
+                    if grp_id not in items:
+                        items[grp_id] = {"links": {}}
+                    links = items[grp_id]["links"]
+                    link_class = link_json["class"]
+                    new_link = {"class": link_class}
+                    # convert to hsds representation
+                    if link_class == "H5L_TYPE_HARD":
+                        new_link["id"] = link_json["id"]
+                    elif link_class == "H5L_TYPE_SOFT":
+                        new_link["h5path"] = link_json["h5path"]
+                    elif link_class == "H5L_TYPE_EXTERNAL":
+                        new_link["h5path"] = link_json["h5path"]
+                        new_link["h5domain"] = link_json["file"]  # use h5domain for file key
+                    elif link_class == "H5L_TYPE_USER_DEFINED":
+                        self.log.warning(f"ignoring user-defined link: {link_title}")
+                        continue
+                    else:
+                        raise IOError(f"unexpected link class: {link_class}")
+                    links[link_title] = new_link
+                    self.log.debug(f"setting link {link_title} to {new_link}")
+
+        if items:
+            body = {"grp_ids": items}
+            put_rsp = self.http_conn.PUT("/groups/" + self._root_id + "/links", body=body)
+            if put_rsp.status_code not in (200, 201):
+                self.log.error(f"failed to update links for request: {body}")
+                raise IOError("hsds_writer unable to update links")
+            else:
+                self.log.debug(f"hsds_writer> {grp_id} {count} links updated")
 
-        if body:
-            print("updateLinks, body:", body)
+    def updateAttributes(self, obj_ids):
+        """ update any modified links of the given objects """
+
+        self.log.debug("hsds_writer> updateAttributes")
+        items = {}  # dict which will hold a map of objects ids to attributes to create
+        count = 0
+
+        for obj_id in obj_ids:
+            obj_json = self.db.getObjectById(obj_id)
+            obj_attrs = obj_json["attributes"]
+            for attr_name in obj_attrs:
+                attr_json = obj_attrs[attr_name]
+                if "created" not in attr_json:
+                    self.log.error(f"hsds_writer> expected created timestamp in attr: {attr_json}")
+                created = attr_json["created"]
+                if created > self._last_flush_time:
+                    self.log.debug(f"hsds_writer> {obj_id} attribute {attr_name} created")
+                    count += 1
+                    # new attribute, add to our list
+                    if obj_id not in items:
+                        items[obj_id] = {"attributes": {}}
+                    attrs = items[obj_id]["attributes"]
+                    attrs[attr_name] = attr_json
+
+        if items:
+            body = {"obj_ids": items}
+            req = f"/groups/{self._root_id}/attributes"
+            put_rsp = self.http_conn.PUT(req, body=body)
+            if put_rsp.status_code not in (200, 201):
+                self.log.error(f"hsds_writer> put {req} failed, status: {put_rsp.status_code}")
+            else:
+                self.log.debug(f"hsds_writer> {count} attributes updated")
+
+    def updateValue(self, dset_id, sel, arr):
+        """ update the given dataset using selection and array """
+        self.log.debug("hsds_writer> updateValue")
+        params = {}
+        data = arrayToBytes(arr)
+        self.log.debug(f"writing binary data, {len(data)} bytes")
+
+        if sel.select_type != selections.H5S_SELECT_ALL:
+            select_param = sel.getQueryParam()
+            self.log.debug(f"got select query param: {select_param}")
+            params["select"] = select_param
+
+        req = f"/datasets/{dset_id}/value"
+        rsp = self.http_conn.PUT(req, body=data, params=params, format="binary")
+        if rsp.status_code != 200:
+            self.log.error(f"PUT {req} returned error: {rsp.status_code}")
+        else:
+            self.log.debug(f"PUT {len(data)} bytes successful")
+
+    def updateValues(self, dset_ids):
+        """ write any pending dataset values """
+
+        self.log.debug("hsds_writer> updateValues")
+        for dset_id in dset_ids:
+            if getCollectionForId(dset_id) != "datasets":
+                continue  # ignore groups and datatypes
+            dset_json = self.db.getObjectById(dset_id)
+            if "updates" not in dset_json:
+                continue
+            updates = dset_json["updates"]
+            if updates:
+                self.log.debug(f"hsds_writer> {dset_id} update count: {len(updates)}")
+                for (sel, arr) in updates:
+                    self.updateValue(dset_id, sel, arr)
+                updates.clear()
 
     def flush(self):
         """ Write dirty items """
@@ -286,27 +435,33 @@ def flush(self):
         self.log.debug(f"    new object count: {len(self.db.new_objects)}")
         self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
         self.log.debug(f"    deleted object count: {len(self.db.deleted_objects)}")
-
         if self._init:
-            # initialize all existing objects
-            self.log.debug(f"flush -- init is true, self.db: {self.db.db}")
+            # initialize objects
+            self.log.debug(f"hsds_writer> flush -- init is True self.db: {self.db.db}")
             for obj_id in self.db:
                 self.log.debug(f"init: {obj_id}")
             self.createObjects(self.db.db.keys())
             self._init = False
         elif self.db.new_objects:
+            self.log.debug(f"hsds_writer> {len(self.db.new_objects)} objects to create")
             for obj_id in self.db.new_objects:
-                self.log.debug(f"new obj id: {obj_id}")
+                self.log.debug(f"hsds_writer> new obj id: {obj_id}")
             self.createObjects(self.db.new_objects)
+        else:
+            self.log.debug("no new objects to persist")
 
         for obj_id in self.db.dirty_objects:
-            self.log.debug(f"dirty object id: {obj_id}")
+            self.log.debug(f"hsds_writer> dirty object id: {obj_id}")
             self.updateLinks(self.db.dirty_objects)
+            self.updateAttributes(self.db.dirty_objects)
+            self.updateValues(self.db.dirty_objects)
 
         for obj_id in self.db.deleted_objects:
             self.log.debug(f"deleted object: {obj_id}")
 
+        self._init = False
         self._last_flush_time = time.time()
+        self.log.debug("hsds_writer> flush successful")
         return True  # all objects written successfully
 
     def close(self):
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 3a94b094..1a051383 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -38,7 +38,7 @@ def select(obj, args):
     to __getitem__.  The arguments should be the following:
 
     obj
-        Datatset object
+        Dataset object
 
     args
         Either a single argument or a tuple of arguments.  See below for
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 3ff91bee..e51c4dba 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -72,8 +72,8 @@ def testSimple(self):
             g1 = f["g1"]
             self.assertTrue("a1" in g1.attrs)
             self.assertEqual(len(g1), 0)
-
         db.open()
+
         g2_id = db.createGroup()
         db.createHardLink(root_id, "g2", g2_id)
 
@@ -96,8 +96,11 @@ def testSimple(self):
         with h5py.File(filepath) as f:
             self.assertTrue("attr1", f.attrs)
             self.assertTrue("attr2", f.attrs)
+            self.assertEqual(len(f), 2)
             self.assertTrue("g1" in f)
+            self.assertTrue("g2" in f)
             g1 = f["g1"]
+            self.assertEqual(len(g1), 1)
             self.assertTrue("a1" in g1.attrs)
             self.assertTrue("g1.1" in g1)
             g11 = g1["g1.1"]
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
index 72cf6017..d0501b9f 100644
--- a/test/unit/hsds_reader_test.py
+++ b/test/unit/hsds_reader_test.py
@@ -39,68 +39,71 @@ def __init__(self, *args, **kwargs):
     def testSimple(self):
         filepath = "/home/test_user1/test/tall.h5"
         kwargs = {"app_logger": self.log}
-        with Hdf5db(**kwargs) as db:
-            hsds_reader = HSDSReader(filepath, **kwargs)
-            db.reader = hsds_reader
-            root_id = db.getObjectIdByPath("/")
-            root_json = db.getObjectById(root_id)
+        db = Hdf5db(**kwargs)
+        hsds_reader = HSDSReader(filepath, **kwargs)
+        db.reader = hsds_reader
+        root_id = db.open()
+        root_json = db.getObjectById(root_id)
+        self.assertTrue("id" in root_json)
+        """
+        TBD
+        root_attrs = root_json["attributes"]
+        self.assertEqual(len(root_attrs), 2)
+        self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+        root_links = root_json["links"]
+        self.assertEqual(len(root_links), 2)
+        self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+        g1_link = root_links["g1"]
+        self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+        g1_id = g1_link["id"]
+        self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+        dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+        dset_json = db.getObjectById(dset111_id)
+        dset_type = dset_json["type"]
+        self.assertEqual(dset_type["class"], "H5T_INTEGER")
+        self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+        dset_attrs = dset_json["attributes"]
+        self.assertEqual(len(dset_attrs), 2)
+        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+        dset_shape = dset_json["shape"]
+        self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(dset_shape["dims"], [10, 10])
 
-            root_attrs = root_json["attributes"]
-            self.assertEqual(len(root_attrs), 2)
-            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
-            root_links = root_json["links"]
-            self.assertEqual(len(root_links), 2)
-            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
-            g1_link = root_links["g1"]
-            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
-            g1_id = g1_link["id"]
-            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
-            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
-            dset_json = db.getObjectById(dset111_id)
-            dset_type = dset_json["type"]
-            self.assertEqual(dset_type["class"], "H5T_INTEGER")
-            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
-            dset_attrs = dset_json["attributes"]
-            self.assertEqual(len(dset_attrs), 2)
-            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
-            dset_shape = dset_json["shape"]
-            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(dset_shape["dims"], [10, 10])
+        # got the 5th row of the dataset
+        sel_row = selections.select((10, 10), (5, slice(0, 10)))
+        row = db.getDatasetValues(dset111_id, sel_row)
+        self.assertTrue(isinstance(row, np.ndarray))
+        self.assertEqual(row.shape, (10,))
+        for i in range(10):
+            v = row[i]
+            self.assertEqual(v, i * 5)
 
-            # got the 5th row of the dataset
-            sel_row = selections.select((10, 10), (5, slice(0, 10)))
-            row = db.getDatasetValues(dset111_id, sel_row)
-            self.assertTrue(isinstance(row, np.ndarray))
-            self.assertEqual(row.shape, (10,))
-            for i in range(10):
-                v = row[i]
-                self.assertEqual(v, i * 5)
+        sel_all = selections.select((10, 10), ...)
+        arr = db.getDatasetValues(dset111_id, sel_all)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(arr.shape, (10, 10))
+        for i in range(10):
+            for j in range(10):
+                v = arr[i, j]
+                self.assertEqual(v, i * j)
 
-            sel_all = selections.select((10, 10), ...)
-            arr = db.getDatasetValues(dset111_id, sel_all)
-            self.assertTrue(isinstance(arr, np.ndarray))
-            self.assertEqual(arr.shape, (10, 10))
-            for i in range(10):
-                for j in range(10):
-                    v = arr[i, j]
-                    self.assertEqual(v, i * j)
+        # try adding an attribute
+        db.createAttribute(dset111_id, "attr3", value=42)
+        dset_json = db.getObjectById(dset111_id)
+        dset_attrs = dset_json["attributes"]
+        self.assertEqual(len(dset_attrs), 3)
+        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+        attr3_json = dset_attrs["attr3"]
+        attr3_shape = attr3_json["shape"]
+        self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+        attr3_type = attr3_json["type"]
+        self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+        self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+        attr3_value = attr3_json["value"]
+        self.assertEqual(attr3_value, 42)
+        """
 
-            # try adding an attribute
-            db.createAttribute(dset111_id, "attr3", value=42)
-            dset_json = db.getObjectById(dset111_id)
-            dset_attrs = dset_json["attributes"]
-            self.assertEqual(len(dset_attrs), 3)
-            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
-            attr3_json = dset_attrs["attr3"]
-            attr3_shape = attr3_json["shape"]
-            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
-            attr3_type = attr3_json["type"]
-            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
-            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
-            attr3_value = attr3_json["value"]
-            self.assertEqual(attr3_value, 42)
-
-            db.close()
+        db.close()
 
 
 if __name__ == "__main__":
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index a3ba9bea..667a8bcd 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -10,11 +10,12 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import unittest
-import time
 import logging
-import h5py
+import requests
+import os
 import numpy as np
 from h5json import Hdf5db
+from h5json.hsdsstore.httpconn import HttpConn
 from h5json.hsdsstore.hsds_writer import HSDSWriter
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
@@ -24,6 +25,7 @@ class HSDSWriterTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(HSDSWriterTest, self).__init__(*args, **kwargs)
         # main
+        self.session = requests.Session()
 
         # create logger
         logfname = "hsds_writer_test.log"
@@ -34,19 +36,42 @@ def __init__(self, *args, **kwargs):
 
     def testSimple(self):
 
-        filepath = "/home/test_user1/writer_test.h5"
+        domain_path = "/home/test_user1/writer_test.h5"
+
         db = Hdf5db(app_logger=self.log)
-        db.writer = HSDSWriter(filepath)
+        db.writer = HSDSWriter(domain_path)
         root_id = db.open()
-        print("root_id:", root_id)
+        http_conn = HttpConn(domain_path, mode='r', retries=1)
+
         db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
         db.createAttribute(root_id, "attr2", 42)
+
         g1_id = db.createGroup()
         db.createHardLink(root_id, "g1", g1_id)
         db.createAttribute(g1_id, "a1", "hello")
         g2_id = db.createGroup()
         db.createHardLink(root_id, "g2", g2_id)
 
+        # validate - get the root group and check counts
+        http_rsp = http_conn.GET(f"/groups/{root_id}")
+        self.assertEqual(http_rsp.status_code, 200)
+        root_json = http_rsp.json()
+        # attribute count should still be zero (hasn't been flushed yet)
+        self.assertEqual(root_json["attributeCount"], 0)
+        # same for link count
+        self.assertEqual(root_json["linkCount"], 0)
+
+        db.flush()
+
+        # validate - get the root group again and see if counts are updated
+        http_rsp = http_conn.GET(f"/groups/{root_id}")
+        self.assertEqual(http_rsp.status_code, 200)
+        root_json = http_rsp.json()
+        # attribute count should still be zero (hasn't been flushed yet)
+        self.assertEqual(root_json["attributeCount"], 2)
+        # same for link count
+        self.assertEqual(root_json["linkCount"], 2)
+
         g1_1_id = db.createGroup()
         db.createHardLink(g1_id, "g1.1", g1_1_id)
         dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
@@ -56,12 +81,35 @@ def testSimple(self):
                 arr[i, j] = i * j
         sel_all = selections.select((10, 10), ...)
         db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.flush()
+
+        # validate - get the dataset and check values
+        http_rsp = http_conn.GET(f"/datasets/{dset_111_id}/value")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        self.assertTrue("value" in rsp_json)
+        rsp_value = rsp_json["value"]
+        self.assertEqual(len(rsp_value), 10)
+        for i in range(10):
+            row = rsp_value[i]
+            self.assertEqual(len(row), 10)
+            for j in range(10):
+                self.assertEqual(row[j], i * j)
+
         db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
         db.createSoftLink(g2_id, "slink", "somewhere")
         db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
         db.createCustomLink(g2_id, "cust", {"foo": "bar"})
         db.flush()
 
+        # validate - check that links got updated
+        http_rsp = http_conn.GET(f"/groups/{g2_id}/links")
+        self.assertEqual(http_rsp.status_code, 200)
+        g2links_json = http_rsp.json()
+        self.assertTrue("links" in g2links_json)
+        g2links = g2links_json["links"]
+        self.assertTrue(len(g2links), 2)  # custom link will be ignored
+
         db.createAttribute(g1_id, "a2", "bye-bye")
         db.flush()
 
@@ -69,10 +117,47 @@ def testSimple(self):
         db.createHardLink(g2_id, "g2.1", g21)
         db.flush()
 
+        # update one element of the dataset
         sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
         arr = np.zeros((), dtype=np.int32)
         arr[()] = 42
         db.setDatasetValues(dset_111_id, sel, arr)
+        db.flush()
+
+        # validate - check that just the one element is modified
+        http_rsp = http_conn.GET(f"/datasets/{dset_111_id}/value")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        self.assertTrue("value" in rsp_json)
+        rsp_value = rsp_json["value"]
+        self.assertEqual(len(rsp_value), 10)
+        for i in range(10):
+            row = rsp_value[i]
+            self.assertEqual(len(row), 10)
+            for j in range(10):
+                if i == 4 and j == 4:
+                    expected = 42
+                else:
+                    expected = i * j
+                self.assertEqual(row[j], expected)
+
+        # create a scalar dataset
+        dset_112_id = db.createDataset(shape=(), dtype=np.int32)
+        arr = np.zeros((), dtype=np.int32)
+        arr[()] = 42
+        sel_all = selections.select((), ...)
+        db.setDatasetValues(dset_112_id, sel_all, arr)
+        db.createHardLink(g1_id, "dset1.1.2", dset_112_id)
+        db.flush()
+
+        # validate - get the scalar dataset value
+        http_rsp = http_conn.GET(f"/datasets/{dset_112_id}/value")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        self.assertTrue("value" in rsp_json)
+        rsp_value = rsp_json["value"]
+        self.assertEqual(rsp_value, 42)
+
         db.close()
 
 

From 09c017aac74feff17170bc455e6bfa996c0962ca Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 26 Jun 2025 17:56:26 +0100
Subject: [PATCH 055/129] reorg executables to apps dir

---
 pyproject.toml                              | 10 ++++------
 src/h5json/{h5tojson => apps}/__init__.py   |  0
 src/h5json/{h5tojson => apps}/h5tojson.py   |  0
 src/h5json/{jsontoh5 => apps}/jsontoh5.py   |  0
 src/h5json/{validator => apps}/validator.py |  0
 5 files changed, 4 insertions(+), 6 deletions(-)
 rename src/h5json/{h5tojson => apps}/__init__.py (100%)
 rename src/h5json/{h5tojson => apps}/h5tojson.py (100%)
 rename src/h5json/{jsontoh5 => apps}/jsontoh5.py (100%)
 rename src/h5json/{validator => apps}/validator.py (100%)

diff --git a/pyproject.toml b/pyproject.toml
index 879e7ffb..d911700a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,9 +35,9 @@ Social = "https://twitter.com/hdf5"
 Discussion = "https://forum.hdfgroup.org"
 
 [project.scripts]
-h5tojson = "h5json.h5tojson.h5tojson:main"
-jsontoh5 = "h5json.jsontoh5.jsontoh5:main"
-h5jvalidate = "h5json.validator.validator:main"
+h5tojson = "h5json.apps.h5tojson:main"
+jsontoh5 = "h5json.apps.jsontoh5:main"
+h5jvalidate = "h5json.apps.validator:main"
 
 [project.optional-dependencies]
 dev = ["check-manifest"]
@@ -54,10 +54,8 @@ packages = [
     "h5json.jsonstore",
     "h5json.h5pystore",
     "h5json.hsdsstore",
-    "h5json.h5tojson",
-    "h5json.jsontoh5",
     "h5json.schema",
-    "h5json.validator",
+    "h5json.apps",
 ]
 package-data = { "h5json.schema" = ["*.schema.json"] }
 platforms = ["any"]
diff --git a/src/h5json/h5tojson/__init__.py b/src/h5json/apps/__init__.py
similarity index 100%
rename from src/h5json/h5tojson/__init__.py
rename to src/h5json/apps/__init__.py
diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/apps/h5tojson.py
similarity index 100%
rename from src/h5json/h5tojson/h5tojson.py
rename to src/h5json/apps/h5tojson.py
diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/apps/jsontoh5.py
similarity index 100%
rename from src/h5json/jsontoh5/jsontoh5.py
rename to src/h5json/apps/jsontoh5.py
diff --git a/src/h5json/validator/validator.py b/src/h5json/apps/validator.py
similarity index 100%
rename from src/h5json/validator/validator.py
rename to src/h5json/apps/validator.py

From 9773e2c1284c5461cdfa4c19a9f80310380398a3 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 1 Jul 2025 15:19:48 +0100
Subject: [PATCH 056/129] added h5tohs util

---
 src/h5json/apps/h5tohs.py             |  63 +++
 src/h5json/h5pystore/h5py_reader.py   |  62 ++-
 src/h5json/h5pystore/h5py_writer.py   |   1 +
 src/h5json/hdf5db.py                  |  49 +-
 src/h5json/hsdsstore/hsds_writer.py   |  81 ++-
 src/h5json/jsonstore/h5json_writer.py |  10 +-
 test/integ/h5tojson_test.py           |   4 +-
 test/integ/jsontoh5_test.py           |   4 +-
 test/unit/h5json_reader_test.py       |  98 ++--
 test/unit/h5json_writer_test.py       | 483 ++++++++---------
 test/unit/h5py_reader_test.py         |  95 ++--
 test/unit/hdf5db_test.py              | 733 ++++++++++++++------------
 test/unit/hsds_writer_test.py         |  50 +-
 13 files changed, 1006 insertions(+), 727 deletions(-)
 create mode 100755 src/h5json/apps/h5tohs.py

diff --git a/src/h5json/apps/h5tohs.py b/src/h5json/apps/h5tohs.py
new file mode 100755
index 00000000..4d1a8106
--- /dev/null
+++ b/src/h5json/apps/h5tohs.py
@@ -0,0 +1,63 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import sys
+import os.path as op
+import logging
+
+from h5json import Hdf5db
+from h5json.hsdsstore.hsds_writer import HSDSWriter
+from h5json.h5pystore.h5py_reader import H5pyReader
+
+def usage():
+    print(f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file> <hsds_domain>")
+    sys.exit(0)
+
+def main():
+    no_data = False
+    filename = None
+    domain = None
+    for i in range(1, len(sys.argv)):
+        if sys.argv[i] in ("-h", "--help"):
+            usage()
+        elif sys.argv[i] == "--nodata":
+            no_data = True
+        elif filename is None:
+            filename = sys.argv[i]
+        elif domain is None:
+            domain = sys.argv[i]
+        else:
+            usage()
+
+    if domain is None:
+        usage()
+
+    # create logger
+    logfname = "h5tohs.log"
+    loglevel = logging.DEBUG
+    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+    log = logging.getLogger()
+
+    # check that the input file exists
+    if not op.isfile(filename):
+        sys.exit(f"Cannot find file: {filename}")
+
+    log.info(f"h5tohs {filename}")
+
+    db = Hdf5db(app_logger=log)
+    db.writer = HSDSWriter(domain, no_data=no_data, app_logger=log)
+    db.reader = H5pyReader(filename, app_logger=log)
+    db.open()  # read HDF5 data into db
+
+    db.close()  # close will trigger write to HSDS
+
+if __name__ == "__main__":
+    main()
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index bc4b5820..089f0f24 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -12,6 +12,7 @@
 import h5py
 import numpy as np
 import logging
+import time
 
 from ..objid import createObjId, getCollectionForId
 from ..hdf5dtype import getTypeItem, isOpaqueDtype
@@ -126,6 +127,7 @@ def _copy_array(self, src_arr, fin=None):
             tgt_arr[...] = src_arr[...]
         return tgt_arr
 
+    """
     def visit(self, path, obj):
         name = obj.__class__.__name__
         self.log.info(f"visit: {path} name: {name}")
@@ -136,6 +138,7 @@ def visit(self, path, obj):
 
         addr = h5py.h5o.get_info(obj.id).addr
         self._addr_map[addr] = obj_id
+    """
 
     def __init__(
         self,
@@ -174,11 +177,15 @@ def open(self):
         self._id_map[self._root_id] = f
         addr = h5py.h5o.get_info(f.id).addr
         self._addr_map[addr] = self._root_id
-        f.visititems(self.visit)
+        #f.visititems(self.visit)
+
+        print("h5py_reader keys:", list(self.db.db.keys()))
 
         return self._root_id
 
     def close(self):
+        # close h5py handles in map dict
+        self._id_map = {}
         if self._f:
             self._f.close()
             self._f = None
@@ -261,7 +268,8 @@ def getAttribute(self, obj_id, name, include_data=True):
         else:
             pass  # no data
 
-        # timestamps will be added by getAttributeItem()
+        
+        item['created'] = time.time()  # TBD: get attribute creation time from h5py?
         return item
 
     def getAttributes(self, obj_id, include_data=True):
@@ -306,6 +314,8 @@ def _getLink(self, parent, link_name):
                 item["id"] = None
             else:
                 item["id"] = self._addr_map[addr]
+            
+        item['created'] = time.time()  # TBD: get the link creation time from h5py?
 
         return item
 
@@ -428,7 +438,8 @@ def _getDataset(self, dset):
         self.log.info(f"getDataset alias: [{dset.name}]")
 
         item = {"alias": dset.name}
-
+        print("dset:", dset)
+        print("dset type:", type(dset))
         typeid = dset.id.get_type()
         if h5py.h5t.TypeID.committed(typeid):
             type_uuid = None
@@ -468,21 +479,60 @@ def _getDataset(self, dset):
         item["cpl"] = self._getHDF5DatasetCreationProperties(dset, type_item["class"])
 
         return item
+    
+    def _getHardLinkIds(self, parent):
+        """ create any ids for hard links of the group """
+
+        self.log.debug(f"h5pyreader> _getHardlinkIds for {parent.name}")
+        for link_name in parent:
+            self.log.debug(f"h5py_reader> check link: {link_name}")
+
+            try:
+                linkObj = parent.get(link_name, None, False, True)
+                linkClass = linkObj.__class__.__name__
+            except TypeError:
+                # UDLink? Go on to the next link
+                continue
+            if linkClass != "HardLink":
+                self.log.debug(f"h5py_reader> ignoring {link_name} - type: {linkClass}")
+            else:
+                # get the linked object
+                obj = parent[link_name]
+                addr = h5py.h5o.get_info(obj.id).addr
+                if addr not in self._addr_map:
+                    name = obj.__class__.__name__
+                    obj_id = createObjId(obj_type=name, root_id=self._root_id)  # create uuid
+                    self.log.debug(f"h5py_reader> creating obj_id: {obj_id} for obj: {obj.name}")
+                    self._id_map[obj_id] = obj
+                    self._addr_map[addr] = obj_id
+                else:
+                    obj_id = self._addr_map[addr]
+                    if obj_id not in self._id_map:
+                        self.log.debug(f"h5py_reader> adding obj for {obj_id} to id_map")
+                        self._id_map = obj
+                    else:
+                        self.log.debug("h5py_reader> obj {obj_id} already in id_map")
 
     def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         """ return object with given id """
         if obj_id not in self._id_map:
             raise KeyError(f"{obj_id} not found")
         h5obj = self._id_map[obj_id]
+        print("h5obj:", h5obj)
+        print("h5obj.name:", h5obj.name)
+        print("h5obj type:", type(h5obj))
         if isinstance(h5obj, h5py.Group):
+            self._getHardLinkIds(h5obj)
             obj_json = self._getGroup(h5obj, include_links=include_links)
         elif isinstance(h5obj, h5py.Dataset):
             obj_json = self._getDataset(h5obj)
         elif isinstance(h5obj, h5py.Datatype):
-            obj_json = self._getDatatype(h5obj)
+            obj_json = self._getDataset(h5obj)
         else:
-            raise TypeError(f"unexpected object type: {type(h5obj)}")
-
+            msg = f"unexpected object type: {type(h5obj)}"
+            self.log.error(msg)
+            raise TypeError(msg)
+            
         if include_attrs:
             attributes = self.getAttributes(obj_id)
             obj_json["attributes"] = attributes
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 14942c11..15d35bd4 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -388,6 +388,7 @@ def updateAttributes(self, obj_id, obj):
                 continue
             self.createAttribute(obj, name, attr_json)
 
+        
     def flush(self):
         """ Write dirty items """
         if self.closed:
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 8d88d6ec..28eef18d 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -109,7 +109,6 @@ def writer(self, value: H5Writer):
             self._writer.close()
         self._writer = value
         if self._writer:
-            self.log.debug("writer set_db")
             self._writer.set_db(self)
 
     @property
@@ -161,11 +160,40 @@ def flush(self):
             return  # nothing to do
         if not self.writer.flush():
             # flush not successful, don't clear dirty set
-            return
+            self.log.error("writer flush failed")
+            raise IOError("writer flush failed")
 
-        # reset new and dirty sets
+        # reset new, dirty and deleted sets
         self._new_objects = set()
         self._dirty_objects = set()
+        self._deleted_objects = set()
+
+    def readAll(self):
+        """ read all meta data objects from reader and save to db """
+
+        self.log.debug("readAll")
+        if self.closed:
+            raise IOError("database is not open")
+        
+        if not self.reader:
+            self.log.debug("no reader set")
+            # no reader, nothing to do
+            return
+        
+        obj_ids = set()
+        obj_ids.add(self.root_id)
+        while obj_ids:
+            obj_id = obj_ids.pop()
+            self.log.debug(f"readAll, get {obj_id}")
+            obj_json = self.getObjectById(obj_id)  # will add obj_id to db if not already present
+            if getCollectionForId(obj_id) == "groups":
+                # add any hard links to the set
+                links = obj_json["links"]
+                for title in links:
+                    link_json = links[title]
+                    if "id" in link_json:
+                        link_id = link_json["id"]
+                        obj_ids.add(link_id)
 
     def open(self):
         """ open reader and writer if set """
@@ -196,10 +224,16 @@ def open(self):
                     writer_root_id = self.writer.open()
                     if writer_root_id != self._root_id:
                         # TBD: same as above, need to deal with inconsistent root ids
-                        self.log.warning("writer root_id does not match reader root_id")
+                        msg = "writer root_id does not match reader root_id"
+                        self.log.error(msg)
+                        raise IOError(msg)
+                    else:
+                        self.log.debug('writer and reader root ids match!')
             else:
                 # no root id set by writer or reader, initialize now
-                self._root_id = createObjId(obj_type="groups")
+                root_id = createObjId(obj_type="groups")
+                self.log.debug(f"no reader or writer, creating new root id: {root_id}")
+                self._root_id = root_id
                 if self.writer:
                     # open writer in create mode now that we have a root id
                     self.writer.open()
@@ -215,6 +249,7 @@ def open(self):
     def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
+        
         self.flush()
         if self.writer:
             self.writer.close()
@@ -237,6 +272,7 @@ def __exit__(self, type, value, traceback):
 
     def getObjectById(self, obj_id):
         """ return object with given id """
+        self.log.debug(f"getObjectById {obj_id}")
         if obj_id not in self.db:
             if self.reader:
                 # load the obj from the reader
@@ -252,9 +288,6 @@ def getObjectIdByPath(self, h5path, parent_id=None):
         """ Return id for the given link path starting from parent_id if set,
         otherwise the root_id """
 
-        if self.closed:
-            self.open()  # initiate db
-
         if h5path == "/":
             return self.root_id  # just return root id
 
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 7b022c34..f56a5e34 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -12,10 +12,10 @@
 import logging
 import time
 
-from ..objid import getCollectionForId, getUuidFromId
+from ..objid import getCollectionForId
 
-from ..hdf5dtype import createDataType, isVlen
-from ..array_util import jsonToArray, bytesToArray, arrayToBytes, bytesArrayToList
+from ..hdf5dtype import isVlen
+from ..array_util import arrayToBytes, bytesArrayToList
 from ..dset_util import getNumElements
 from .. import selections
 from ..h5writer import H5Writer
@@ -117,6 +117,9 @@ def __init__(
 
     def open(self):
         """ setup domain for writing """
+        if not self._db_ref:
+            # no db set yet
+            raise IOError("DB not set")
 
         if self._http_conn:
             http_conn = self._http_conn
@@ -241,10 +244,10 @@ def createObjects(self, obj_ids):
 
         def multiPost(items):
             self.log.debug(f"hsds_writer> POST request {collection} for {len(items)} objects")
+            for item in items:
+                self.log.debug(f"hsds_writer> POST item: {item}")
             post_rsp = self.http_conn.POST("/" + collection, items)
             self.log.debug(f"hsds_writer> POST post_rsp.status_code: {post_rsp.status_code}")
-            if post_rsp.is_json:
-                self.log.debug(f"hsds_writer> post_rsp.json: {post_rsp.json()}")
             items.clear()
 
         self.log.debug(f"hsds_writer> createObjects, {len(obj_ids)} objects")
@@ -266,6 +269,12 @@ def multiPost(items):
                 if key == "updates":
                     # not part of the obj json
                     continue
+                if key == "attributes":
+                    # will update attribute later
+                    continue
+                if key == "links":
+                    # links will also be updated later
+                    continue
                 if key == "shape":
                     # just send the dims, not the shape json
                     shape_json = obj_json["shape"]
@@ -305,6 +314,17 @@ def multiPost(items):
         if dset_value_update_ids:
             self.updateValues(dset_value_update_ids)
 
+    def deleteObjects(self, obj_ids):
+        """ remove the given obj ids from the HSDS store """
+
+        # no multi-delete operation yet, so delete one by one
+        for obj_id in obj_ids:
+            collection = getCollectionForId(obj_id)
+            req = f"/{collection}/{obj_id}"
+            http_rsp = self.http_conn.DELETE(req)
+            if http_rsp.status_code not in (200, 410):
+                self.log.error(f"got {http_rsp.status_code} for DELETE {req}")
+
     def updateLinks(self, grp_ids):
         """ update any modified links of the given objects """
 
@@ -425,44 +445,59 @@ def updateValues(self, dset_ids):
                     self.updateValue(dset_id, sel, arr)
                 updates.clear()
 
+
     def flush(self):
         """ Write dirty items """
-
-        if not self.db:
+        if self.closed:
             # no db set yet
-            return False
+            self.log.warning("hsds_writer> flush called but no db")
+            return IOError("writer is closed")
+        if not self._http_conn:
+            self.log.warning("hsds_writer no http connection")
+            raise IOError("no http connection")
+        
         self.log.info("hsds_writer.flush()")
         self.log.debug(f"    new object count: {len(self.db.new_objects)}")
         self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
         self.log.debug(f"    deleted object count: {len(self.db.deleted_objects)}")
+        root_id = self._root_id
+        dirty_ids = self.db.dirty_objects.copy()
         if self._init:
             # initialize objects
-            self.log.debug(f"hsds_writer> flush -- init is True self.db: {self.db.db}")
-            for obj_id in self.db:
-                self.log.debug(f"init: {obj_id}")
-            self.createObjects(self.db.db.keys())
+            self.log.debug(f"hsds_writer> flush -- init is True self.db: {len(self.db.db)} objects")
+            self.db.readAll()
+            self.log.debug(f"hsds_writer>flush, init after readAll, {len(self.db.db)} objects")
+            obj_ids = set(self.db.db.keys())
+            obj_ids.remove(root_id)  # root group created when domain was
+            self.log.debug(f"init createObjects: {obj_ids}")
+            self.createObjects(obj_ids)
+            dirty_ids.update(obj_ids)
+            dirty_ids.add(root_id)  # add back root for attribute and link creation
             self._init = False
         elif self.db.new_objects:
             self.log.debug(f"hsds_writer> {len(self.db.new_objects)} objects to create")
             for obj_id in self.db.new_objects:
                 self.log.debug(f"hsds_writer> new obj id: {obj_id}")
             self.createObjects(self.db.new_objects)
+            dirty_ids.update(self.db.new_objects)
         else:
             self.log.debug("no new objects to persist")
 
-        for obj_id in self.db.dirty_objects:
-            self.log.debug(f"hsds_writer> dirty object id: {obj_id}")
-            self.updateLinks(self.db.dirty_objects)
-            self.updateAttributes(self.db.dirty_objects)
-            self.updateValues(self.db.dirty_objects)
-
-        for obj_id in self.db.deleted_objects:
-            self.log.debug(f"deleted object: {obj_id}")
-
-        self._init = False
+        if dirty_ids:
+            self.log.debug(f"hsds_writer> dirty ids: {dirty_ids}")
+            self.updateLinks(dirty_ids)
+            self.updateAttributes(dirty_ids)
+            if not self._no_data:
+                self.updateValues(dirty_ids)
+
+        if self.db.deleted_objects:
+            self.log.debug(f"deleted ids: {self.db.deleted_objects}")
+            self.deleteObjects(self.db.deleted_objects)
+        
         self._last_flush_time = time.time()
         self.log.debug("hsds_writer> flush successful")
-        return True  # all objects written successfully
+        # all objects written successfully
+        return True
 
     def close(self):
         # over-ride of H5Writer method
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index 92d3499a..8cb5a39c 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -40,15 +40,15 @@ def __init__(
 
     def flush(self):
         """ Write dirty items """
-        # json writer doesn't support incremental updates, so we'll wait
-        # for close to write out database
+         
         if not self._root_id:
             msg = "flush called prior to open"
             self.log.warning(msg)
             raise IOError(msg)
 
         self.log.info("flush")
-        return False
+        self.dumpFile()
+        return True
 
     def open(self):
         """ file open """
@@ -61,7 +61,8 @@ def open(self):
 
     def close(self):
         """ close storage handle """
-        self.dumpFile()
+        self.flush()
+        self._root_id = None
 
     def isClosed(self):
         """ return closed status """
@@ -277,6 +278,7 @@ def dumpFile(self):
         self.json["apiVersion"] = db_version_info["hdf5-json-version"]
         self.json["root"] = getUuidFromId(self._root_uuid)
 
+
         self.updateAliasList()  # create alias_db with obj_id to alias list dict
 
         self.dumpGroups()
diff --git a/test/integ/h5tojson_test.py b/test/integ/h5tojson_test.py
index 5be40c84..8519a5d4 100644
--- a/test/integ/h5tojson_test.py
+++ b/test/integ/h5tojson_test.py
@@ -119,13 +119,13 @@
     out_file = os.path.join(out_dir, split_ext[0] + ".json")
     if not os.path.exists(file_path):
         sys.exit("file: " + file_path + " not found")
-    cmd = "python ../../src/h5json/h5tojson/h5tojson.py " + file_path + " >" + out_file
+    cmd = "python ../../src/h5json/apps/h5tojson.py " + file_path + " >" + out_file
     print("cmd:", cmd)
     rc = os.system(cmd)
     if rc != 0:
         sys.exit("h5tojson failed converting: " + test_file)
 
-    cmd = "python ../../src/h5json/validator/validator.py " + out_file
+    cmd = "python ../../src/h5json/apps/validator.py " + out_file
     print("cmd:", cmd)
     if rc != 0:
         sys.exit("HDF5/JSON validation failed for: " + out_file)
diff --git a/test/integ/jsontoh5_test.py b/test/integ/jsontoh5_test.py
index 3be3a3b7..ee0325d5 100644
--- a/test/integ/jsontoh5_test.py
+++ b/test/integ/jsontoh5_test.py
@@ -119,7 +119,7 @@
     hdf5_version_tuple[1] == 8 and hdf5_version_tuple[2] > 14
 ):
     # add in additional test files
-    print("adding library version dependendent files")
+    print("adding library version dependent files")
     test_files = list(test_files)
     for filename in test_files_latest:
         test_files.append(filename)
@@ -131,7 +131,7 @@
     out_file = os.path.join(out_dir, split_ext[0] + ".h5")
     if not os.path.exists(file_path):
         sys.exit("file: " + file_path + " not found")
-    cmd = "python ../../src/h5json/jsontoh5/jsontoh5.py " + file_path + " " + out_file
+    cmd = "python ../../src/h5json/apps/jsontoh5.py " + file_path + " " + out_file
     print("cmd:", cmd)
     rc = os.system(cmd)
     if rc != 0:
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index f49a86a8..bca00f2c 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -38,59 +38,57 @@ def __init__(self, *args, **kwargs):
 
     def testSimple(self):
         filepath = "data/json/tall.json"
-        kwargs = {"app_logger": self.log}
-        with Hdf5db(**kwargs) as db:
-            h5_reader = H5JsonReader(filepath, **kwargs)
-            db.reader = h5_reader
-            root_id = db.getObjectIdByPath("/")
-            root_json = db.getObjectById(root_id)
+        db = Hdf5db(app_logger=self.log)
+        db.reader = H5JsonReader(filepath, app_logger=self.log)
+        root_id = db.open()
+        root_json = db.getObjectById(root_id)
 
-            root_attrs = root_json["attributes"]
-            self.assertEqual(len(root_attrs), 2)
-            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
-            root_links = root_json["links"]
-            self.assertEqual(len(root_links), 2)
-            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
-            g1_link = root_links["g1"]
-            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
-            g1_id = g1_link["id"]
-            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
-            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
-            dset_json = db.getObjectById(dset111_id)
-            dset_type = dset_json["type"]
-            self.assertEqual(dset_type["class"], "H5T_INTEGER")
-            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
-            dset_attrs = dset_json["attributes"]
-            self.assertEqual(len(dset_attrs), 2)
-            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
-            dset_shape = dset_json["shape"]
-            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(dset_shape["dims"], [10, 10])
-            sel_all = selections.select((10, 10), ...)
-            arr = db.getDatasetValues(dset111_id, sel_all)
-            self.assertTrue(isinstance(arr, np.ndarray))
-            self.assertEqual(arr.shape, (10, 10))
-            for i in range(10):
-                for j in range(10):
-                    v = arr[i, j]
-                    self.assertEqual(v, i * j)
+        root_attrs = root_json["attributes"]
+        self.assertEqual(len(root_attrs), 2)
+        self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+        root_links = root_json["links"]
+        self.assertEqual(len(root_links), 2)
+        self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+        g1_link = root_links["g1"]
+        self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+        g1_id = g1_link["id"]
+        self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+        dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+        dset_json = db.getObjectById(dset111_id)
+        dset_type = dset_json["type"]
+        self.assertEqual(dset_type["class"], "H5T_INTEGER")
+        self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+        dset_attrs = dset_json["attributes"]
+        self.assertEqual(len(dset_attrs), 2)
+        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+        dset_shape = dset_json["shape"]
+        self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(dset_shape["dims"], [10, 10])
+        sel_all = selections.select((10, 10), ...)
+        arr = db.getDatasetValues(dset111_id, sel_all)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(arr.shape, (10, 10))
+        for i in range(10):
+            for j in range(10):
+                v = arr[i, j]
+                self.assertEqual(v, i * j)
 
-            # try adding an attribute
-            db.createAttribute(dset111_id, "attr3", value=42)
-            dset_json = db.getObjectById(dset111_id)
-            dset_attrs = dset_json["attributes"]
-            self.assertEqual(len(dset_attrs), 3)
-            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
-            attr3_json = dset_attrs["attr3"]
-            attr3_shape = attr3_json["shape"]
-            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
-            attr3_type = attr3_json["type"]
-            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
-            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
-            attr3_value = attr3_json["value"]
-            self.assertEqual(attr3_value, 42)
+        # try adding an attribute
+        db.createAttribute(dset111_id, "attr3", value=42)
+        dset_json = db.getObjectById(dset111_id)
+        dset_attrs = dset_json["attributes"]
+        self.assertEqual(len(dset_attrs), 3)
+        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+        attr3_json = dset_attrs["attr3"]
+        attr3_shape = attr3_json["shape"]
+        self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+        attr3_type = attr3_json["type"]
+        self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+        self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+        attr3_value = attr3_json["value"]
+        self.assertEqual(attr3_value, 42)
 
-            db.close()
+        db.close()
 
 
 if __name__ == "__main__":
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index 0f1fb59a..e8b5eb91 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -46,297 +46,298 @@ def testSimple(self):
 
         filepath = "test/unit/out/h5json_writer_testSimple.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
-            db.createAttribute(root_id, "attr2", 42)
-            g1_id = db.createGroup()
-            db.createHardLink(root_id, "g1", g1_id)
-            g2_id = db.createGroup()
-            db.createHardLink(root_id, "g2", g2_id)
-
-            g1_1_id = db.createGroup()
-            db.createHardLink(g1_id, "g1.1", g1_1_id)
-            dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
-            arr = np.zeros((10, 10), dtype=np.int32)
-            for i in range(10):
-                for j in range(10):
-                    arr[i, j] = i * j
-            sel_all = selections.select((10, 10), ...)
-            db.setDatasetValues(dset_111_id, sel_all, arr)
-            db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
-            db.createSoftLink(g2_id, "slink", "somewhere")
-            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
-            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
-            db.flush()
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        self.assertEqual(db.getObjectIdByPath("/"), root_id)
+        db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+        db.createAttribute(root_id, "attr2", 42)
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        g2_id = db.createGroup()
+        db.createHardLink(root_id, "g2", g2_id)
+
+        g1_1_id = db.createGroup()
+        db.createHardLink(g1_id, "g1.1", g1_1_id)
+        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+        db.createSoftLink(g2_id, "slink", "somewhere")
+        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        db.flush()
 
     def testNullSpaceAttribute(self):
 
         filepath = "test/unit/out/h5json_writer_testNullSpaceAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
-            item = db.getAttribute(root_id, "A1")
-            self.assertTrue("shape" in item)
-            shape_item = item["shape"]
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_NULL")
-            self.assertTrue(item["created"] > time.time() - 1.0)
-            value = db.getAttributeValue(root_id, "A1")
-            self.assertEqual(value, None)
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in item)
+        shape_item = item["shape"]
+        self.assertTrue("class" in shape_item)
+        self.assertEqual(shape_item["class"], "H5S_NULL")
+        self.assertTrue(item["created"] > time.time() - 1.0)
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(value, None)
 
     def testScalarAttribute(self):
         filepath = "test/unit/out/h5json_writer_testScalarAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            dims = ()
-            value = 42
-            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
-            self.assertEqual(item["value"], 42)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        dims = ()
+        value = 42
+        db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+        self.assertEqual(item["value"], 42)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        shape = item["shape"]
+        self.assertEqual(shape["class"], "H5S_SCALAR")
+
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
 
     def testFixedStringAttribute(self):
         filepath = "test/unit/out/h5json_writer_testFixedStringAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            value = "Hello, world!"
-            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["length"], 13)
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            ret_value = db.getAttributeValue(root_id, "A1")
-            self.assertEqual(ret_value, b'Hello, world!')
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        value = "Hello, world!"
+        db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["length"], 13)
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        ret_value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(ret_value, b'Hello, world!')
 
     def testVlenAsciiAttribute(self):
         filepath = "test/unit/out/h5json_writer_testVlenAsciiAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-
-            value = b"Hello, world!"
-            dt = special_dtype(vlen=bytes)
-
-            # write the attribute
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            # read it back
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+
+        value = b"Hello, world!"
+        dt = special_dtype(vlen=bytes)
+
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
 
     def testVlenUtf8Attribute(self):
         filepath = "test/unit/out/h5json_writer_testVlenutf8Attribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-
-            value = b"Hello, world!"
-            dt = special_dtype(vlen=str)
-
-            # write the attribute
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            # read it back
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+
+        value = b"Hello, world!"
+        dt = special_dtype(vlen=str)
+
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
 
     def testIntAttribute(self):
         filepath = "test/unit/out/h5json_writer_testIntAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            value = [2, 3, 5, 7, 11]
-            db.createAttribute(root_id, "A1", value, dtype=np.int16)
-            item = db.getAttribute(root_id, "A1")
-            self.assertEqual(item["value"], [2, 3, 5, 7, 11])
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            item_shape = item["shape"]
-            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(item_shape["dims"], [5,])
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        value = [2, 3, 5, 7, 11]
+        db.createAttribute(root_id, "A1", value, dtype=np.int16)
+        item = db.getAttribute(root_id, "A1")
+        self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(item_shape["dims"], [5,])
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I16LE")
 
     def testCreateReferenceAttribute(self):
         filepath = "test/unit/out/h5json_writer_testCreateReferenceAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
 
-            dset_id = db.createDataset(shape=(), dtype=np.int32)
-            db.createHardLink(root_id, "DS1", dset_id)
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
 
-            dt = special_dtype(ref=Reference)
+        dt = special_dtype(ref=Reference)
 
-            ds1_ref = "datasets/" + dset_id
-            value = [ds1_ref,]
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            item = db.getAttribute(root_id, "A1")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertTrue("shape" in attr)
+        ds1_ref = "datasets/" + dset_id
+        value = [ds1_ref,]
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        item = db.getAttribute(root_id, "A1")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in attr)
 
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_REFERENCE")
-            self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
-            attr_value = item["value"]
-            self.assertEqual(len(attr_value), 1)
-            self.assertEqual(attr_value[0], ds1_ref)
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+        self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+        attr_value = item["value"]
+        self.assertEqual(len(attr_value), 1)
+        self.assertEqual(attr_value[0], ds1_ref)
 
     def testCreateVlenReferenceAttribute(self):
         filepath = "test/unit/out/h5json_writer_testVlenReferenceAttribute.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape=(), dtype=np.int32)
-            db.createHardLink(root_id, "DS1", dset_id)
-            grp_id = db.createGroup()
-            db.createHardLink(root_id, "G1", grp_id)
-
-            dt_base = special_dtype(ref=Reference)
-            dt = special_dtype(vlen=dt_base)
-
-            ds1_ref = "datasets/" + dset_id
-            grp_ref = "groups/" + grp_id
-            ref_arr = np.zeros((2,), dtype=dt_base)
-            ref_arr[0] = ds1_ref
-            ref_arr[1] = grp_ref
-            vlen_arr = np.zeros((), dtype=dt)
-            vlen_arr[()] = ref_arr
-
-            db.createAttribute(root_id, "A1", vlen_arr)
-            item = db.getAttribute(root_id, "A1")
-
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_VLEN")
-            self.assertEqual(item_type["size"], "H5T_VARIABLE")
-            base_type = item_type["base"]
-            self.assertEqual(base_type["class"], "H5T_REFERENCE")
-            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
-
-            item_shape = item["shape"]
-            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
+        grp_id = db.createGroup()
+        db.createHardLink(root_id, "G1", grp_id)
+
+        dt_base = special_dtype(ref=Reference)
+        dt = special_dtype(vlen=dt_base)
+
+        ds1_ref = "datasets/" + dset_id
+        grp_ref = "groups/" + grp_id
+        ref_arr = np.zeros((2,), dtype=dt_base)
+        ref_arr[0] = ds1_ref
+        ref_arr[1] = grp_ref
+        vlen_arr = np.zeros((), dtype=dt)
+        vlen_arr[()] = ref_arr
+
+        db.createAttribute(root_id, "A1", vlen_arr)
+        item = db.getAttribute(root_id, "A1")
+
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_VLEN")
+        self.assertEqual(item_type["size"], "H5T_VARIABLE")
+        base_type = item_type["base"]
+        self.assertEqual(base_type["class"], "H5T_REFERENCE")
+        self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SCALAR")
 
     def testCommittedType(self):
         filepath = "test/unit/out/h5json_writer_testCommittedType.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-            dt = np.dtype("S15")
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+        dt = np.dtype("S15")
 
-            ctype_id = db.createCommittedType(dt)
-            db.createHardLink(root_id, "ctype", ctype_id)
-            item = db.getObjectById(ctype_id)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
 
-            item_type = item["type"]
+        item_type = item["type"]
 
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 15)
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item_type["length"], 15)
 
-            # create an attribute using the committed type
-            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertEqual(attr["value"], "hello world!")
+        # create an attribute using the committed type
+        db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], "hello world!")
 
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_STRING")
-            self.assertEqual(attr_type["length"], 15)
-            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_STRING")
+        self.assertEqual(attr_type["length"], 15)
+        self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
 
     def testCommittedCompoundType(self):
         filepath = "test/unit/out/h5json_writer_testCommittedCompoundType.h5"
 
-        with Hdf5db(app_logger=self.log) as db:
-            db.writer = H5JsonWriter(filepath, app_logger=self.log)
-            root_id = db.getObjectIdByPath("/")
-
-            dt_str = special_dtype(vlen=str)
-            fields = []
-            fields.append(("field_1", np.dtype(">i8")))
-            fields.append(("field_2", ">f8"))
-            fields.append(("field_3", np.dtype("S15")))
-            fields.append(("field_4", dt_str))
-            dt = np.dtype(fields)
-
-            ctype_id = db.createCommittedType(dt)
-            db.createHardLink(root_id, "ctype", ctype_id)
-            item = db.getObjectById(ctype_id)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-
-            item_type = item["type"]
-
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            fields = item_type["fields"]
-            self.assertEqual(len(fields), 4)
-
-            # create an attribute using the committed type
-            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
-            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertEqual(attr["value"], list(attr_value))
-            attr_shape = attr["shape"]
-            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
-
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
-
-            value = db.getAttributeValue(root_id, "A1")
-            self.assertTrue(isinstance(value, np.ndarray))
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5JsonWriter(filepath, app_logger=self.log)
+        root_id = db.open()
+
+        dt_str = special_dtype(vlen=str)
+        fields = []
+        fields.append(("field_1", np.dtype(">i8")))
+        fields.append(("field_2", ">f8"))
+        fields.append(("field_3", np.dtype("S15")))
+        fields.append(("field_4", dt_str))
+        dt = np.dtype(fields)
+
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+
+        item_type = item["type"]
+
+        self.assertEqual(item_type["class"], "H5T_COMPOUND")
+        fields = item_type["fields"]
+        self.assertEqual(len(fields), 4)
+
+        # create an attribute using the committed type
+        attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+        db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], list(attr_value))
+        attr_shape = attr["shape"]
+        self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(value, np.ndarray))
 
 
 if __name__ == "__main__":
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index 7c11c4f5..a3d946d9 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -12,6 +12,7 @@
 import unittest
 
 import logging
+import time
 from h5json import Hdf5db
 from h5json.h5pystore.h5py_reader import H5pyReader
 
@@ -27,7 +28,7 @@ def __init__(self, *args, **kwargs):
         else:
             lhStdout = None
 
-        self.log.setLevel(logging.INFO)
+        self.log.setLevel(logging.DEBUG)
         handler = logging.FileHandler("./hdf5dbtest.log")
         # add handler to logger
         self.log.addHandler(handler)
@@ -37,50 +38,58 @@ def __init__(self, *args, **kwargs):
 
     def testSimple(self):
         filepath = "data/hdf5/tall.h5"
-        kwargs = {"app_logger": self.log}
-        with Hdf5db(h5_reader=H5pyReader(filepath, **kwargs), **kwargs) as db:
-            root_id = db.getObjectIdByPath("/")
-            print("got root_id:", root_id)
-            root_json = db.getObjectById(root_id)
+        db = Hdf5db(app_logger=self.log)
+        db.reader = H5pyReader(filepath, app_logger=self.log)
+        root_id = db.open()
+        print("got root_id:", root_id)
+        root_json = db.getObjectById(root_id)
+        print("got root_json:", root_json)
+        root_attrs = root_json["attributes"]
+        self.assertEqual(len(root_attrs), 2)
+        self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+        root_links = root_json["links"]
+        self.assertEqual(len(root_links), 2)
+        self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+        g1_link = root_links["g1"]
+        self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+        self.assertTrue("created" in g1_link)
+        g1_created = g1_link["created"]
+        now = time.time()
+        self.assertTrue(g1_created < now)
+        g1_id = g1_link["id"]
+        self.assertTrue(g1_id)
+        self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+        dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+        dset_json = db.getObjectById(dset111_id)
+        dset_type = dset_json["type"]
+        self.assertEqual(dset_type["class"], "H5T_INTEGER")
+        self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+        dset_attrs = dset_json["attributes"]
+        self.assertEqual(len(dset_attrs), 2)
+        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+        attr1_json = dset_attrs["attr1"]
+        for k in ("type", "shape", "value", "created"):
+            self.assertTrue(k in attr1_json)
+        dset_shape = dset_json["shape"]
+        self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(dset_shape["dims"], [10, 10])
 
-            root_attrs = root_json["attributes"]
-            self.assertEqual(len(root_attrs), 2)
-            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
-            root_links = root_json["links"]
-            self.assertEqual(len(root_links), 2)
-            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
-            g1_link = root_links["g1"]
-            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
-            g1_id = g1_link["id"]
-            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
-            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
-            dset_json = db.getObjectById(dset111_id)
-            dset_type = dset_json["type"]
-            self.assertEqual(dset_type["class"], "H5T_INTEGER")
-            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
-            dset_attrs = dset_json["attributes"]
-            self.assertEqual(len(dset_attrs), 2)
-            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
-            dset_shape = dset_json["shape"]
-            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(dset_shape["dims"], [10, 10])
+        # try adding an attribute
+        db.createAttribute(dset111_id, "attr3", value=42)
+        dset_json = db.getObjectById(dset111_id)
+        dset_attrs = dset_json["attributes"]
+        self.assertEqual(len(dset_attrs), 3)
+        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+        attr3_json = dset_attrs["attr3"]
+        attr3_shape = attr3_json["shape"]
+        self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+        attr3_type = attr3_json["type"]
+        self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+        self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+        attr3_value = attr3_json["value"]
+        self.assertEqual(attr3_value, 42)
 
-            # try adding an attribute
-            db.createAttribute(dset111_id, "attr3", value=42)
-            dset_json = db.getObjectById(dset111_id)
-            dset_attrs = dset_json["attributes"]
-            self.assertEqual(len(dset_attrs), 3)
-            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
-            attr3_json = dset_attrs["attr3"]
-            attr3_shape = attr3_json["shape"]
-            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
-            attr3_type = attr3_json["type"]
-            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
-            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
-            attr3_value = attr3_json["value"]
-            self.assertEqual(attr3_value, 42)
-
-            db.close()
+        db.close()
 
 
 if __name__ == "__main__":
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index cbd7c879..3c1f3089 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -42,391 +42,432 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
-    def testGroup(self):
+    def testOpen(self):
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        self.assertTrue(isSchema2Id(root_id))
+        self.assertTrue(isRootObjId(root_id))
+        self.assertFalse(db.closed)
+        self.assertEqual(db.getObjectIdByPath("/"), root_id)
+        db.close()
+        #self.assertTrue(db.closed)
+        obj_id = db.open()
+        self.assertEqual(obj_id, root_id)
+        db.close()
+
+    def testWith(self):
         with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            self.assertTrue(isSchema2Id(root_id))
+            root_id = db.open()
             self.assertTrue(isRootObjId(root_id))
 
-            g1_id = db.createGroup()
-            self.assertTrue(isSchema2Id(g1_id))
-            self.assertFalse(isRootObjId(g1_id))
-            self.assertTrue(isValidUuid(g1_id, obj_class="groups"))
-            db.createHardLink(root_id, "g1", g1_id)
-
-            g2_id = db.createGroup()
-            self.assertTrue(isSchema2Id(g2_id))
-            self.assertFalse(isRootObjId(g2_id))
-            self.assertTrue(isValidUuid(g2_id, obj_class="groups"))
-            db.createHardLink(root_id, "g2", g2_id)
-
-            g1_1_id = db.createGroup()
-            self.assertTrue(isSchema2Id(g1_1_id))
-            self.assertFalse(isRootObjId(g1_1_id))
-            self.assertTrue(isValidUuid(g1_1_id, obj_class="groups"))
-            db.createHardLink(g1_id, "g1.1", g1_1_id)
-
-            self.assertEqual(db.getObjectIdByPath("g1"), g1_id)
-            self.assertEqual(db.getObjectIdByPath("/g1"), g1_id)
-            self.assertEqual(db.getObjectIdByPath("g1/"), g1_id)
-
-            self.assertEqual(db.getObjectIdByPath("g1/g1.1"), g1_1_id)
-            self.assertEqual(db.getObjectIdByPath("/g1/g1.1"), g1_1_id)
-            self.assertEqual(db.getObjectIdByPath("g1/g1.1/"), g1_1_id)
-
-            grp1_json = db.getObjectById(g1_id)
-            self.assertTrue("links" in grp1_json)
-            g1_links = grp1_json["links"]
-            self.assertTrue("g1.1" in g1_links)
-            g1_1_link = db.getLink(g1_id, "g1.1")
-            self.assertEqual(g1_1_link["class"], "H5L_TYPE_HARD")
-            self.assertEqual(g1_1_link["id"], g1_1_id)
-            self.assertTrue(g1_1_link["created"] > time.time() - 1.0)
-
-            db.createSoftLink(g2_id, "slink", "somewhere")
-            soft_link = db.getLink(g2_id, "slink")
-            self.assertEqual(soft_link["class"], "H5L_TYPE_SOFT")
-            self.assertEqual(soft_link["h5path"], "somewhere")
-            self.assertTrue(soft_link["created"] > time.time() - 1.0)
-
-            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
-            ext_link = db.getLink(g2_id, "extlink")
-            self.assertEqual(ext_link["class"], "H5L_TYPE_EXTERNAL")
-            self.assertEqual(ext_link["h5path"], "somewhere")
-            self.assertEqual(ext_link["file"], "someplace")
-            self.assertTrue(ext_link["created"] > time.time() - 1.0)
-
-            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
-            cust_link = db.getLink(g2_id, "cust")
-            self.assertEqual(cust_link["class"], "H5L_TYPE_USER_DEFINED")
-            self.assertEqual(cust_link["foo"], "bar")
-            self.assertTrue(cust_link["created"] > time.time() - 1.0)
-
-            links = db.getLinks(g2_id)
-            self.assertEqual(len(links), 3)
-            for title in "slink", "extlink", "cust":
-                self.assertTrue(title in links)
-
-            db.deleteLink(g2_id, "cust")
-            links = db.getLinks(g2_id)
-            self.assertEqual(len(links), 2)
-            for title in "slink", "extlink":
-                self.assertTrue(title in links)
-
-            try:
-                db.getObjectIdByPath("/g1/foo")
-                self.assertTrue(False)
-            except KeyError:
-                pass  # expected
-
-            ret = db.getLink(g2_id, "not_a_link")
-            self.assertTrue(ret is None)
+    def testGroup(self):
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+
+        g1_id = db.createGroup()
+        self.assertTrue(isSchema2Id(g1_id))
+        self.assertFalse(isRootObjId(g1_id))
+        self.assertTrue(isValidUuid(g1_id, obj_class="groups"))
+        db.createHardLink(root_id, "g1", g1_id)
+
+        g2_id = db.createGroup()
+        self.assertTrue(isSchema2Id(g2_id))
+        self.assertFalse(isRootObjId(g2_id))
+        self.assertTrue(isValidUuid(g2_id, obj_class="groups"))
+        db.createHardLink(root_id, "g2", g2_id)
+
+        g1_1_id = db.createGroup()
+        self.assertTrue(isSchema2Id(g1_1_id))
+        self.assertFalse(isRootObjId(g1_1_id))
+        self.assertTrue(isValidUuid(g1_1_id, obj_class="groups"))
+        db.createHardLink(g1_id, "g1.1", g1_1_id)
+
+        self.assertEqual(db.getObjectIdByPath("g1"), g1_id)
+        self.assertEqual(db.getObjectIdByPath("/g1"), g1_id)
+        self.assertEqual(db.getObjectIdByPath("g1/"), g1_id)
+
+        self.assertEqual(db.getObjectIdByPath("g1/g1.1"), g1_1_id)
+        self.assertEqual(db.getObjectIdByPath("/g1/g1.1"), g1_1_id)
+        self.assertEqual(db.getObjectIdByPath("g1/g1.1/"), g1_1_id)
+
+        grp1_json = db.getObjectById(g1_id)
+        self.assertTrue("links" in grp1_json)
+        g1_links = grp1_json["links"]
+        self.assertTrue("g1.1" in g1_links)
+        g1_1_link = db.getLink(g1_id, "g1.1")
+        self.assertEqual(g1_1_link["class"], "H5L_TYPE_HARD")
+        self.assertEqual(g1_1_link["id"], g1_1_id)
+        self.assertTrue(g1_1_link["created"] > time.time() - 1.0)
+
+        db.createSoftLink(g2_id, "slink", "somewhere")
+        soft_link = db.getLink(g2_id, "slink")
+        self.assertEqual(soft_link["class"], "H5L_TYPE_SOFT")
+        self.assertEqual(soft_link["h5path"], "somewhere")
+        self.assertTrue(soft_link["created"] > time.time() - 1.0)
+
+        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+        ext_link = db.getLink(g2_id, "extlink")
+        self.assertEqual(ext_link["class"], "H5L_TYPE_EXTERNAL")
+        self.assertEqual(ext_link["h5path"], "somewhere")
+        self.assertEqual(ext_link["file"], "someplace")
+        self.assertTrue(ext_link["created"] > time.time() - 1.0)
+
+        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        cust_link = db.getLink(g2_id, "cust")
+        self.assertEqual(cust_link["class"], "H5L_TYPE_USER_DEFINED")
+        self.assertEqual(cust_link["foo"], "bar")
+        self.assertTrue(cust_link["created"] > time.time() - 1.0)
+
+        links = db.getLinks(g2_id)
+        self.assertEqual(len(links), 3)
+        for title in "slink", "extlink", "cust":
+            self.assertTrue(title in links)
+
+        db.deleteLink(g2_id, "cust")
+        links = db.getLinks(g2_id)
+        self.assertEqual(len(links), 2)
+        for title in "slink", "extlink":
+            self.assertTrue(title in links)
+
+        try:
+            db.getObjectIdByPath("/g1/foo")
+            self.assertTrue(False)
+        except KeyError:
+            pass  # expected
+
+        ret = db.getLink(g2_id, "not_a_link")
+        self.assertTrue(ret is None)
+        db.close()
 
     def testNullSpaceAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
-            item = db.getAttribute(root_id, "A1")
-            self.assertTrue("shape" in item)
-            shape_item = item["shape"]
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_NULL")
-            self.assertTrue(item["created"] > time.time() - 1.0)
-            value = db.getAttributeValue(root_id, "A1")
-            self.assertEqual(value, None)
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in item)
+        shape_item = item["shape"]
+        self.assertTrue("class" in shape_item)
+        self.assertEqual(shape_item["class"], "H5S_NULL")
+        self.assertTrue(item["created"] > time.time() - 1.0)
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(value, None)
+        db.close()
 
     def testScalarAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            dims = ()
-            value = 42
-            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
-            self.assertEqual(item["value"], 42)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dims = ()
+        value = 42
+        db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+        self.assertEqual(item["value"], 42)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        shape = item["shape"]
+        self.assertEqual(shape["class"], "H5S_SCALAR")
+
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        db.close()
 
     def testFixedStringAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            value = "Hello, world!"
-            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["length"], 13)
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            ret_value = db.getAttributeValue(root_id, "A1")
-            self.assertEqual(ret_value, value.encode("ascii"))
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        value = "Hello, world!"
+        db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["length"], 13)
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        ret_value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(ret_value, value.encode("ascii"))
+        db.close()
 
     def testVlenAsciiAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-
-            value = b"Hello, world!"
-            dt = special_dtype(vlen=bytes)
-
-            # write the attribute
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            # read it back
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+
+        value = b"Hello, world!"
+        dt = special_dtype(vlen=bytes)
+
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
 
     def testVlenUtf8Attribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-
-            value = b"Hello, world!"
-            dt = special_dtype(vlen=str)
-
-            # write the attribute
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            # read it back
-            item = db.getAttribute(root_id, "A1")
-            shape_json = item["shape"]
-            self.assertEqual(shape_json["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+
+        value = b"Hello, world!"
+        dt = special_dtype(vlen=str)
+
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
 
     def testIntAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            value = [2, 3, 5, 7, 11]
-            db.createAttribute(root_id, "A1", value, dtype=np.int16)
-            item = db.getAttribute(root_id, "A1")
-            self.assertEqual(item["value"], [2, 3, 5, 7, 11])
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-            item_shape = item["shape"]
-            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
-            self.assertEqual(item_shape["dims"], [5,])
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_INTEGER")
-            self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        value = [2, 3, 5, 7, 11]
+        db.createAttribute(root_id, "A1", value, dtype=np.int16)
+        item = db.getAttribute(root_id, "A1")
+        self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(item_shape["dims"], [5,])
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db.close()
 
     def testCreateReferenceAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
 
-            dset_id = db.createDataset(shape=(), dtype=np.int32)
-            db.createHardLink(root_id, "DS1", dset_id)
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
 
-            dt = special_dtype(ref=Reference)
+        dt = special_dtype(ref=Reference)
 
-            ds1_ref = "datasets/" + dset_id
-            value = [ds1_ref,]
-            db.createAttribute(root_id, "A1", value, dtype=dt)
-            item = db.getAttribute(root_id, "A1")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertTrue("shape" in attr)
+        ds1_ref = "datasets/" + dset_id
+        value = [ds1_ref,]
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        item = db.getAttribute(root_id, "A1")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in attr)
 
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_REFERENCE")
-            self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
-            attr_value = item["value"]
-            self.assertEqual(len(attr_value), 1)
-            self.assertEqual(attr_value[0], ds1_ref)
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+        self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+        attr_value = item["value"]
+        self.assertEqual(len(attr_value), 1)
+        self.assertEqual(attr_value[0], ds1_ref)
+
+        db.close()
 
     def testCreateVlenReferenceAttribute(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape=(), dtype=np.int32)
-            db.createHardLink(root_id, "DS1", dset_id)
-            grp_id = db.createGroup()
-            db.createHardLink(root_id, "G1", grp_id)
-
-            dt_base = special_dtype(ref=Reference)
-            dt = special_dtype(vlen=dt_base)
-
-            ds1_ref = "datasets/" + dset_id
-            grp_ref = "groups/" + grp_id
-            ref_arr = np.zeros((2,), dtype=dt_base)
-            ref_arr[0] = ds1_ref
-            ref_arr[1] = grp_ref
-            vlen_arr = np.zeros((), dtype=dt)
-            vlen_arr[()] = ref_arr
-
-            db.createAttribute(root_id, "A1", vlen_arr)
-            item = db.getAttribute(root_id, "A1")
-
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_VLEN")
-            self.assertEqual(item_type["size"], "H5T_VARIABLE")
-            base_type = item_type["base"]
-            self.assertEqual(base_type["class"], "H5T_REFERENCE")
-            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
-
-            item_shape = item["shape"]
-            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
+        grp_id = db.createGroup()
+        db.createHardLink(root_id, "G1", grp_id)
+
+        dt_base = special_dtype(ref=Reference)
+        dt = special_dtype(vlen=dt_base)
+
+        ds1_ref = "datasets/" + dset_id
+        grp_ref = "groups/" + grp_id
+        ref_arr = np.zeros((2,), dtype=dt_base)
+        ref_arr[0] = ds1_ref
+        ref_arr[1] = grp_ref
+        vlen_arr = np.zeros((), dtype=dt)
+        vlen_arr[()] = ref_arr
+
+        db.createAttribute(root_id, "A1", vlen_arr)
+        item = db.getAttribute(root_id, "A1")
+
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_VLEN")
+        self.assertEqual(item_type["size"], "H5T_VARIABLE")
+        base_type = item_type["base"]
+        self.assertEqual(base_type["class"], "H5T_REFERENCE")
+        self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SCALAR")
+
+        db.close()
 
     def testCommittedType(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            dt = np.dtype("S15")
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dt = np.dtype("S15")
+
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
 
-            ctype_id = db.createCommittedType(dt)
-            db.createHardLink(root_id, "ctype", ctype_id)
-            item = db.getObjectById(ctype_id)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
+        item_type = item["type"]
 
-            item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item_type["length"], 15)
 
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 15)
+        # create an attribute using the committed type
+        db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], "hello world!")
 
-            # create an attribute using the committed type
-            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertEqual(attr["value"], "hello world!")
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_STRING")
+        self.assertEqual(attr_type["length"], 15)
+        self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
 
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_STRING")
-            self.assertEqual(attr_type["length"], 15)
-            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+        db.close()
 
     def testCommittedCompoundType(self):
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-
-            dt_str = special_dtype(vlen=str)
-            fields = []
-            fields.append(("field_1", np.dtype(">i8")))
-            fields.append(("field_2", ">f8"))
-            fields.append(("field_3", np.dtype("S15")))
-            fields.append(("field_4", dt_str))
-            dt = np.dtype(fields)
-
-            ctype_id = db.createCommittedType(dt)
-            db.createHardLink(root_id, "ctype", ctype_id)
-            item = db.getObjectById(ctype_id)
-            now = int(time.time())
-            self.assertTrue(item["created"] > now - 1)
-
-            item_type = item["type"]
-
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            fields = item_type["fields"]
-            self.assertEqual(len(fields), 4)
-
-            # create an attribute using the committed type
-            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
-            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
-            attr = db.getAttribute(root_id, "A1")
-            self.assertEqual(attr["value"], list(attr_value))
-            attr_shape = attr["shape"]
-            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
-
-            attr_type = attr["type"]
-            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
-
-            value = db.getAttributeValue(root_id, "A1")
-            self.assertTrue(isinstance(value, np.ndarray))
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+
+        dt_str = special_dtype(vlen=str)
+        fields = []
+        fields.append(("field_1", np.dtype(">i8")))
+        fields.append(("field_2", ">f8"))
+        fields.append(("field_3", np.dtype("S15")))
+        fields.append(("field_4", dt_str))
+        dt = np.dtype(fields)
+
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+
+        item_type = item["type"]
+
+        self.assertEqual(item_type["class"], "H5T_COMPOUND")
+        fields = item_type["fields"]
+        self.assertEqual(len(fields), 4)
+
+        # create an attribute using the committed type
+        attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+        db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], list(attr_value))
+        attr_shape = attr["shape"]
+        self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(value, np.ndarray))
+
+        db.close()
 
     def testSimpleDataset(self):
-        with Hdf5db(app_logger=self.log) as db:
-            nrows = 8
-            ncols = 10
-            shape = (nrows, ncols)
-            dtype = np.int32
-            root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape, dtype=dtype)
-            db.createHardLink(root_id, "dset", dset_id)
-            db.createAttribute(dset_id, "a1", "Hello, world")
-            sel_all = selections.select(shape, ...)
-            arr = db.getDatasetValues(dset_id, sel_all)
-            self.assertEqual(arr.dtype, dtype)
-            self.assertEqual(arr.shape, shape)
-            self.assertEqual(arr.min(), 0)
-            self.assertEqual(arr.max(), 0)
-            row = np.zeros((ncols,), dtype=dtype)
-            for i in range(nrows):
-                row[:] = list(range(i * 10, (i + 1) * 10))
-                row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
-                db.setDatasetValues(dset_id, row_sel, row)
-            arr = db.getDatasetValues(dset_id, sel_all)
-            for i in range(nrows):
-                row = np.array(list(range(i * 10, (i + 1) * 10)), dtype=dtype)
-                np.testing.assert_array_equal(arr[i, :], row)
+        nrows = 8
+        ncols = 10
+        shape = (nrows, ncols)
+        dtype = np.int32
+        
+        db = Hdf5db(app_logger=self.log)  
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        db.createAttribute(dset_id, "a1", "Hello, world")
+        sel_all = selections.select(shape, ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, shape)
+        self.assertEqual(arr.min(), 0)
+        self.assertEqual(arr.max(), 0)
+        row = np.zeros((ncols,), dtype=dtype)
+        for i in range(nrows):
+            row[:] = list(range(i * 10, (i + 1) * 10))
+            row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
+            db.setDatasetValues(dset_id, row_sel, row)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        for i in range(nrows):
+            row = np.array(list(range(i * 10, (i + 1) * 10)), dtype=dtype)
+            np.testing.assert_array_equal(arr[i, :], row)
+
+        db.close()
 
     def testScalarDataset(self):
         dtype = np.int32
-        with Hdf5db(app_logger=self.log) as db:
-            root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset((), dtype=dtype)
-            db.createHardLink(root_id, "dset", dset_id)
-            db.createAttribute(dset_id, "a1", "Hello, world")
-            sel_all = selections.select((), ...)
-            arr = db.getDatasetValues(dset_id, sel_all)
-            self.assertEqual(arr.dtype, dtype)
-            self.assertEqual(arr.shape, ())
-            self.assertEqual(arr[()], 0)
-            db.setDatasetValues(dset_id, sel_all, np.array(42, dtype=dtype))
-            arr = db.getDatasetValues(dset_id, sel_all)
-            self.assertEqual(arr.dtype, dtype)
-            self.assertEqual(arr.shape, ())
-            self.assertEqual(arr.min(), 42)
-            self.assertEqual(arr.max(), 42)
+
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset((), dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        db.createAttribute(dset_id, "a1", "Hello, world")
+        sel_all = selections.select((), ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, ())
+        self.assertEqual(arr[()], 0)
+        db.setDatasetValues(dset_id, sel_all, np.array(42, dtype=dtype))
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, ())
+        self.assertEqual(arr.min(), 42)
+        self.assertEqual(arr.max(), 42)
+
+        db.close()
 
     def testResizableDataset(self):
-        with Hdf5db(app_logger=self.log) as db:
-            nrows = 8
-            ncols = 10
-            shape = (nrows, ncols)
-            dtype = np.int32
-            maxdims = (None, ncols * 2)
-            root_id = db.getObjectIdByPath("/")
-            dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
-            db.createHardLink(root_id, "dset", dset_id)
-            db.createAttribute(dset_id, "a1", "Hello, world")
-
-            # resize limited dimension
-            db.resizeDataset(dset_id, (nrows, ncols * 2))
-
-            # try to go beyond max extent
-            try:
-                db.resizeDataset(dset_id, (nrows, ncols * 3))
-                self.assertTrue(False)
-            except ValueError:
-                pass  # expected
-
-            # resize unlimited dimension
-            db.resizeDataset(dset_id, (nrows * 10, ncols))
+        nrows = 8
+        ncols = 10
+        shape = (nrows, ncols)
+        dtype = np.int32
+        maxdims = (None, ncols * 2)
+
+        db = Hdf5db(app_logger=self.log)
+            
+        root_id = db.open()
+        dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        db.createAttribute(dset_id, "a1", "Hello, world")
+
+        # resize limited dimension
+        db.resizeDataset(dset_id, (nrows, ncols * 2))
+
+        # try to go beyond max extent
+        try:
+            db.resizeDataset(dset_id, (nrows, ncols * 3))
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+        # resize unlimited dimension
+        db.resizeDataset(dset_id, (nrows * 10, ncols))
+
+        db.close()
 
 
 if __name__ == "__main__":
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index 667a8bcd..fdbc7b7a 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -17,6 +17,7 @@
 from h5json import Hdf5db
 from h5json.hsdsstore.httpconn import HttpConn
 from h5json.hsdsstore.hsds_writer import HSDSWriter
+from h5json.h5pystore.h5py_reader import H5pyReader
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
 
@@ -36,10 +37,10 @@ def __init__(self, *args, **kwargs):
 
     def testSimple(self):
 
-        domain_path = "/home/test_user1/writer_test.h5"
+        domain_path = "hdf5://home/test_user1/test/writer_test.h5"
 
         db = Hdf5db(app_logger=self.log)
-        db.writer = HSDSWriter(domain_path)
+        db.writer = HSDSWriter(domain_path, app_logger=self.log)
         root_id = db.open()
         http_conn = HttpConn(domain_path, mode='r', retries=1)
 
@@ -158,6 +159,51 @@ def testSimple(self):
         rsp_value = rsp_json["value"]
         self.assertEqual(rsp_value, 42)
 
+        db.close()
+
+    def testH5PyToHS(self):
+        # test reading from HDF5 file and writing to HSDS
+
+        file_path = "data/hdf5/tall.h5"
+        domain_path = "hdf5://home/test_user1/test/hsds_writer_test_tall.h5"
+         
+        db = Hdf5db(app_logger=self.log)
+        db.reader = H5pyReader(file_path)
+        db.writer = HSDSWriter(domain_path)
+        root_id = db.open()
+        #db.readAll()
+        root_json = db.getObjectById(root_id)
+        db.flush()
+
+        # validate - get the root group and see if counts are correct
+        http_conn = HttpConn(domain_path, mode='r', retries=1)
+        http_rsp = http_conn.GET(f"/groups/{root_id}")
+        self.assertEqual(http_rsp.status_code, 200)
+        root_json = http_rsp.json()
+        self.assertEqual(root_json["id"], root_id)
+        # attribute count should still be zero (hasn't been flushed yet)
+        self.assertEqual(root_json["attributeCount"], 2)
+        # same for link count
+        self.assertEqual(root_json["linkCount"], 2)
+
+        # get the g1 hard link
+        http_rsp = http_conn.GET(f"/groups/{root_id}/links/g1")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        g1_link = rsp_json["link"]
+        g1_id = g1_link["id"]
+
+        # get the g1 group json
+        http_rsp = http_conn.GET(f"/groups/{g1_id}")
+        self.assertEqual(http_rsp.status_code, 200)
+        g1_json = http_rsp.json()
+        self.assertEqual(g1_json["attributeCount"], 0)
+        self.assertEqual(g1_json["linkCount"], 2)
+
+
+
+
+
         db.close()
 
 

From 3d9003c334c7e6f6aa94fb22199493eaf2fc4bdb Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 11 Jul 2025 19:45:17 +0100
Subject: [PATCH 057/129] hsds writer updates

---
 src/h5json/apps/h5tohs.py             |   3 +
 src/h5json/h5pystore/h5py_reader.py   |  19 +-
 src/h5json/h5pystore/h5py_writer.py   |   1 -
 src/h5json/h5writer.py                |   2 +-
 src/h5json/hdf5db.py                  | 297 +++++++++++++++++++-------
 src/h5json/hsdsstore/hsds_writer.py   |   5 +-
 src/h5json/jsonstore/h5json_reader.py |   4 +-
 src/h5json/jsonstore/h5json_writer.py |   3 +-
 test/unit/h5json_reader_test.py       |   4 +-
 test/unit/h5py_writer_test.py         |  20 ++
 test/unit/hdf5db_test.py              |   8 +-
 test/unit/hsds_writer_test.py         |   7 +-
 12 files changed, 263 insertions(+), 110 deletions(-)

diff --git a/src/h5json/apps/h5tohs.py b/src/h5json/apps/h5tohs.py
index 4d1a8106..9853482e 100755
--- a/src/h5json/apps/h5tohs.py
+++ b/src/h5json/apps/h5tohs.py
@@ -17,10 +17,12 @@
 from h5json.hsdsstore.hsds_writer import HSDSWriter
 from h5json.h5pystore.h5py_reader import H5pyReader
 
+
 def usage():
     print(f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file> <hsds_domain>")
     sys.exit(0)
 
+
 def main():
     no_data = False
     filename = None
@@ -59,5 +61,6 @@ def main():
 
     db.close()  # close will trigger write to HSDS
 
+
 if __name__ == "__main__":
     main()
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index 089f0f24..9aee273d 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -177,9 +177,6 @@ def open(self):
         self._id_map[self._root_id] = f
         addr = h5py.h5o.get_info(f.id).addr
         self._addr_map[addr] = self._root_id
-        #f.visititems(self.visit)
-
-        print("h5py_reader keys:", list(self.db.db.keys()))
 
         return self._root_id
 
@@ -268,7 +265,6 @@ def getAttribute(self, obj_id, name, include_data=True):
         else:
             pass  # no data
 
-        
         item['created'] = time.time()  # TBD: get attribute creation time from h5py?
         return item
 
@@ -314,7 +310,7 @@ def _getLink(self, parent, link_name):
                 item["id"] = None
             else:
                 item["id"] = self._addr_map[addr]
-            
+
         item['created'] = time.time()  # TBD: get the link creation time from h5py?
 
         return item
@@ -435,11 +431,11 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
         return creationProps
 
     def _getDataset(self, dset):
+        """ return json representation of the given dataset """
+
         self.log.info(f"getDataset alias: [{dset.name}]")
 
         item = {"alias": dset.name}
-        print("dset:", dset)
-        print("dset type:", type(dset))
         typeid = dset.id.get_type()
         if h5py.h5t.TypeID.committed(typeid):
             type_uuid = None
@@ -479,7 +475,7 @@ def _getDataset(self, dset):
         item["cpl"] = self._getHDF5DatasetCreationProperties(dset, type_item["class"])
 
         return item
-    
+
     def _getHardLinkIds(self, parent):
         """ create any ids for hard links of the group """
 
@@ -518,21 +514,18 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True):
         if obj_id not in self._id_map:
             raise KeyError(f"{obj_id} not found")
         h5obj = self._id_map[obj_id]
-        print("h5obj:", h5obj)
-        print("h5obj.name:", h5obj.name)
-        print("h5obj type:", type(h5obj))
         if isinstance(h5obj, h5py.Group):
             self._getHardLinkIds(h5obj)
             obj_json = self._getGroup(h5obj, include_links=include_links)
         elif isinstance(h5obj, h5py.Dataset):
             obj_json = self._getDataset(h5obj)
         elif isinstance(h5obj, h5py.Datatype):
-            obj_json = self._getDataset(h5obj)
+            obj_json = self._getDatatype(h5obj)
         else:
             msg = f"unexpected object type: {type(h5obj)}"
             self.log.error(msg)
             raise TypeError(msg)
-            
+
         if include_attrs:
             attributes = self.getAttributes(obj_id)
             obj_json["attributes"] = attributes
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 15d35bd4..14942c11 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -388,7 +388,6 @@ def updateAttributes(self, obj_id, obj):
                 continue
             self.createAttribute(obj, name, attr_json)
 
-        
     def flush(self):
         """ Write dirty items """
         if self.closed:
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index 3dfb8da8..8de1a277 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -67,7 +67,7 @@ def no_data(self):
     @abstractmethod
     def open(self):
         """ open storage handle, return root_id"""
-        return None
+        pass
 
     @abstractmethod
     def flush(self):
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 28eef18d..66c84311 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -22,6 +22,150 @@
 from .h5writer import H5Writer
 
 
+class H5NullReader(H5Reader):
+    """
+    This class can be used by HDF5DB as a default no-op reader
+    """
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        super().__init__(filepath, app_logger=app_logger)
+        self.log.debug("H5NullReader.__init__")
+
+        self._root_id = None
+        self._is_closed = True
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+
+        if obj_id != self._root_id:
+            raise KeyError(f"{obj_id} not found")
+
+        # create a root group with no links or attributes
+        group_json = {"links": {}, "attributes": {}, "cpl": {}}
+        group_json["created"] = time.time()
+
+        return group_json
+
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        raise IOError("not supported")
+
+    def getDatasetValues(self, obj_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        # just return a zero array
+        arr = np.zeros(sel.shape, dtype=dtype)
+
+        return arr
+
+    def open(self):
+        """ Open data source for reading """
+        self.log.debug("H5NullReader open")
+        if self.db is None:
+            # no db set yet
+            self.log.warning("no self.db db_ref")
+            raise ValueError("no db")
+
+        if self._is_closed:
+            if not self._root_id:
+                if self.db.root_id:
+                    # use the db root id
+                    self._root_id = self.db.root_id
+                else:
+                    # create a new root id
+                    self._root_id = createObjId(obj_type="groups")
+            self._is_closed = False
+        return self._root_id
+
+    def close(self):
+        """ close any open handles to the storage """
+        self._is_closed = True
+
+    def isClosed(self):
+        """ return True if handle is closed """
+        return self._is_closed
+
+
+class H5NullWriter(H5Writer):
+    """
+    This class can be used by HDF5DB as a default no-op writer
+    """
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        if append:
+            raise IOError("append is not supprot for H5NullWriter")
+
+        super().__init__(filepath, no_data=no_data, app_logger=app_logger)
+        self.log.debug("H5NullWriter.__init__")
+        self._root_id = None
+        self._is_closed = True
+
+    def open(self):
+        """ open storage handle, return root_id"""
+        self.log.debug("H5NullWriter open")
+        if not self._is_closed:
+            return self._root_id  # already open
+
+        if self.db is None:
+            # no db set yet
+            self.log.warning("no self.db db_ref")
+            raise ValueError("no db")
+
+        if not self._root_id:
+            if self.db.root_id:
+                self._root_id = self.db.root_id
+            else:
+                self._root_id = createObjId(obj_type="groups")
+        self._is_closed = False
+        return self._root_id
+
+    def flush(self):
+        """ Write dirty items """
+        self.log.debug("H5NullWriter> flush")
+        # Null writer is unable to actually persist anything, so return False
+        return False
+
+    def close(self):
+        """ close storage handle """
+        self.log.debug("H5NullWriter.close")
+        self._is_closed = True
+
+    def isClosed(self):
+        """ return True if handle is closed """
+        return self._is_closed
+
+
 class Hdf5db:
     """
     This class is used to manage id lookup tables for primary HDF objects (Groups, Datasets,
@@ -79,23 +223,12 @@ def reader(self):
     @reader.setter
     def reader(self, value: H5Reader):
         """ set the reader """
-        if self._writer:
+        if self._writer and not self._writer.isClosed():
             self.flush()
-        if self._reader:
+        if self._reader and not self._reader.isClosed():
             self._reader.close()
         self._reader = value
         self._reader.set_db(self)
-        """
-        root_id = value.get_root_id()
-        if not root_id:
-            raise ValueError(f"reader {type(value)} unable to return root_id")
-        group_json = value.getObjectById(root_id)
-        if not group_json:
-            raise ValueError(f"reader {type(value)} unable to return group json")
-        self._reader = value
-        self._db[root_id] = group_json
-        self._root_id = root_id
-        """
 
     @property
     def writer(self):
@@ -156,17 +289,17 @@ def make_dirty(self, obj_id):
     def flush(self):
         """ write out any changes """
         self.log.debug("db.flush()")
-        if not self.writer:
-            return  # nothing to do
+        self._checkWriter()
         if not self.writer.flush():
             # flush not successful, don't clear dirty set
             self.log.error("writer flush failed")
-            raise IOError("writer flush failed")
+            return False
 
         # reset new, dirty and deleted sets
         self._new_objects = set()
         self._dirty_objects = set()
         self._deleted_objects = set()
+        return True
 
     def readAll(self):
         """ read all meta data objects from reader and save to db """
@@ -174,12 +307,7 @@ def readAll(self):
         self.log.debug("readAll")
         if self.closed:
             raise IOError("database is not open")
-        
-        if not self.reader:
-            self.log.debug("no reader set")
-            # no reader, nothing to do
-            return
-        
+
         obj_ids = set()
         obj_ids.add(self.root_id)
         while obj_ids:
@@ -198,50 +326,48 @@ def readAll(self):
     def open(self):
         """ open reader and writer if set """
         self.log.debug("db.open()")
-        if self.root_id:
-            self.log.debug("root id already set, re-open call")
-            if self.writer:
-                self.writer.open()
-            if self.reader:
-                self.reader.open()
+
+        if self.reader is None:
+            self.reader = H5NullReader(None, app_logger=self.log)
+            self._reader.set_db(self)
+
+        if self.writer is None:
+            self.writer = H5NullWriter(None, app_logger=self.log)
+            self._writer.set_db(self)
+
+        if not self.reader.isClosed():
+            self.log.debug("db is already opened")
+            raise IOError("db is already opened")
+            return self._root_id
+
+        if self.writer.append:
+            # append mode for the writer, first open writer and get the root id
+            self.log.debug("db.open, write append, getting root_id from writer")
+            writer_root_id = self.writer.open()
+            if self._root_id:
+                if writer_root_id != self._root_id:
+                    raise IOError("writer root id does not match reader root id")
+            else:
+                self._root_id = writer_root_id
+
+            # now open reader
+            reader_root_id = self.reader.open()
+            if reader_root_id != self._root_id:
+                raise IOError("writer root id does not match reader root id")
+
         else:
-            self.log.debug("db.open, getting root_id")
-
-            if self.writer and self.writer.append:
-                # append mode for the writer, open writer and get the root id
-                self.log.debug("db.open, write append, getting root_id from writer")
-                self._root_id = self.writer.open()
-                if self.reader:
-                    reader_root_id = self.reader.open()
-                    if reader_root_id != self._root_id:
-                        # TBD: need someway to reconcile if both reader and writer have
-                        # an potentiated idea on what there root id is
-                        self.log.warn("reader root_id does not match writer root_id")
-            elif self.reader:
-                self.log.debug("db.open, getting root_id from reader")
-                self._root_id = self.reader.open()
-                if self.writer:
-                    writer_root_id = self.writer.open()
-                    if writer_root_id != self._root_id:
-                        # TBD: same as above, need to deal with inconsistent root ids
-                        msg = "writer root_id does not match reader root_id"
-                        self.log.error(msg)
-                        raise IOError(msg)
-                    else:
-                        self.log.debug('writer and reader root ids match!')
+            # open reader first and get root id
+            reader_root_id = self.reader.open()
+            if self._root_id:
+                if reader_root_id != self._root_id:
+                    raise IOError("writer root id does not match reader root id")
             else:
-                # no root id set by writer or reader, initialize now
-                root_id = createObjId(obj_type="groups")
-                self.log.debug(f"no reader or writer, creating new root id: {root_id}")
-                self._root_id = root_id
-                if self.writer:
-                    # open writer in create mode now that we have a root id
-                    self.writer.open()
-
-                # create a root group just as a memory object
-                group_json = {"links": {}, "attributes": {}, "cpl": {}}
-                group_json["created"] = time.time()
-                self._db[self._root_id] = group_json
+                self._root_id = reader_root_id
+
+            # now open writer
+            writer_root_id = self.writer.open()
+            if writer_root_id != self._root_id:
+                raise IOError("writer root id does not match reader root id")
 
         self.log.debug(f"db.open() - returning root_id: {self._root_id}")
         return self._root_id
@@ -249,7 +375,7 @@ def open(self):
     def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
-        
+
         self.flush()
         if self.writer:
             self.writer.close()
@@ -258,7 +384,14 @@ def close(self):
 
     @property
     def closed(self):
-        return False if self.root_id else True
+        if self.reader:
+            return self.reader.isClosed()
+        elif self.writer:
+            return self.writer.isClosed()
+        elif self._root_id:
+            return True
+        else:
+            return False
 
     def __enter__(self):
         """ called on package init """
@@ -270,16 +403,28 @@ def __exit__(self, type, value, traceback):
         self.log.info("Hdf5db __exit")
         self.close()
 
+    def _checkReader(self):
+        """ check the reader is set and open """
+        if self.reader is None:
+            raise IOError("reader not set")
+        if self.reader.closed:
+            raise IOError("reader is closed")
+
+    def _checkWriter(self):
+        """ check the writer is set and open """
+        if self.writer is None:
+            raise IOError("writer not set")
+        if self.writer.closed:
+            raise IOError("writer is closed")
+
     def getObjectById(self, obj_id):
         """ return object with given id """
         self.log.debug(f"getObjectById {obj_id}")
+        self._checkReader()
         if obj_id not in self.db:
-            if self.reader:
-                # load the obj from the reader
-                obj_json = self.reader.getObjectById(obj_id)
-                self.db[obj_id] = obj_json
-            else:
-                raise KeyError(f"obj_id: {obj_id} not found")
+            # load the obj from the reader
+            obj_json = self.reader.getObjectById(obj_id)
+            self.db[obj_id] = obj_json
         obj_json = self.db[obj_id]
 
         return obj_json
@@ -538,6 +683,8 @@ def getDatasetValues(self, dset_id, sel):
         number of elements as the rank of the dataset.
         """
         self.log.info(f"getDatasetValues dset_id: {dset_id}, sel: {sel}")
+
+        self._checkReader()
         dset_json = self.getObjectById(dset_id)
         shape_json = dset_json["shape"]
         if not isinstance(sel, selections.Selection):
@@ -560,11 +707,7 @@ def getDatasetValues(self, dset_id, sel):
             rank = len(dims)
 
         dtype = self.getDtype(dset_json)
-        if self.reader:
-            arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
-        else:
-            # TBD: Initialize with fill value if non-zero
-            arr = np.zeros(sel.shape, dtype=dtype)
+        arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
 
         if "updates" in dset_json:
             # apply any non-flushed changes that intersect the current selection
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index f56a5e34..4697093d 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -445,7 +445,6 @@ def updateValues(self, dset_ids):
                     self.updateValue(dset_id, sel, arr)
                 updates.clear()
 
-
     def flush(self):
         """ Write dirty items """
         if self.closed:
@@ -455,7 +454,7 @@ def flush(self):
         if not self._http_conn:
             self.log.warning("hsds_writer no http connection")
             raise IOError("no http connection")
-        
+
         self.log.info("hsds_writer.flush()")
         self.log.debug(f"    new object count: {len(self.db.new_objects)}")
         self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
@@ -493,7 +492,7 @@ def flush(self):
         if self.db.deleted_objects:
             self.log.debug(f"deleted ids: {self.db.deleted_objects}")
             self.deleteObjects(self.db.deleted_objects)
-        
+
         self._last_flush_time = time.time()
         self.log.debug("hsds_writer> flush successful")
         # all objects written successfully
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
index 40f8e5e4..4fd1e0c8 100644
--- a/src/h5json/jsonstore/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -62,10 +62,10 @@ def open(self):
         return self._root_id
 
     def close(self):
-        pass
+        self._h5json = None
 
     def isClosed(self):
-        return False if self._h5json else False
+        return False if self._h5json else True
 
     def get_root_id(self):
         """ Return root id """
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index 8cb5a39c..f5ede89f 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -40,7 +40,7 @@ def __init__(
 
     def flush(self):
         """ Write dirty items """
-         
+
         if not self._root_id:
             msg = "flush called prior to open"
             self.log.warning(msg)
@@ -278,7 +278,6 @@ def dumpFile(self):
         self.json["apiVersion"] = db_version_info["hdf5-json-version"]
         self.json["root"] = getUuidFromId(self._root_uuid)
 
-
         self.updateAliasList()  # create alias_db with obj_id to alias list dict
 
         self.dumpGroups()
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
index bca00f2c..5c190203 100644
--- a/test/unit/h5json_reader_test.py
+++ b/test/unit/h5json_reader_test.py
@@ -28,7 +28,7 @@ def __init__(self, *args, **kwargs):
         else:
             lhStdout = None
 
-        self.log.setLevel(logging.INFO)
+        self.log.setLevel(logging.DEBUG)
         handler = logging.FileHandler("./h5json_reader_test.log")
         # add handler to logger
         self.log.addHandler(handler)
@@ -40,7 +40,9 @@ def testSimple(self):
         filepath = "data/json/tall.json"
         db = Hdf5db(app_logger=self.log)
         db.reader = H5JsonReader(filepath, app_logger=self.log)
+        self.assertTrue(db.closed)
         root_id = db.open()
+        self.assertTrue(root_id)
         root_json = db.getObjectById(root_id)
 
         root_attrs = root_json["attributes"]
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index e51c4dba..e2763795 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -20,6 +20,7 @@
 from h5json.jsonstore.h5json_reader import H5JsonReader
 from h5json.h5pystore.h5py_writer import H5pyWriter
 from h5json.hdf5dtype import special_dtype, Reference
+from h5json.objid import isRootObjId, isSchema2Id
 from h5json import selections
 
 
@@ -46,6 +47,21 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
+    def testOpen(self):
+        filepath = "test/unit/out/h5py_writer_test_testOpen.h5"
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath)
+        root_id = db.open()
+        self.assertTrue(isSchema2Id(root_id))
+        self.assertTrue(isRootObjId(root_id))
+        self.assertFalse(db.closed)
+        self.assertEqual(db.getObjectIdByPath("/"), root_id)
+        db.close()
+        self.assertTrue(db.closed)
+        obj_id = db.open()
+        self.assertEqual(obj_id, root_id)
+        db.close()
+
     def testSimple(self):
 
         filepath = "test/unit/out/h5py_writer_test_testSimple.h5"
@@ -518,6 +534,7 @@ def testReaderWithUpdate(self):
         db.open()
         # close should create everything the json reader read to the output file
         db.close()
+        self.assertTrue(db.closed)
 
         with h5py.File(file_out) as f:
             self.assertTrue("/g1/g1.1/dset1.1.1" in f)
@@ -525,8 +542,11 @@ def testReaderWithUpdate(self):
             self.assertEqual(len(dset111.attrs), 2)
 
         db.open()
+        self.assertFalse(db.closed)
         dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
         db.createAttribute(dset111_id, "attr3", "hello")
+        self.assertFalse(db.closed)
+        print("test - db.close()")
         db.close()
 
         with h5py.File(file_out) as f:
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 3c1f3089..c9c32969 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -50,7 +50,7 @@ def testOpen(self):
         self.assertFalse(db.closed)
         self.assertEqual(db.getObjectIdByPath("/"), root_id)
         db.close()
-        #self.assertTrue(db.closed)
+        self.assertTrue(db.closed)
         obj_id = db.open()
         self.assertEqual(obj_id, root_id)
         db.close()
@@ -394,8 +394,8 @@ def testSimpleDataset(self):
         ncols = 10
         shape = (nrows, ncols)
         dtype = np.int32
-        
-        db = Hdf5db(app_logger=self.log)  
+
+        db = Hdf5db(app_logger=self.log)
         root_id = db.open()
         dset_id = db.createDataset(shape, dtype=dtype)
         db.createHardLink(root_id, "dset", dset_id)
@@ -448,7 +448,7 @@ def testResizableDataset(self):
         maxdims = (None, ncols * 2)
 
         db = Hdf5db(app_logger=self.log)
-            
+
         root_id = db.open()
         dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
         db.createHardLink(root_id, "dset", dset_id)
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index fdbc7b7a..9557a9f8 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -166,12 +166,11 @@ def testH5PyToHS(self):
 
         file_path = "data/hdf5/tall.h5"
         domain_path = "hdf5://home/test_user1/test/hsds_writer_test_tall.h5"
-         
+
         db = Hdf5db(app_logger=self.log)
         db.reader = H5pyReader(file_path)
         db.writer = HSDSWriter(domain_path)
         root_id = db.open()
-        #db.readAll()
         root_json = db.getObjectById(root_id)
         db.flush()
 
@@ -200,10 +199,6 @@ def testH5PyToHS(self):
         self.assertEqual(g1_json["attributeCount"], 0)
         self.assertEqual(g1_json["linkCount"], 2)
 
-
-
-
-
         db.close()
 
 

From 74d3a6217a0eafb1d5c7f773bd74acadd0cd9a18 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 14 Jul 2025 18:15:16 +0100
Subject: [PATCH 058/129] update datasetvalues for in init

---
 src/h5json/hsdsstore/hsds_writer.py | 34 +++++++++++++++++++-----
 test/unit/hsds_writer_test.py       | 40 +++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 4697093d..24ed900c 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -436,14 +436,31 @@ def updateValues(self, dset_ids):
             if getCollectionForId(dset_id) != "datasets":
                 continue  # ignore groups and datatypes
             dset_json = self.db.getObjectById(dset_id)
-            if "updates" not in dset_json:
+            dset_shape = dset_json["shape"]
+            dset_class = dset_shape['class']
+            if dset_class == "H5S_NULL":
+                # no data to update
                 continue
-            updates = dset_json["updates"]
-            if updates:
-                self.log.debug(f"hsds_writer> {dset_id} update count: {len(updates)}")
-                for (sel, arr) in updates:
-                    self.updateValue(dset_id, sel, arr)
-                updates.clear()
+            if self._init:
+                # get all data for the dataset
+                # TBD: do this by chunks
+                if dset_class == "H5S_SCALAR":
+                    dset_dims = []
+                else:
+                    dset_dims = dset_shape["dims"]
+                sel_all = selections.select(dset_dims, ...)
+                arr = self.db.getDatasetValues(dset_id, sel_all)
+                if arr is not None:
+                    self.updateValue(dset_id, sel_all, arr)
+            else:
+                if "updates" not in dset_json:
+                    continue
+                updates = dset_json["updates"]
+                if updates:
+                    self.log.debug(f"hsds_writer> {dset_id} update count: {len(updates)}")
+                    for (sel, arr) in updates:
+                        self.updateValue(dset_id, sel, arr)
+                    updates.clear()
 
     def flush(self):
         """ Write dirty items """
@@ -472,6 +489,9 @@ def flush(self):
             self.createObjects(obj_ids)
             dirty_ids.update(obj_ids)
             dirty_ids.add(root_id)  # add back root for attribute and link creation
+            if not self._no_data:
+                # initialize dataset values
+                self.updateValues(obj_ids)
             self._init = False
         elif self.db.new_objects:
             self.log.debug(f"hsds_writer> {len(self.db.new_objects)} objects to create")
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index 9557a9f8..a2d7201b 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -199,6 +199,46 @@ def testH5PyToHS(self):
         self.assertEqual(g1_json["attributeCount"], 0)
         self.assertEqual(g1_json["linkCount"], 2)
 
+        # get the g1.1 link
+        http_rsp = http_conn.GET(f"/groups/{g1_id}/links/g1.1")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        g1_1_link = rsp_json["link"]
+        g1_1_id = g1_1_link["id"]
+
+        # Get the g1.1 json
+        http_rsp = http_conn.GET(f"/groups/{g1_1_id}")
+        self.assertEqual(http_rsp.status_code, 200)
+        g1_json = http_rsp.json()
+        self.assertEqual(g1_json["attributeCount"], 0)
+        self.assertEqual(g1_json["linkCount"], 2)
+
+        # get the dset1.1.1 link
+        http_rsp = http_conn.GET(f"/groups/{g1_1_id}/links/dset1.1.1")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        dset1_1_1_link = rsp_json["link"]
+        dset1_1_1_id = dset1_1_1_link["id"]
+
+        # get the dset1.1.1 json
+        http_rsp = http_conn.GET(f"/datasets/{dset1_1_1_id}")
+        self.assertEqual(http_rsp.status_code, 200)
+        dset1_1_1_json = http_rsp.json()
+        dset1_1_1_shape = dset1_1_1_json["shape"]
+        self.assertEqual(dset1_1_1_shape["class"], "H5S_SIMPLE")
+
+        # get the dset1_1_1 data
+        http_rsp = http_conn.GET(f"/datasets/{dset1_1_1_id}/value")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        dset1_1_1_value = rsp_json["value"]
+        self.assertEqual(len(dset1_1_1_value), 10)
+        for i in range(10):
+            row = dset1_1_1_value[i]
+            self.assertEqual(len(row), 10)
+            for j in range(10):
+                self.assertEqual(row[j], i * j)
+
         db.close()
 
 

From 985a842f4097705061cae01f9471e92e71b7eae9 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 14 Jul 2025 21:03:17 +0100
Subject: [PATCH 059/129] set dataset values in create if possible

---
 src/h5json/dset_util.py             | 22 ++++++++++++-----
 src/h5json/hsdsstore/hsds_writer.py | 38 +++++++++++++++++------------
 test/unit/h5py_reader_test.py       |  2 --
 3 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 496734d3..d992a01a 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -43,14 +43,24 @@ def resize_dataset(dset_json, shape):
     dset_json["modified"] = time.time()
 
 
-def getNumElements(dset_json):
+def getDims(dset_json):
+    """ return extents of the dataset shape as a tuple """
     shape_json = dset_json["shape"]
     shape_class = shape_json["class"]
     if shape_class == "H5S_NULL":
-        num_elements = 0
+        dims = None
     elif shape_class == "H5S_SCALAR":
-        num_elements = 1
+        dims = ()
     elif shape_class == "H5S_SIMPLE":
-        dims = shape_json["dims"]
-        num_elements = int(np.prod(dims))
-    return num_elements
+        dims = tuple(shape_json["dims"])
+    else:
+        raise ValueError(f"Unexpected shape class: {shape_class}")
+    return dims
+
+
+def getNumElements(dset_json):
+    """ return the number of elements defined by the dataset's shape
+        returns None for null shape, 1 for scalar shape, and product of
+        extents otherwise """
+
+    return int(np.prod(getDims(dset_json)))
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 24ed900c..8881e5e9 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -16,7 +16,7 @@
 
 from ..hdf5dtype import isVlen
 from ..array_util import arrayToBytes, bytesArrayToList
-from ..dset_util import getNumElements
+from ..dset_util import getNumElements, getDims
 from .. import selections
 from ..h5writer import H5Writer
 from .httpconn import HttpConn
@@ -251,7 +251,7 @@ def multiPost(items):
             items.clear()
 
         self.log.debug(f"hsds_writer> createObjects, {len(obj_ids)} objects")
-        MAX_OBJECTS_PER_REQUEST = 3
+        MAX_OBJECTS_PER_REQUEST = 300
         collections = ("groups", "datasets", "datatypes")
         col_items = {}
         dset_value_update_ids = set()
@@ -286,15 +286,25 @@ def multiPost(items):
                     item[key] = obj_json[key]
 
             # initialize dataset values if provided and not too large
-            if "updates" in obj_json:
-                updates = obj_json["updates"]
-                if updates and len(updates) == 1 and self.getDatasetSize(obj_id) < MAX_INIT_SIZE:
+            if collection == "datasets":
+                dset_dims = getDims(obj_json)  # will be None for null space datasets
+                dset_size = self.getDatasetSize(obj_id)  # number of bytes defined by the shape
+                init_arr = None  # data to be passed to post create method
+                updates = obj_json.get("updates")
+                if updates and len(updates) == 1 and dset_size < MAX_INIT_SIZE:
                     sel, arr = updates[0]
                     if sel.select_type == selections.H5S_SELECT_ALL:
-                        value = bytesArrayToList(arr)
-                        item["value"] = value
+                        init_arr = arr
                         updates.clear()  # reset the update list
-                if updates:
+                if self._init and init_arr is None and dset_dims is not None:
+                    # get all values from dataset if small enough
+                    if dset_size < MAX_INIT_SIZE:
+                        sel_all = selections.select(dset_dims, ...)
+                        init_arr = self.db.getDatasetValues(obj_id, sel_all)
+                if init_arr is not None:
+                    value = bytesArrayToList(init_arr)
+                    item["value"] = value
+                elif updates or self._init:
                     dset_value_update_ids.add(obj_id)  # will set dataset value below
 
             # add to the list of new items for the given collection
@@ -436,18 +446,13 @@ def updateValues(self, dset_ids):
             if getCollectionForId(dset_id) != "datasets":
                 continue  # ignore groups and datatypes
             dset_json = self.db.getObjectById(dset_id)
-            dset_shape = dset_json["shape"]
-            dset_class = dset_shape['class']
-            if dset_class == "H5S_NULL":
+            dset_dims = getDims(dset_json)
+            if dset_dims is None:
                 # no data to update
                 continue
             if self._init:
                 # get all data for the dataset
                 # TBD: do this by chunks
-                if dset_class == "H5S_SCALAR":
-                    dset_dims = []
-                else:
-                    dset_dims = dset_shape["dims"]
                 sel_all = selections.select(dset_dims, ...)
                 arr = self.db.getDatasetValues(dset_id, sel_all)
                 if arr is not None:
@@ -491,7 +496,8 @@ def flush(self):
             dirty_ids.add(root_id)  # add back root for attribute and link creation
             if not self._no_data:
                 # initialize dataset values
-                self.updateValues(obj_ids)
+                pass
+                # self.updateValues(obj_ids)
             self._init = False
         elif self.db.new_objects:
             self.log.debug(f"hsds_writer> {len(self.db.new_objects)} objects to create")
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index a3d946d9..8f76543c 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -41,9 +41,7 @@ def testSimple(self):
         db = Hdf5db(app_logger=self.log)
         db.reader = H5pyReader(filepath, app_logger=self.log)
         root_id = db.open()
-        print("got root_id:", root_id)
         root_json = db.getObjectById(root_id)
-        print("got root_json:", root_json)
         root_attrs = root_json["attributes"]
         self.assertEqual(len(root_attrs), 2)
         self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])

From 9d78d0c4763b51b612a6b80738318fc59aa56077 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sat, 26 Jul 2025 15:37:10 +0100
Subject: [PATCH 060/129] hsdsreader test

---
 src/h5json/dset_util.py       | 26 ++++++++++++++++++++++++++
 test/unit/hsds_reader_test.py |  7 ++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index d992a01a..e1a44a59 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -64,3 +64,29 @@ def getNumElements(dset_json):
         extents otherwise """
 
     return int(np.prod(getDims(dset_json)))
+
+
+def getDatasetLayout(dset_json):
+    """ Return layout json from creation property list or layout json """
+    layout = None
+
+    if "creationProperties" in dset_json:
+        cp = dset_json["creationProperties"]
+        if "layout" in cp:
+            layout = cp["layout"]
+    if not layout and "layout" in dset_json:
+        layout = dset_json["layout"]
+    if not layout:
+        # no layout for {dset_json
+        return None
+    return layout
+
+
+def getDatasetLayoutClass(dset_json):
+    """ return layout class """
+    layout = getDatasetLayout(dset_json)
+    if layout and "class" in layout:
+        layout_class = layout["class"]
+    else:
+        layout_class = None
+    return layout_class
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
index d0501b9f..db7a9f4b 100644
--- a/test/unit/hsds_reader_test.py
+++ b/test/unit/hsds_reader_test.py
@@ -45,11 +45,11 @@ def testSimple(self):
         root_id = db.open()
         root_json = db.getObjectById(root_id)
         self.assertTrue("id" in root_json)
-        """
-        TBD
+
         root_attrs = root_json["attributes"]
         self.assertEqual(len(root_attrs), 2)
         self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+
         root_links = root_json["links"]
         self.assertEqual(len(root_links), 2)
         self.assertEqual(list(root_links.keys()), ["g1", "g2"])
@@ -57,11 +57,13 @@ def testSimple(self):
         self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
         g1_id = g1_link["id"]
         self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+
         dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
         dset_json = db.getObjectById(dset111_id)
         dset_type = dset_json["type"]
         self.assertEqual(dset_type["class"], "H5T_INTEGER")
         self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+
         dset_attrs = dset_json["attributes"]
         self.assertEqual(len(dset_attrs), 2)
         self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
@@ -101,7 +103,6 @@ def testSimple(self):
         self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
         attr3_value = attr3_json["value"]
         self.assertEqual(attr3_value, 42)
-        """
 
         db.close()
 

From 4413e9b23179817c8dc27d8e7a3d1ebfe68c422f Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 31 Jul 2025 12:53:23 +0100
Subject: [PATCH 061/129] added reader, writer stat method

---
 src/h5json/h5pystore/h5py_reader.py   | 14 ++++++++++
 src/h5json/h5pystore/h5py_writer.py   | 14 ++++++++++
 src/h5json/h5reader.py                |  9 +++++++
 src/h5json/h5writer.py                | 14 ++++++++++
 src/h5json/hdf5db.py                  | 28 ++++++++++++++++++--
 src/h5json/hsdsstore/hsds_reader.py   | 12 +++++++++
 src/h5json/hsdsstore/hsds_writer.py   | 16 +++++++++++-
 src/h5json/jsonstore/h5json_reader.py | 14 ++++++++++
 src/h5json/jsonstore/h5json_writer.py | 16 ++++++++++++
 test/unit/h5json_writer_test.py       |  2 ++
 test/unit/hdf5db_test.py              |  2 ++
 test/unit/hsds_reader_test.py         | 16 ++++++++++++
 test/unit/hsds_writer_test.py         | 37 +++++++++++++++++++++++++--
 13 files changed, 189 insertions(+), 5 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index 9aee273d..bb32a6e9 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -12,6 +12,7 @@
 import h5py
 import numpy as np
 import logging
+from os import stat as os_stat
 import time
 
 from ..objid import createObjId, getCollectionForId
@@ -557,3 +558,16 @@ def getDatasetValues(self, dset_id, sel=None, dtype=None):
         # convert any h5py references to h5json references
         arr = self._copy_array(arr, fin=dset.file)
         return arr
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stat_info = os_stat(self.filepath)
+        stats = {}
+        stats['created'] = stat_info.st_ctime
+        stats["lastModified"] = stat_info.st_mtime
+        stats['owner'] = stat_info.st_uid  # TBD: convert to username?
+        return stats
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 14942c11..dd543a38 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -11,6 +11,7 @@
 ##############################################################################
 import h5py
 import numpy as np
+from os import stat as os_stat
 import time
 
 from ..objid import getCollectionForId, isValidUuid, createObjId
@@ -460,3 +461,16 @@ def close(self):
     def isClosed(self):
         """ return closed status """
         return False if self._f else True
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stat_info = os_stat(self.filepath)
+        stats = {}
+        stats['created'] = stat_info.st_ctime
+        stats["lastModified"] = stat_info.st_mtime
+        stats['owner'] = stat_info.st_uid  # TBD: convert to username?
+        return stats
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index 3bf49ca7..f48612fc 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -92,3 +92,12 @@ def close(self):
     def isClosed(self):
         """ return True if handle is closed """
         pass
+
+    @abstractmethod
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        pass
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index 8de1a277..cc5c601c 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -32,6 +32,7 @@ def __init__(
         self._no_data = no_data
         self._filepath = filepath
         self._db_ref = None
+        self._lastModified = None
         if app_logger:
             self.log = app_logger
         else:
@@ -49,6 +50,10 @@ def filepath(self):
     def closed(self):
         return self.isClosed()
 
+    @property
+    def lastModified(self):
+        return self._lastModified
+
     @property
     def db(self):
         if not self._db_ref:
@@ -83,3 +88,12 @@ def close(self):
     def isClosed(self):
         """ return True if handle is closed """
         pass
+
+    @abstractmethod
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        pass
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 66c84311..0b0c22a4 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -105,6 +105,18 @@ def isClosed(self):
         """ return True if handle is closed """
         return self._is_closed
 
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stats = {}
+        stats['created'] = 0
+        stats["lastModified"] = 0
+        stats['owner'] = ""
+        return stats
+
 
 class H5NullWriter(H5Writer):
     """
@@ -165,6 +177,18 @@ def isClosed(self):
         """ return True if handle is closed """
         return self._is_closed
 
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stats = {}
+        stats['created'] = 0
+        stats["lastModified"] = 0
+        stats['owner'] = ""
+        return stats
+
 
 class Hdf5db:
     """
@@ -417,11 +441,11 @@ def _checkWriter(self):
         if self.writer.closed:
             raise IOError("writer is closed")
 
-    def getObjectById(self, obj_id):
+    def getObjectById(self, obj_id, refresh=False):
         """ return object with given id """
         self.log.debug(f"getObjectById {obj_id}")
         self._checkReader()
-        if obj_id not in self.db:
+        if obj_id not in self.db or refresh:
             # load the obj from the reader
             obj_json = self.reader.getObjectById(obj_id)
             self.db[obj_id] = obj_json
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index 55a8c022..a521f158 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -310,3 +310,15 @@ def getDatasetValues(self, dset_id, sel=None, dtype=None):
             self.log.debug(f"jsonToArray returned: {arr}")
 
         return arr
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stats = {}
+        stats['created'] = 0
+        stats["lastModified"] = 0
+        stats['owner'] = ""
+        return stats
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 8881e5e9..7e5d7781 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -385,6 +385,7 @@ def updateLinks(self, grp_ids):
                 raise IOError("hsds_writer unable to update links")
             else:
                 self.log.debug(f"hsds_writer> {grp_id} {count} links updated")
+                self._lastModified = time.time()
 
     def updateAttributes(self, obj_ids):
         """ update any modified links of the given objects """
@@ -418,6 +419,7 @@ def updateAttributes(self, obj_ids):
                 self.log.error(f"hsds_writer> put {req} failed, status: {put_rsp.status_code}")
             else:
                 self.log.debug(f"hsds_writer> {count} attributes updated")
+                self._lastModified = time.time()
 
     def updateValue(self, dset_id, sel, arr):
         """ update the given dataset using selection and array """
@@ -437,6 +439,7 @@ def updateValue(self, dset_id, sel, arr):
             self.log.error(f"PUT {req} returned error: {rsp.status_code}")
         else:
             self.log.debug(f"PUT {len(data)} bytes successful")
+            self._lastModified = time.time()
 
     def updateValues(self, dset_ids):
         """ write any pending dataset values """
@@ -476,7 +479,6 @@ def flush(self):
         if not self._http_conn:
             self.log.warning("hsds_writer no http connection")
             raise IOError("no http connection")
-
         self.log.info("hsds_writer.flush()")
         self.log.debug(f"    new object count: {len(self.db.new_objects)}")
         self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
@@ -535,3 +537,15 @@ def isClosed(self):
     def get_root_id(self):
         """ Return root id """
         return self._root_id
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stats = {}
+        stats['created'] = 0
+        stats["lastModified"] = 0
+        stats['owner'] = ""
+        return stats
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
index 4fd1e0c8..b64a3d1d 100644
--- a/src/h5json/jsonstore/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -11,6 +11,7 @@
 ##############################################################################
 import json
 import logging
+from os import stat as os_stat
 
 from ..objid import getCollectionForId, getUuidFromId
 
@@ -215,3 +216,16 @@ def getDatasetValues(self, obj_id, sel=None, dtype=None):
             raise NotImplementedError("selection type not supported")
 
         return arr
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stat_info = os_stat(self.filepath)
+        stats = {}
+        stats['created'] = stat_info.st_ctime
+        stats["lastModified"] = stat_info.st_mtime
+        stats['owner'] = stat_info.st_uid  # TBD: convert to username?
+        return stats
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index f5ede89f..343c045f 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -11,6 +11,8 @@
 ##############################################################################
 
 import json
+from os import stat as os_stat
+import time
 
 from ..h5writer import H5Writer
 from ..objid import getUuidFromId, getCollectionForId, createObjId
@@ -292,3 +294,17 @@ def dumpFile(self):
                 json.dump(self.json, f, ensure_ascii=ensure_ascii, indent=indent)
         else:
             print(json.dumps(self.json, sort_keys=True, ensure_ascii=ensure_ascii, indent=indent))
+        self._lastModified = time.time()  # update timestamp
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stat_info = os_stat(self.filepath)
+        stats = {}
+        stats['created'] = stat_info.st_ctime
+        stats["lastModified"] = stat_info.st_mtime
+        stats['owner'] = stat_info.st_uid  # TBD: convert to username?
+        return stats
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index e8b5eb91..ba2cbc19 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -70,7 +70,9 @@ def testSimple(self):
         db.createSoftLink(g2_id, "slink", "somewhere")
         db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
         db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        self.assertTrue(db.writer.lastModified is None)  # no update yet
         db.flush()
+        self.assertTrue(db.writer.lastModified > 0)  # timestamp should be updated
 
     def testNullSpaceAttribute(self):
 
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index c9c32969..2722eaa6 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -53,6 +53,8 @@ def testOpen(self):
         self.assertTrue(db.closed)
         obj_id = db.open()
         self.assertEqual(obj_id, root_id)
+        root_json = db.getObjectById(root_id)
+        self.assertFalse("id" in root_json)
         db.close()
 
     def testWith(self):
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
index db7a9f4b..9b0acf1f 100644
--- a/test/unit/hsds_reader_test.py
+++ b/test/unit/hsds_reader_test.py
@@ -11,6 +11,8 @@
 ##############################################################################
 import unittest
 import logging
+import random
+import string
 import numpy as np
 from h5json import Hdf5db
 from h5json.hsdsstore.hsds_reader import HSDSReader
@@ -106,6 +108,20 @@ def testSimple(self):
 
         db.close()
 
+    def testNoFile(self):
+        # create a random string so we don't try to open an existing file
+        filename = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))
+        filepath = "/home/test_user1/test/" + filename
+        kwargs = {"app_logger": self.log}
+        db = Hdf5db(**kwargs)
+        hsds_reader = HSDSReader(filepath, **kwargs)
+        db.reader = hsds_reader
+        try:
+            db.open()
+            self.assertTrue(False)
+        except IOError as ioe:
+            self.assertEqual(ioe.errno, 404)
+
 
 if __name__ == "__main__":
     # setup test files
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index a2d7201b..4ef8ff8f 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -11,12 +11,14 @@
 ##############################################################################
 import unittest
 import logging
+import random
+import string
 import requests
-import os
 import numpy as np
 from h5json import Hdf5db
 from h5json.hsdsstore.httpconn import HttpConn
 from h5json.hsdsstore.hsds_writer import HSDSWriter
+from h5json.hsdsstore.hsds_reader import HSDSReader
 from h5json.h5pystore.h5py_reader import H5pyReader
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
@@ -42,6 +44,10 @@ def testSimple(self):
         db = Hdf5db(app_logger=self.log)
         db.writer = HSDSWriter(domain_path, app_logger=self.log)
         root_id = db.open()
+
+        stats = db.writer.getStats()
+        for k in ("created", "lastModified", "owner"):
+            self.assertTrue(k in stats)
         http_conn = HttpConn(domain_path, mode='r', retries=1)
 
         db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
@@ -61,8 +67,9 @@ def testSimple(self):
         self.assertEqual(root_json["attributeCount"], 0)
         # same for link count
         self.assertEqual(root_json["linkCount"], 0)
-
+        self.assertTrue(db.writer.lastModified is None)  # no write yet
         db.flush()
+        self.assertTrue(db.writer.lastModified > 0)  # timestamp should be updated
 
         # validate - get the root group again and see if counts are updated
         http_rsp = http_conn.GET(f"/groups/{root_id}")
@@ -159,7 +166,33 @@ def testSimple(self):
         rsp_value = rsp_json["value"]
         self.assertEqual(rsp_value, 42)
 
+        # create a dataset and try to read from it
+        dset_222_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        sel_all = selections.select((10, 10), ...)
+        arr = db.getDatasetValues(dset_222_id, sel_all)
+        self.assertTrue((arr == 0).all())
+
+        db.close()
+
+    def testReaderWriter(self):
+        # try reading and writer to an HSDS domain
+        # create a random string so we don't try to open an existing file
+        filename = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))
+        domain_path = "/home/test_user1/test/" + filename + ".h5"
+        db = Hdf5db(app_logger=self.log)
+        db.writer = HSDSWriter(domain_path, app_logger=self.log)
+        self.assertEqual(db.writer.filepath, domain_path)
+        root_id = db.open()
+        self.assertTrue(root_id)
+        db.reader = HSDSReader(domain_path, app_logger=self.log)
         db.close()
+        root_id2 = db.open()
+        self.assertEqual(root_id, root_id2)
+        root_json = db.getObjectById(root_id)
+        self.assertTrue("id" not in root_json)
+        self.assertTrue("created" in root_json)
+        self.assertTrue(root_json["created"] > 0)
+        self.assertTrue(db.writer.lastModified is None)  # no flush yet
 
     def testH5PyToHS(self):
         # test reading from HDF5 file and writing to HSDS

From 5ee8b3e21cf61ea222195a8e41c58abef6a31212 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sun, 3 Aug 2025 15:51:25 +0100
Subject: [PATCH 062/129] fix for reopen db

---
 src/h5json/hdf5db.py                |   9 +-
 src/h5json/hsdsstore/hsds_reader.py |  38 ++++---
 src/h5json/hsdsstore/hsds_writer.py | 152 +++++++++++++---------------
 src/h5json/hsdsstore/httpconn.py    |  97 ++++++++----------
 test/unit/hsds_reader_test.py       |  16 +++
 5 files changed, 153 insertions(+), 159 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 0b0c22a4..87cd5687 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -362,7 +362,6 @@ def open(self):
         if not self.reader.isClosed():
             self.log.debug("db is already opened")
             raise IOError("db is already opened")
-            return self._root_id
 
         if self.writer.append:
             # append mode for the writer, first open writer and get the root id
@@ -377,14 +376,14 @@ def open(self):
             # now open reader
             reader_root_id = self.reader.open()
             if reader_root_id != self._root_id:
-                raise IOError("writer root id does not match reader root id")
+                raise IOError("db root id does not match reader root id")
 
         else:
             # open reader first and get root id
             reader_root_id = self.reader.open()
             if self._root_id:
                 if reader_root_id != self._root_id:
-                    raise IOError("writer root id does not match reader root id")
+                    raise IOError("reader root id does not match reader root id")
             else:
                 self._root_id = reader_root_id
 
@@ -431,14 +430,14 @@ def _checkReader(self):
         """ check the reader is set and open """
         if self.reader is None:
             raise IOError("reader not set")
-        if self.reader.closed:
+        if self.reader.isClosed():
             raise IOError("reader is closed")
 
     def _checkWriter(self):
         """ check the writer is set and open """
         if self.writer is None:
             raise IOError("writer not set")
-        if self.writer.closed:
+        if self.writer.isClosed():
             raise IOError("writer is closed")
 
     def getObjectById(self, obj_id, refresh=False):
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index a521f158..e0053033 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -87,15 +87,21 @@ def __init__(
         # save these for when we create the connection
         self._http_kwargs = kwargs
         self._http_conn = None
+        self._stats = {"created": 0, "lastModified": 0, "owner": ""}
 
         super().__init__(domain_path, app_logger=app_logger)
 
     def open(self):
+        if self._http_conn and not self._http_conn.isClosed():
+            return self._root_id  # open already called
+
         if self._http_conn:
-            return  # open already called
+            http_conn = self._http_conn
+        else:
+            kwargs = self._http_kwargs
+            http_conn = HttpConn(self.filepath, **kwargs)
 
-        kwargs = self._http_kwargs
-        http_conn = HttpConn(self.filepath, **kwargs)
+        http_conn.open()
 
         hsds_info = http_conn.serverInfo()
         self.log.debug(f"got hsds info: {hsds_info}")
@@ -122,6 +128,11 @@ def open(self):
         domain_json = rsp.json()
         self.log.debug(f"got domain_json: {domain_json}")
 
+        # update stats
+        for key in ("created", "lastModified", "owner", "limits", "version", "compressors"):
+            if key in domain_json:
+                self._stats[key] = domain_json[key]
+
         if "root" not in domain_json:
             http_conn.close()
             raise IOError(404, "Location is a folder, not a file")
@@ -134,17 +145,8 @@ def open(self):
             domain_objs = root_json["domain_objs"]
             objdb.load(domain_objs)
         """
-        if "limits" in domain_json:
-            self._limits = domain_json["limits"]
-        else:
-            self._limits = None
-        if "version" in domain_json:
-            self._version = domain_json["version"]
-        else:
-            self._version = None
 
         self._http_conn = http_conn
-        self._domain_json = domain_json
 
         return self._root_id
 
@@ -157,10 +159,10 @@ def close(self):
             self._http_conn.close()
 
     def isClosed(self):
-        if self._http_conn:
-            return False
-        else:
+        if not self._http_conn:
             return True
+        else:
+            return self._http_conn.isClosed()
 
     def get_root_id(self):
         """ Return root id """
@@ -317,8 +319,4 @@ def getStats(self):
             'lastModified': modificationTime
             'owner': owner name
         """
-        stats = {}
-        stats['created'] = 0
-        stats["lastModified"] = 0
-        stats['owner'] = ""
-        return stats
+        return self._stats
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 7e5d7781..ba3b7b87 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -109,11 +109,10 @@ def __init__(
         self._http_conn = None
         self._root_id = None
         self._append = append
-        self._owner = owner
         self._track_order = track_order
         self._linked_domain = linked_domain
-        self._domain_json = None
         self._last_flush_time = 0
+        self._stats = {"created": 0, "lastModified": 0, "owner": ""}
 
     def open(self):
         """ setup domain for writing """
@@ -133,91 +132,84 @@ def open(self):
             hsds_info = http_conn.serverInfo()
             self.log.debug(f"got hsds info: {hsds_info}")
 
-        if not self._domain_json:
-            # haven't fetched the domain json yet, do it now
-
-            # try to do a GET from the domain
-            req = "/"
-            params = {}
-            """
-            if max_objects is None or max_objects > 0:
-                # get object meta objects
-                # TBD: have hsds support a max limit of objects to return
-                params["getobjs"] = 1
-                params["include_attrs"] = 1
-                params["include_links"] = 1
-            """
-
-            domain_json = None
-            rsp = http_conn.GET(req, params=params)
-
-            if rsp.status_code not in (200, 404, 410):
-                msg = f"Got status code: {rsp.status_code} on initial domain get"
-                self.log.warning(msg)
-                raise IOError(msg)
-
-            if rsp.status_code == 200:
-                if self._append:
-                    # domain exists already
-                    domain_json = rsp.json()
-                    if "root" not in domain_json:
-                        # this a folder not a domain
-                        self.log.warning(f"folder: {self.filepath} has no root property")
-                        http_conn.close()
-                        raise IOError(404, "Location is a folder, not a file")
-                else:
-                    # not append - delete existing domain
-                    self.log.info(f"sending delete request for {self.filepath}")
-                    delete_rsp = http_conn.DELETE(req, params=params)
-                    if delete_rsp.status_code not in (200, 410):
-                        # failed to delete
-                        http_conn.close()
-                        raise IOError(rsp.status_code, rsp.reason)
-
-            if not domain_json:
-                # domain doesn't exist, create it
-                body = {}
-                if self.db.root_id:
-                    # initialize domain using the db's root_id
-                    body["root_id"] = self.db.root_id
-                if self._owner:
-                    body["owner"] = self._owner
-                if self._linked_domain:
-                    body["linked_domain"] = self._linked_domain
-                if self._track_order:
-                    create_props = {"CreateOrder": 1}
-                    group_body = {"creationProperties": create_props}
-                    body["group"] = group_body
-                rsp = http_conn.PUT(req, params=params, body=body)
-                if rsp.status_code != 201:
-                    http_conn.close()
-                    raise IOError(rsp.status_code, rsp.reason)
+        # fetch the domain json
+
+        # try to do a GET from the domain
+        req = "/"
+        params = {}
+        """
+        if max_objects is None or max_objects > 0:
+            # get object meta objects
+            # TBD: have hsds support a max limit of objects to return
+            params["getobjs"] = 1
+            params["include_attrs"] = 1
+            params["include_links"] = 1
+        """
+
+        domain_json = None
+        rsp = http_conn.GET(req, params=params)
+
+        if rsp.status_code not in (200, 404, 410):
+            msg = f"Got status code: {rsp.status_code} on initial domain get"
+            self.log.warning(msg)
+            raise IOError(msg)
+
+        if rsp.status_code == 200:
+            if self._append:
+                # domain exists already
                 domain_json = rsp.json()
-                self.log.info(f"got rsp on PUT domain: {domain_json}")
                 if "root" not in domain_json:
+                    # this a folder not a domain
+                    self.log.warning(f"folder: {self.filepath} has no root property")
                     http_conn.close()
-                    raise IOError(404, "Unexpected error")
-
-            self.log.debug(f"got domain_json: {domain_json}")
+                    raise IOError(404, "Location is a folder, not a file")
+            else:
+                # not append - delete existing domain
+                self.log.info(f"sending delete request for {self.filepath}")
+                delete_rsp = http_conn.DELETE(req, params=params)
+                if delete_rsp.status_code not in (200, 410):
+                    # failed to delete
+                    http_conn.close()
+                    raise IOError(rsp.status_code, rsp.reason)
 
+        if not domain_json:
+            # domain doesn't exist, create it
+            body = {}
+            if self.db.root_id:
+                # initialize domain using the db's root_id
+                body["root_id"] = self.db.root_id
+            if self._owner:
+                body["owner"] = self._owner
+            if self._linked_domain:
+                body["linked_domain"] = self._linked_domain
+            if self._track_order:
+                create_props = {"CreateOrder": 1}
+                group_body = {"creationProperties": create_props}
+                body["group"] = group_body
+            rsp = http_conn.PUT(req, params=params, body=body)
+            if rsp.status_code != 201:
+                http_conn.close()
+                raise IOError(rsp.status_code, rsp.reason)
+            domain_json = rsp.json()
+            self.log.info(f"got rsp on PUT domain: {domain_json}")
             if "root" not in domain_json:
                 http_conn.close()
-                raise IOError(404, "Location is a folder, not a file")
+                raise IOError(404, "Unexpected error")
 
-            root_id = domain_json["root"]
+        self.log.debug(f"got domain_json: {domain_json}")
 
-            self._root_id = root_id
+        if "root" not in domain_json:
+            http_conn.close()
+            raise IOError(404, "Location is a folder, not a file")
 
-            if "limits" in domain_json:
-                self._limits = domain_json["limits"]
-            else:
-                self._limits = None
-            if "version" in domain_json:
-                self._version = domain_json["version"]
-            else:
-                self._version = None
+        root_id = domain_json["root"]
+
+        self._root_id = root_id
 
-            self._domain_json = domain_json
+        # update stats
+        for key in ("created", "lastModified", "owner", "limits", "version", "compressors"):
+            if key in domain_json:
+                self._stats[key] = domain_json[key]
 
         return self._root_id
 
@@ -544,8 +536,4 @@ def getStats(self):
             'lastModified': modificationTime
             'owner': owner name
         """
-        stats = {}
-        stats['created'] = 0
-        stats["lastModified"] = 0
-        stats['owner'] = ""
-        return stats
+        return self._stats
diff --git a/src/h5json/hsdsstore/httpconn.py b/src/h5json/hsdsstore/httpconn.py
index 14b3d54d..53c42dcc 100644
--- a/src/h5json/hsdsstore/httpconn.py
+++ b/src/h5json/hsdsstore/httpconn.py
@@ -258,7 +258,6 @@ def __init__(
         bucket=None,
         api_key=None,
         mode="a",
-        use_session=True,
         expire_time=1.0,
         max_objects=None,
         max_age=1.0,
@@ -270,7 +269,6 @@ def __init__(
         self._domain = domain_name
         self._mode = mode
         self._domain_json = None
-        self._use_session = use_session
         self._retries = retries
         self._timeout = timeout
         self._api_key = api_key
@@ -283,7 +281,7 @@ def __init__(
             self.log = logging
         else:
             self.log = logging.getLogger(logger)
-        msg = f"HttpConn.init(domain: {domain_name} use_session: {use_session} "
+        msg = f"HttpConn.init(domain: {domain_name}"
         msg += f"expire_time: {expire_time:6.2f} sec retries: {retries}"
         self.log.debug(msg)
 
@@ -355,12 +353,6 @@ def __init__(
             else:
                 self.log.error(f"Unknown openid provider: {provider}")
 
-    def __del__(self):
-        if self._s:
-            self.log.debug("close session")
-            self._s.close()
-            self._s = None
-
     def getHeaders(self, username=None, password=None, headers=None):
 
         if headers is None:
@@ -447,6 +439,8 @@ def verifyCert(self):
     def GET(self, req, format="json", params=None, headers=None):
         if self._endpoint is None:
             raise IOError("object not initialized")
+        if not self._s:
+            raise IOError("http session is closed")
         # check that domain is defined (except for some specific requests)
         if req not in ("/domains", "/about", "/info", "/") and self._domain is None:
             raise IOError(f"no domain defined: req: {req}")
@@ -477,7 +471,7 @@ def GET(self, req, format="json", params=None, headers=None):
                 self.log.debug(f"GET params {k}:{v}")
 
         try:
-            s = self.session
+            s = self._s
             stream = True  # tbd  - config for no streaming?
             ts = time.time()
             rsp = s.get(
@@ -507,6 +501,8 @@ def PUT(self, req, body=None, format="json", params=None, headers=None):
             raise IOError("object not initialized")
         if self._domain is None:
             raise IOError("no domain defined")
+        if not self._s:
+            raise IOError("http session is closed")
 
         if params:
             self.log.info(f"PUT params: {params}")
@@ -539,7 +535,7 @@ def PUT(self, req, body=None, format="json", params=None, headers=None):
         self.log.info(f"PUT: {req} format: {format} [{len(data)} bytes]")
 
         try:
-            s = self.session
+            s = self._s
             ts = time.time()
             rsp = s.put(
                 self._endpoint + req,
@@ -568,6 +564,8 @@ def POST(self, req, body=None, format="json", params=None, headers=None):
             raise IOError("object not initialized")
         if self._domain is None:
             raise IOError("no domain defined")
+        if not self._s:
+            raise IOError("http session is closed")
 
         if params is None:
             params = {}
@@ -608,7 +606,7 @@ def POST(self, req, body=None, format="json", params=None, headers=None):
         self.log.info("POST: " + req)
 
         try:
-            s = self.session
+            s = self._s
             ts = time.time()
             rsp = s.post(
                 self._endpoint + req,
@@ -631,6 +629,8 @@ def POST(self, req, body=None, format="json", params=None, headers=None):
     def DELETE(self, req, params=None, headers=None):
         if self._endpoint is None:
             raise IOError("object not initialized")
+        if not self._s:
+            raise IOError("http session is closed")
 
         if req not in ("/domains", "/") and self._domain is None:
             raise IOError("no domain defined")
@@ -652,9 +652,8 @@ def DELETE(self, req, params=None, headers=None):
 
         self.log.info("DEL: " + req)
         try:
-            s = self.session
             ts = time.time()
-            rsp = s.delete(
+            rsp = self._s.delete(
                 self._endpoint + req,
                 headers=headers,
                 params=params,
@@ -676,55 +675,49 @@ def DELETE(self, req, params=None, headers=None):
 
         return HttpResponse(rsp)
 
-    @property
-    def session(self):
-        # create a session object to re-use http connection when possible
-        s = requests
-        retries = self._retries
-        backoff_factor = 1
-        status_forcelist = (500, 502, 503, 504)
-
-        if self._use_session:
-            if self._s is None:
-                if self._endpoint.startswith("http+unix://"):
-                    self.log.debug(f"create unixsocket session: {self._endpoint}")
-                    s = requests_unixsocket.Session()
-                else:
-                    # regular request session
-                    s = requests.Session()
-
-                retry = Retry(
-                    total=retries,
-                    read=retries,
-                    connect=retries,
-                    backoff_factor=backoff_factor,
-                    status_forcelist=status_forcelist,
-                )
-
-                s.mount(
-                    "http://",
-                    HTTPAdapter(max_retries=retry, pool_connections=16, pool_maxsize=16),
-                )
-                s.mount(
-                    "https://",
-                    HTTPAdapter(max_retries=retry, pool_connections=16, pool_maxsize=16),
-                )
-                self._s = s
-            else:
-                s = self._s
-        return s
-
     def add_external_ref(self, fid):
         # this is used by the group class to keep references to external links open
         if fid.__class__.__name__ != "FileID":
             raise TypeError("add_external_ref, expected FileID type")
         self._external_refs.append(fid)
 
+    def open(self):
+        if self._s:
+            return  # already open
+
+        retries = self._retries
+        backoff_factor = 1
+        status_forcelist = (500, 502, 503, 504)
+        if self._endpoint.startswith("http+unix://"):
+            self.log.debug(f"create unixsocket session: {self._endpoint}")
+            s = requests_unixsocket.Session()
+        else:
+            # regular request session
+            s = requests.Session()
+
+            retry = Retry(
+                total=retries,
+                read=retries,
+                connect=retries,
+                backoff_factor=backoff_factor,
+                status_forcelist=status_forcelist,
+            )
+            kwargs = {"max_retries": retry, "pool_connections": 16, "pool_maxsize": 16}
+            s.mount("http://", HTTPAdapter(**kwargs))
+            s.mount("https://", HTTPAdapter(**kwargs))
+            self._s = s
+
     def close(self):
         if self._s:
             self._s.close()
             self._s = None
 
+    def isClosed(self):
+        if self._s is None:
+            return True
+        else:
+            return False
+
     @property
     def domain(self):
         return self._domain
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
index 9b0acf1f..ce75d540 100644
--- a/test/unit/hsds_reader_test.py
+++ b/test/unit/hsds_reader_test.py
@@ -45,6 +45,22 @@ def testSimple(self):
         hsds_reader = HSDSReader(filepath, **kwargs)
         db.reader = hsds_reader
         root_id = db.open()
+
+        # check domain stats
+        stats = db.reader.getStats()
+        self.assertTrue(stats["created"] > 0)
+        self.assertTrue(stats["lastModified"] > 0)
+        self.assertTrue(stats["owner"])
+        self.assertTrue("compressors" in stats)
+        self.assertTrue(len(stats["compressors"]) > 0)
+        self.assertTrue("limits" in stats)
+        self.assertTrue(len(stats["limits"]) > 0)
+
+        db.close()
+        self.assertTrue(db.closed)
+        obj_id = db.open()
+        self.assertEqual(obj_id, root_id)
+
         root_json = db.getObjectById(root_id)
         self.assertTrue("id" in root_json)
 

From 67bf8e1718bd8807b9a69607cfe30c15fdfef846 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 4 Aug 2025 15:53:40 +0200
Subject: [PATCH 063/129] add more debug log messages

---
 src/h5json/hdf5db.py                |  5 ++++-
 src/h5json/hsdsstore/hsds_writer.py | 22 +++++++++++++++++-----
 src/h5json/hsdsstore/httpconn.py    |  5 ++++-
 test/unit/hsds_writer_test.py       | 10 ++++++++++
 4 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 87cd5687..571a4de3 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -381,14 +381,17 @@ def open(self):
         else:
             # open reader first and get root id
             reader_root_id = self.reader.open()
+            self.log.debug(f"got reader root_id:  {reader_root_id}")
+
             if self._root_id:
                 if reader_root_id != self._root_id:
                     raise IOError("reader root id does not match reader root id")
             else:
                 self._root_id = reader_root_id
-
+            self.log.debug("open writer")
             # now open writer
             writer_root_id = self.writer.open()
+            self.log.debug(f"got writer root_id: {writer_root_id}")
             if writer_root_id != self._root_id:
                 raise IOError("writer root id does not match reader root id")
 
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index ba3b7b87..586c7852 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -110,6 +110,7 @@ def __init__(
         self._root_id = None
         self._append = append
         self._track_order = track_order
+        self._owner = owner
         self._linked_domain = linked_domain
         self._last_flush_time = 0
         self._stats = {"created": 0, "lastModified": 0, "owner": ""}
@@ -120,17 +121,24 @@ def open(self):
             # no db set yet
             raise IOError("DB not set")
 
-        if self._http_conn:
-            http_conn = self._http_conn
-        else:
+        if self._http_conn and not self._http_conn.isClosed():
+            return self._root_id
+
+        if not self._http_conn:
             kwargs = self._http_kwargs
             kwargs["retries"] = 1  # tbd: test setting
             http_conn = HttpConn(self.filepath, **kwargs)
             if self._append:
                 http_conn._mode = "a"
+                self.log.debug("hsdswriter - set http_conn mode to a")
             self._http_conn = http_conn
-            hsds_info = http_conn.serverInfo()
-            self.log.debug(f"got hsds info: {hsds_info}")
+
+        http_conn = self._http_conn
+        self.log.debug("hsdswriter - open http conn")
+        http_conn.open()
+
+        hsds_info = self._http_conn.serverInfo()
+        self.log.debug(f"got hsds info: {hsds_info}")
 
         # fetch the domain json
 
@@ -148,6 +156,7 @@ def open(self):
 
         domain_json = None
         rsp = http_conn.GET(req, params=params)
+        self.log.debug(f"hsdswriter initial get status_code: {rsp.status_code}")
 
         if rsp.status_code not in (200, 404, 410):
             msg = f"Got status code: {rsp.status_code} on initial domain get"
@@ -165,6 +174,7 @@ def open(self):
                     raise IOError(404, "Location is a folder, not a file")
             else:
                 # not append - delete existing domain
+                self.log.info("hsds_writer - delete domain")
                 self.log.info(f"sending delete request for {self.filepath}")
                 delete_rsp = http_conn.DELETE(req, params=params)
                 if delete_rsp.status_code not in (200, 410):
@@ -174,6 +184,7 @@ def open(self):
 
         if not domain_json:
             # domain doesn't exist, create it
+            self.log.debug("hsds_writer create domain")
             body = {}
             if self.db.root_id:
                 # initialize domain using the db's root_id
@@ -203,6 +214,7 @@ def open(self):
             raise IOError(404, "Location is a folder, not a file")
 
         root_id = domain_json["root"]
+        self.log.debug("hsds_writer got root_id:", root_id)
 
         self._root_id = root_id
 
diff --git a/src/h5json/hsdsstore/httpconn.py b/src/h5json/hsdsstore/httpconn.py
index 53c42dcc..dc2ff9b1 100644
--- a/src/h5json/hsdsstore/httpconn.py
+++ b/src/h5json/hsdsstore/httpconn.py
@@ -682,6 +682,7 @@ def add_external_ref(self, fid):
         self._external_refs.append(fid)
 
     def open(self):
+        self.log.debug("http_conn.open")
         if self._s:
             return  # already open
 
@@ -705,10 +706,12 @@ def open(self):
             kwargs = {"max_retries": retry, "pool_connections": 16, "pool_maxsize": 16}
             s.mount("http://", HTTPAdapter(**kwargs))
             s.mount("https://", HTTPAdapter(**kwargs))
-            self._s = s
+        self.log.debug("Httpconn set self._s")
+        self._s = s
 
     def close(self):
         if self._s:
+            self.log.debug("http_conn.close")
             self._s.close()
             self._s = None
 
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index 4ef8ff8f..a9e9b877 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -49,6 +49,7 @@ def testSimple(self):
         for k in ("created", "lastModified", "owner"):
             self.assertTrue(k in stats)
         http_conn = HttpConn(domain_path, mode='r', retries=1)
+        http_conn.open()
 
         db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
         db.createAttribute(root_id, "attr2", 42)
@@ -186,6 +187,14 @@ def testReaderWriter(self):
         self.assertTrue(root_id)
         db.reader = HSDSReader(domain_path, app_logger=self.log)
         db.close()
+        """
+        db.writer = HSDSWriter(domain, **kwargs)
+        root_id = db.open()
+        db.close()
+        # now set the reader
+        db.reader = HSDSReader(domain, **kwargs)
+        db.open()
+        """
         root_id2 = db.open()
         self.assertEqual(root_id, root_id2)
         root_json = db.getObjectById(root_id)
@@ -209,6 +218,7 @@ def testH5PyToHS(self):
 
         # validate - get the root group and see if counts are correct
         http_conn = HttpConn(domain_path, mode='r', retries=1)
+        http_conn.open()
         http_rsp = http_conn.GET(f"/groups/{root_id}")
         self.assertEqual(http_rsp.status_code, 200)
         root_json = http_rsp.json()

From 0a2c0aff13ebf658137c1613004e817d648679ee Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 5 Aug 2025 14:05:56 +0200
Subject: [PATCH 064/129] move null reader, writer classes

---
 src/h5json/h5reader.py              | 100 ++++++++++++++++
 src/h5json/h5writer.py              |  73 ++++++++++++
 src/h5json/hdf5db.py                | 172 +---------------------------
 src/h5json/hsdsstore/hsds_reader.py |   4 +-
 src/h5json/hsdsstore/hsds_writer.py |   2 +-
 testall.py                          |   5 +-
 6 files changed, 181 insertions(+), 175 deletions(-)

diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index f48612fc..a4127097 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -13,6 +13,10 @@
 import weakref
 
 import logging
+import time
+import numpy as np
+
+from .objid import createObjId
 
 
 class H5Reader(ABC):
@@ -101,3 +105,99 @@ def getStats(self):
             'owner': owner name
         """
         pass
+
+
+class H5NullReader(H5Reader):
+    """
+    This class can be used by HDF5DB as a default no-op reader
+    """
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        super().__init__(filepath, app_logger=app_logger)
+        self.log.debug("H5NullReader.__init__")
+
+        self._root_id = None
+        self._is_closed = True
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+
+        if obj_id != self._root_id:
+            raise KeyError(f"{obj_id} not found")
+
+        # create a root group with no links or attributes
+        group_json = {"links": {}, "attributes": {}, "cpl": {}}
+        group_json["created"] = time.time()
+
+        return group_json
+
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        return None
+
+    def getDatasetValues(self, obj_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        # just return a zero array
+        arr = np.zeros(sel.shape, dtype=dtype)
+
+        return arr
+
+    def open(self):
+        """ Open data source for reading """
+        self.log.debug("H5NullReader open")
+        if self.db is None:
+            # no db set yet
+            self.log.warning("no self.db db_ref")
+            raise ValueError("no db")
+
+        if self._is_closed:
+            if not self._root_id:
+                if self.db.root_id:
+                    # use the db root id
+                    self._root_id = self.db.root_id
+                else:
+                    # create a new root id
+                    self._root_id = createObjId(obj_type="groups")
+            self._is_closed = False
+        return self._root_id
+
+    def close(self):
+        """ close any open handles to the storage """
+        self._is_closed = True
+
+    def isClosed(self):
+        """ return True if handle is closed """
+        return self._is_closed
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stats = {}
+        stats['created'] = 0
+        stats["lastModified"] = 0
+        stats['owner'] = ""
+        return stats
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index cc5c601c..a27e76cb 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -12,6 +12,7 @@
 from abc import ABC, abstractmethod
 import weakref
 import logging
+from .objid import createObjId
 
 
 class H5Writer(ABC):
@@ -97,3 +98,75 @@ def getStats(self):
             'owner': owner name
         """
         pass
+
+
+class H5NullWriter(H5Writer):
+    """
+    This class can be used by HDF5DB as a default no-op writer
+    """
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        if append:
+            raise IOError("append is not supprot for H5NullWriter")
+
+        super().__init__(filepath, no_data=no_data, app_logger=app_logger)
+        self.log.debug("H5NullWriter.__init__")
+        self._root_id = None
+        self._is_closed = True
+
+    def open(self):
+        """ open storage handle, return root_id"""
+        self.log.debug("H5NullWriter open")
+        if not self._is_closed:
+            return self._root_id  # already open
+
+        if self.db is None:
+            # no db set yet
+            self.log.warning("no self.db db_ref")
+            raise ValueError("no db")
+
+        if not self._root_id:
+            if self.db.root_id:
+                self._root_id = self.db.root_id
+            else:
+                self._root_id = createObjId(obj_type="groups")
+        self._is_closed = False
+        return self._root_id
+
+    def flush(self):
+        """ Write dirty items """
+        self.log.debug("H5NullWriter> flush")
+        # Null writer is unable to actually persist anything, so return False
+        return False
+
+    def close(self):
+        """ close storage handle """
+        self.log.debug("H5NullWriter.close")
+        self._is_closed = True
+
+    def isClosed(self):
+        """ return True if handle is closed """
+        return self._is_closed
+
+    def getStats(self):
+        """ return a dictionary object with at minimum the following keys:
+            'created': creation time
+            'lastModified': modificationTime
+            'owner': owner name
+        """
+        stats = {}
+        stats['created'] = 0
+        stats["lastModified"] = 0
+        stats['owner'] = ""
+        return stats
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 571a4de3..220511e2 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -18,176 +18,8 @@
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId
 from . import selections
 from .apiversion import _apiver
-from .h5reader import H5Reader
-from .h5writer import H5Writer
-
-
-class H5NullReader(H5Reader):
-    """
-    This class can be used by HDF5DB as a default no-op reader
-    """
-
-    def __init__(
-        self,
-        filepath,
-        app_logger=None
-    ):
-        if app_logger:
-            self.log = app_logger
-        else:
-            self.log = logging.getLogger()
-
-        super().__init__(filepath, app_logger=app_logger)
-        self.log.debug("H5NullReader.__init__")
-
-        self._root_id = None
-        self._is_closed = True
-
-    def get_root_id(self):
-        """ Return root id """
-        return self._root_id
-
-    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
-        """ return object with given id """
-
-        if obj_id != self._root_id:
-            raise KeyError(f"{obj_id} not found")
-
-        # create a root group with no links or attributes
-        group_json = {"links": {}, "attributes": {}, "cpl": {}}
-        group_json["created"] = time.time()
-
-        return group_json
-
-    def getAttribute(self, obj_id, name, includeData=True):
-        """
-        Get attribute given an object id and name
-        returns: JSON object
-        """
-        raise IOError("not supported")
-
-    def getDatasetValues(self, obj_id, sel=None, dtype=None):
-        """
-        Get values from dataset identified by obj_id.
-        If a slices list or tuple is provided, it should have the same
-        number of elements as the rank of the dataset.
-        """
-
-        # just return a zero array
-        arr = np.zeros(sel.shape, dtype=dtype)
-
-        return arr
-
-    def open(self):
-        """ Open data source for reading """
-        self.log.debug("H5NullReader open")
-        if self.db is None:
-            # no db set yet
-            self.log.warning("no self.db db_ref")
-            raise ValueError("no db")
-
-        if self._is_closed:
-            if not self._root_id:
-                if self.db.root_id:
-                    # use the db root id
-                    self._root_id = self.db.root_id
-                else:
-                    # create a new root id
-                    self._root_id = createObjId(obj_type="groups")
-            self._is_closed = False
-        return self._root_id
-
-    def close(self):
-        """ close any open handles to the storage """
-        self._is_closed = True
-
-    def isClosed(self):
-        """ return True if handle is closed """
-        return self._is_closed
-
-    def getStats(self):
-        """ return a dictionary object with at minimum the following keys:
-            'created': creation time
-            'lastModified': modificationTime
-            'owner': owner name
-        """
-        stats = {}
-        stats['created'] = 0
-        stats["lastModified"] = 0
-        stats['owner'] = ""
-        return stats
-
-
-class H5NullWriter(H5Writer):
-    """
-    This class can be used by HDF5DB as a default no-op writer
-    """
-
-    def __init__(
-        self,
-        filepath,
-        append=False,
-        no_data=False,
-        app_logger=None
-    ):
-        if app_logger:
-            self.log = app_logger
-        else:
-            self.log = logging.getLogger()
-
-        if append:
-            raise IOError("append is not supprot for H5NullWriter")
-
-        super().__init__(filepath, no_data=no_data, app_logger=app_logger)
-        self.log.debug("H5NullWriter.__init__")
-        self._root_id = None
-        self._is_closed = True
-
-    def open(self):
-        """ open storage handle, return root_id"""
-        self.log.debug("H5NullWriter open")
-        if not self._is_closed:
-            return self._root_id  # already open
-
-        if self.db is None:
-            # no db set yet
-            self.log.warning("no self.db db_ref")
-            raise ValueError("no db")
-
-        if not self._root_id:
-            if self.db.root_id:
-                self._root_id = self.db.root_id
-            else:
-                self._root_id = createObjId(obj_type="groups")
-        self._is_closed = False
-        return self._root_id
-
-    def flush(self):
-        """ Write dirty items """
-        self.log.debug("H5NullWriter> flush")
-        # Null writer is unable to actually persist anything, so return False
-        return False
-
-    def close(self):
-        """ close storage handle """
-        self.log.debug("H5NullWriter.close")
-        self._is_closed = True
-
-    def isClosed(self):
-        """ return True if handle is closed """
-        return self._is_closed
-
-    def getStats(self):
-        """ return a dictionary object with at minimum the following keys:
-            'created': creation time
-            'lastModified': modificationTime
-            'owner': owner name
-        """
-        stats = {}
-        stats['created'] = 0
-        stats["lastModified"] = 0
-        stats['owner'] = ""
-        return stats
+from .h5reader import H5Reader, H5NullReader
+from .h5writer import H5Writer, H5NullWriter
 
 
 class Hdf5db:
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index e0053033..819126a6 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -10,8 +10,10 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import logging
+import time
+import numpy as np
 
-from ..objid import getCollectionForId, getUuidFromId
+from ..objid import getCollectionForId, getUuidFromId, createObjId
 
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray, bytesToArray
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 586c7852..c8b12f07 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -214,7 +214,7 @@ def open(self):
             raise IOError(404, "Location is a folder, not a file")
 
         root_id = domain_json["root"]
-        self.log.debug("hsds_writer got root_id:", root_id)
+        self.log.debug(f"hsds_writer got root_id: {root_id}")
 
         self._root_id = root_id
 
diff --git a/testall.py b/testall.py
index 45e06106..a33cb327 100755
--- a/testall.py
+++ b/testall.py
@@ -24,6 +24,8 @@
     "h5json_writer_test",
     "h5py_reader_test",
     "h5py_writer_test",
+    "hsds_reader_test",
+    "hsds_writer_test",
 ]
 
 use_hsds = True
@@ -60,7 +62,6 @@
     if rc != 0:
         sys.exit("FAILED")
 shutil.rmtree("./out", ignore_errors=True)
-os.remove("hdf5dbtest.log")
 
 os.chdir("test/integ")
 
@@ -77,8 +78,6 @@
         sys.exit("FAILED")
 shutil.rmtree("./h5_out", ignore_errors=True)
 shutil.rmtree("./json_out", ignore_errors=True)
-os.remove("h5tojson.log")
-os.remove("jsontoh5.log")
 
 os.chdir("..")
 print("Testing suite: Success!")

From 0542e7d9807932e228fddb33ba6e2bfe3384863e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 11 Aug 2025 22:59:16 +0100
Subject: [PATCH 065/129] fix for remove links

---
 src/h5json/h5pystore/h5py_writer.py | 12 +++++++++-
 src/h5json/h5writer.py              |  2 +-
 src/h5json/hdf5db.py                | 18 ++++++++++----
 src/h5json/hsdsstore/hsds_writer.py | 37 +++++++++++++++++++++++++++--
 test/unit/h5py_writer_test.py       | 12 +++++++++-
 test/unit/hsds_writer_test.py       |  6 +++++
 6 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index dd543a38..e820330c 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -268,9 +268,19 @@ def _createDatatype(self, parent, ctype_json, name=None):
     def _createObjects(self, parent, links_json, visited=set()):
         """ create child object in the given group, recurse for any sub-groups """
 
-        for title in links_json:
+        titles = list(links_json.keys())
+        for title in titles:
             link_json = links_json[title]
             link_class = link_json["class"]
+            if "DELETE" in link_json:
+                if title in parent:
+                    # delete the link
+                    self.log.debug(f"deleting link {title}")
+                    del parent[title]
+                # update the link json
+                del links_json[title]
+                continue
+
             if link_class == "H5L_TYPE_SOFT" and title not in parent:
                 h5path = link_json["h5path"]
                 parent[title] = h5py.SoftLink(h5path)
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index a27e76cb..a4b9a522 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -118,7 +118,7 @@ def __init__(
             self.log = logging.getLogger()
 
         if append:
-            raise IOError("append is not supprot for H5NullWriter")
+            raise IOError("append is not supported for H5NullWriter")
 
         super().__init__(filepath, no_data=no_data, app_logger=app_logger)
         self.log.debug("H5NullWriter.__init__")
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 220511e2..f7b2c97d 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -651,8 +651,12 @@ def getLinks(self, grp_id):
         links = grp_json["links"]
         names = []
         for name in links:
-            if links[name] is not None:
-                names.append(name)
+            link_json = links[name]
+            if link_json is None:
+                continue
+            if "DELETE" in link_json:
+                continue  # deleted link
+            names.append(name)
         return names
 
     def getLink(self, grp_id, name):
@@ -663,11 +667,12 @@ def getLink(self, grp_id, name):
         if name not in links:
             self.log.info(f"Link [{name}] not found in {grp_id}")
             return None
-        if links[name] is None:
+        link_json = links[name]
+        if "DELETED" in link_json:
             self.log.info(f"Link {name} in {grp_id} has been deleted")
             return None
 
-        return links[name]
+        return link_json
 
     def _addLink(self, grp_id, name, link_json):
         obj_json = self.getObjectById(grp_id)
@@ -708,8 +713,11 @@ def deleteLink(self, grp_id, name):
         links = grp_json["links"]
         if name not in links:
             raise KeyError(f"Link [{name}] not found in {grp_id}")
-        links[name] = None  # mark for deletion
+        link_json = links[name]
+        link_json["DELETE"] = time.time()  # mark for deletion
         self.make_dirty(grp_id)
+        grp_json = self.getObjectById(grp_id)
+        links = grp_json["links"]
 
     def createGroup(self, cpl=None):
         """ Create a new group """
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index c8b12f07..92f73fd6 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -344,6 +344,7 @@ def updateLinks(self, grp_ids):
 
         self.log.debug("hsds_writer> updateLinks")
         items = {}  # dict which will hold a map of grp ids to links to create
+        removals = {}  # map of grp_ids to link titles to be deleted
         count = 0
 
         for grp_id in grp_ids:
@@ -351,12 +352,23 @@ def updateLinks(self, grp_ids):
                 continue  # ignore datasets and datatypes
             grp_json = self.db.getObjectById(grp_id)
             grp_links = grp_json["links"]
-            for link_title in grp_links:
+            link_titles = list(grp_links.keys())
+            for link_title in link_titles:
                 link_json = grp_links[link_title]
                 if "created" not in link_json:
                     self.log.error(f"hsds_writer> expected created timestamp in link: {link_json}")
                 created = link_json["created"]
-                if created > self._last_flush_time:
+                if "DELETE" in link_json:
+                    if created > self._last_flush_time:
+                        # link hasn't been created yet
+                        msg = f"hsds_writer> {grp_id}: link: {link_title} deleted before flush"
+                        self.log.debug(msg)
+                    else:
+                        # link has been persisted, remove
+                        if grp_id not in removals:
+                            removals[grp_id] = set()
+                        removals[grp_id].add(link_title)
+                elif created > self._last_flush_time:
                     self.log.debug(f"hsds_writer> {grp_id}: new link: {link_title}")
                     count += 1
                     # new link, add to our list
@@ -380,6 +392,27 @@ def updateLinks(self, grp_ids):
                         raise IOError(f"unexpected link class: {link_class}")
                     links[link_title] = new_link
                     self.log.debug(f"setting link {link_title} to {new_link}")
+                else:
+                    self.log.debug(f"link {link_title} has already been persisted")
+
+        if removals:
+            # TBD: hsds doesn't have a multiple object link deletion operation yet
+            # so make one request per object id
+            for grp_id in removals:
+                titles = removals[grp_id]
+                params = {"titles": "/".join(titles)}
+                del_rsp = self.http_conn.DELETE("/groups/" + grp_id + links, params=params)
+                if del_rsp.status_code != 200:
+                    self.log.error("failed to delete links for grp: {grp_id} titles: {titles}")
+                    raise IOError("hsds_writer failed to delete links")
+                else:
+                    self.log.debug(f"hsds_writer> {grp_id} deleted {len(titles)} links")
+                    self._lastModified = time.time()
+                    # remove links from link_json in db
+                    grp_json = self.db.getObjectById(grp_id)
+                    grp_links = grp_json["links"]
+                    for title in titles:
+                        del grp_links[title]
 
         if items:
             body = {"grp_ids": items}
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index e2763795..259d7937 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -150,6 +150,17 @@ def testSimple(self):
             g2 = f["g2"]
             self.assertTrue("g2.1" in g2)
 
+        # create a link, then delete before flushing
+        db.open()
+        tmp_grp_id = db.createGroup("tmp_group")
+        db.createHardLink(g2_id, "tmp_group", tmp_grp_id)
+        db.deleteLink(g2_id, "tmp_group")
+        db.close()
+
+        with h5py.File(filepath) as f:
+            g2 = f["g2"]
+            self.assertFalse("tmp_group" in g2)
+
         db.open()
         sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
         arr = np.zeros((), dtype=np.int32)
@@ -546,7 +557,6 @@ def testReaderWithUpdate(self):
         dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
         db.createAttribute(dset111_id, "attr3", "hello")
         self.assertFalse(db.closed)
-        print("test - db.close()")
         db.close()
 
         with h5py.File(file_out) as f:
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index a9e9b877..8f12c920 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -111,6 +111,12 @@ def testSimple(self):
         db.createCustomLink(g2_id, "cust", {"foo": "bar"})
         db.flush()
 
+        # create a link, then delete before flushing
+        tmp_grp_id = db.createGroup("tmp_group")
+        db.createHardLink(g1_1_id, "tmp_group", tmp_grp_id)
+        db.deleteLink(g1_1_id, "tmp_group")
+        db.flush()
+
         # validate - check that links got updated
         http_rsp = http_conn.GET(f"/groups/{g2_id}/links")
         self.assertEqual(http_rsp.status_code, 200)

From 5bbb0f34ae361cd3179bfc9a1624ba2e57130c6a Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sun, 17 Aug 2025 14:52:21 +0100
Subject: [PATCH 066/129] use DELTED to indicate link deletions

---
 src/h5json/h5pystore/h5py_writer.py | 2 +-
 src/h5json/hdf5db.py                | 4 ++--
 src/h5json/hsdsstore/hsds_writer.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index e820330c..1ee9570e 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -272,7 +272,7 @@ def _createObjects(self, parent, links_json, visited=set()):
         for title in titles:
             link_json = links_json[title]
             link_class = link_json["class"]
-            if "DELETE" in link_json:
+            if "DELETED" in link_json:
                 if title in parent:
                     # delete the link
                     self.log.debug(f"deleting link {title}")
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index f7b2c97d..68f9b17c 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -654,7 +654,7 @@ def getLinks(self, grp_id):
             link_json = links[name]
             if link_json is None:
                 continue
-            if "DELETE" in link_json:
+            if "DELETED" in link_json:
                 continue  # deleted link
             names.append(name)
         return names
@@ -714,7 +714,7 @@ def deleteLink(self, grp_id, name):
         if name not in links:
             raise KeyError(f"Link [{name}] not found in {grp_id}")
         link_json = links[name]
-        link_json["DELETE"] = time.time()  # mark for deletion
+        link_json["DELETED"] = time.time()  # mark for deletion
         self.make_dirty(grp_id)
         grp_json = self.getObjectById(grp_id)
         links = grp_json["links"]
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 92f73fd6..72864db5 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -358,7 +358,7 @@ def updateLinks(self, grp_ids):
                 if "created" not in link_json:
                     self.log.error(f"hsds_writer> expected created timestamp in link: {link_json}")
                 created = link_json["created"]
-                if "DELETE" in link_json:
+                if "DELETED" in link_json:
                     if created > self._last_flush_time:
                         # link hasn't been created yet
                         msg = f"hsds_writer> {grp_id}: link: {link_title} deleted before flush"

From b05941f79e7b35b4fe5372bb2d1bd58641a1f247 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Sun, 17 Aug 2025 17:56:29 +0100
Subject: [PATCH 067/129] persist attr deletion

---
 src/h5json/h5pystore/h5py_writer.py |  7 +++++
 src/h5json/hdf5db.py                | 45 +++++++++++++-------------
 src/h5json/hsdsstore/hsds_writer.py | 49 ++++++++++++++++++++++++++++-
 test/unit/h5py_writer_test.py       | 19 +++++++++++
 test/unit/hdf5db_test.py            | 11 +++++++
 test/unit/hsds_writer_test.py       | 41 ++++++++++++++++++++++++
 6 files changed, 150 insertions(+), 22 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 1ee9570e..2cb42c0b 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -394,6 +394,13 @@ def updateAttributes(self, obj_id, obj):
         attrs = obj_json["attributes"]
         for name in attrs:
             attr_json = attrs[name]
+            if "DELETED" in attr_json:
+                if name in obj.attrs:
+                    # delete the attribute
+                    self.log.debug(f"h5py_writer - delete attribute {name}")
+                    del obj.attrs[name]
+                else:
+                    pass  # already deleted or never added
             if "created" in attr_json and attr_json["created"] < self._flush_time:
                 # attribute should be saved already
                 continue
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 68f9b17c..5f51c972 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -377,46 +377,48 @@ def getDtype(self, obj_json):
 
         return dtype
 
-    def getAttribute(self, obj_id, name, includeData=True):
+    def getAttributes(self, obj_id):
         """
-        Get attribute given an object id and name
+        Get attributes given an object id and name
         returns: JSON object
         """
 
         obj_json = self.getObjectById(obj_id)
         attrs = obj_json["attributes"]
+        names = []
 
-        if name not in attrs:
-            msg = f"Attribute: [{name}] not found in object: {obj_id}"
-            self.log.info(msg)
-            return None
-        if attrs[name] is None:
-            msg = f"Attribute: [{name}] has been deleted"
-            self.log.info(None)
-            return None
-
-        attr_json = attrs[name]
+        for name in attrs:
+            attr_json = attrs[name]
+            if attr_json is None:
+                continue
+            if "DELETED" in attr_json:
+                continue  # deleted attr
+            names.append(name)
 
-        return attr_json
+        return names
 
-    def getAttributes(self, obj_id):
+    def getAttribute(self, obj_id, name, includeData=True):
         """
-        Get attributes given an object id and name
+        Get attribute given an object id and name
         returns: JSON object
         """
 
+        attr_names = self.getAttributes(obj_id)
+        if name not in attr_names:
+            return None
+
         obj_json = self.getObjectById(obj_id)
         attrs = obj_json["attributes"]
-        names = []
-        for name in attrs:
-            if attrs[name] is not None:
-                names.append(name)
 
-        return names
+        attr_json = attrs[name]
+
+        return attr_json
 
     def getAttributeValue(self, obj_id, name):
         """ Return NDArray of the given attribute value """
         attr_json = self.getAttribute(obj_id, name)
+        if attr_json is None:
+            raise KeyError(f"attribute {name} not found")
         shape_json = attr_json["shape"]
         if shape_json["class"] == "H5S_NULL":
             # no value for empty shape attributes
@@ -530,7 +532,8 @@ def deleteAttribute(self, obj_id, name):
         attrs_json = obj_json["attributes"]
         if name not in attrs_json:
             raise KeyError(f"attribute [{name}] not found in {obj_id}")
-        attrs_json[name] = None  # mark key for deletion
+        attr_json = attrs_json[name]
+        attr_json["DELETED"] = time.time()  # mark key for deletion
 
         self.make_dirty(obj_id)
 
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 72864db5..45e24fc0 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -424,11 +424,24 @@ def updateLinks(self, grp_ids):
                 self.log.debug(f"hsds_writer> {grp_id} {count} links updated")
                 self._lastModified = time.time()
 
+    def _deleteAttribute(self, obj_id, attr_name):
+        # delete the given attribute
+
+        col_name = getCollectionForId(obj_id)
+        req = f"/{col_name}/{obj_id}/attributes/{attr_name}"
+        http_rsp = self.http_conn.DELETE(req)
+        if http_rsp.status_code != 200:
+            self.log.error("failed to delete attribute for obj: {obj_id} name: {attr_name}")
+            raise IOError("hsds_writer failed to delete attribute")
+
     def updateAttributes(self, obj_ids):
         """ update any modified links of the given objects """
 
         self.log.debug("hsds_writer> updateAttributes")
         items = {}  # dict which will hold a map of objects ids to attributes to create
+        removals = {}  # map of obj_ids to attributes to be deleted
+        separator = '|'  # use this character to join attribute names for deletion
+
         count = 0
 
         for obj_id in obj_ids:
@@ -436,10 +449,26 @@ def updateAttributes(self, obj_ids):
             obj_attrs = obj_json["attributes"]
             for attr_name in obj_attrs:
                 attr_json = obj_attrs[attr_name]
+
                 if "created" not in attr_json:
                     self.log.error(f"hsds_writer> expected created timestamp in attr: {attr_json}")
                 created = attr_json["created"]
-                if created > self._last_flush_time:
+                if "DELETED" in attr_json:
+                    if created > self._last_flush_time:
+                        # attribute hasn't been created yet
+                        msg = f"hsds_writer> {obj_id}: attr: {attr_name} deleted before flush"
+                        self.log.debug(msg)
+                    else:
+                        # attribute has been persisted, remove
+                        if attr_name.find(separator) != -1:
+                            # need to delete individually
+                            self._deleteAttribute(obj_id, attr_name)
+                        else:
+                            # can delete in a batch
+                            if obj_id not in removals:
+                                removals[obj_id] = set()
+                            removals[obj_id].add(attr_name)
+                elif created > self._last_flush_time:
                     self.log.debug(f"hsds_writer> {obj_id} attribute {attr_name} created")
                     count += 1
                     # new attribute, add to our list
@@ -447,6 +476,24 @@ def updateAttributes(self, obj_ids):
                         items[obj_id] = {"attributes": {}}
                     attrs = items[obj_id]["attributes"]
                     attrs[attr_name] = attr_json
+                else:
+                    self.log.debug(f"hsds_writer> {obj_id}: attr: {attr_name} has already been deleted")
+
+        if removals:
+            # TBD: hsds doesn't have a multiple object attribute deletion operation yet
+            # so make one request per object id
+            # Delete with custom separator
+
+            for obj_id in removals:
+                attr_names = removals[obj_id]
+                params = {"attr_names": separator.join(attr_names)}
+                params["separator"] = separator
+                collection = getCollectionForId(obj_id)
+                req = f"/{collection}/{obj_id}/attributes"
+                rsp = self.http_conn.DELETE(req, params=params)
+                if rsp.status_code != 200:
+                    self.log.error("failed to delete attribute for obj: {obj_id}")
+                    raise IOError("hsds_writer failed to delete attributes")
 
         if items:
             body = {"obj_ids": items}
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 259d7937..aa481dfd 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -132,7 +132,9 @@ def testSimple(self):
             self.assertTrue("slink" in g2)
 
         db.open()
+        db.createAttribute(g1_id, "a1", "hello")
         db.createAttribute(g1_id, "a2", "bye-bye")
+        self.assertEqual(len(db.getAttributes(g1_id)), 2)
         db.close()
 
         with h5py.File(filepath) as f:
@@ -141,6 +143,19 @@ def testSimple(self):
             self.assertTrue("a1" in g1.attrs)
             self.assertTrue("a2" in g1.attrs)
 
+        db.open()
+        # test deleting an attribute
+        db.deleteAttribute(g1_id, "a1")
+        self.assertEqual(len(db.getAttributes(g1_id)), 1)
+        self.assertEqual(db.getAttribute(g1_id, "a1"), None)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            g1 = f["g1"]
+            self.assertEqual(len(g1.attrs), 1)
+            self.assertFalse("a1" in g1.attrs)
+            self.assertTrue("a2" in g1.attrs)
+
         db.open()
         g21 = db.createGroup()
         db.createHardLink(g2_id, "g2.1", g21)
@@ -154,7 +169,11 @@ def testSimple(self):
         db.open()
         tmp_grp_id = db.createGroup("tmp_group")
         db.createHardLink(g2_id, "tmp_group", tmp_grp_id)
+        del_link = db.getLink(g2_id, "tmp_group")
+        self.assertTrue(del_link is not None)
         db.deleteLink(g2_id, "tmp_group")
+        self.assertEqual(db.getLink(g2_id, "tmp_group"), None)
+
         db.close()
 
         with h5py.File(filepath) as f:
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 2722eaa6..7a882259 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -139,6 +139,17 @@ def testGroup(self):
 
         ret = db.getLink(g2_id, "not_a_link")
         self.assertTrue(ret is None)
+
+        db.createAttribute(g1_id, "a1", "hello")
+        db.createAttribute(g1_id, "a2", "bye-bye")
+        self.assertEqual(len(db.getAttributes(g1_id)), 2)
+        a1_attr = db.getAttribute(g1_id, "a1")
+        self.assertEqual(a1_attr["value"], "hello")
+
+        db.deleteAttribute(g1_id, "a1")
+        self.assertEqual(len(db.getAttributes(g1_id)), 1)
+        self.assertEqual(db.getAttribute(g1_id, "a1"), None)
+
         db.close()
 
     def testNullSpaceAttribute(self):
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index 8f12c920..ca3c2579 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -125,9 +125,50 @@ def testSimple(self):
         g2links = g2links_json["links"]
         self.assertTrue(len(g2links), 2)  # custom link will be ignored
 
+        db.createAttribute(g1_id, "a1", "hello")
         db.createAttribute(g1_id, "a2", "bye-bye")
         db.flush()
 
+        # validate - check that attributes got created
+        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        attrs_json = rsp_json["attributes"]
+        self.assertEqual(len(attrs_json), 2)
+
+        # delete an attribute
+        db.deleteAttribute(g1_id, "a1")
+        db.flush()
+
+        # validate - check that the attribute got deleted
+        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        attrs_json = rsp_json["attributes"]
+        self.assertEqual(len(attrs_json), 1)
+
+        # create an attribute that happens to use the separator character
+        db.createAttribute(g1_id, "a|z", "goofy")
+        db.flush()
+
+        # validate - check that attributes got created
+        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        attrs_json = rsp_json["attributes"]
+        self.assertEqual(len(attrs_json), 2)
+
+        # delete an attribute
+        db.deleteAttribute(g1_id, "a|z")
+        db.flush()
+
+        # validate - check that the attribute got deleted
+        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
+        self.assertEqual(http_rsp.status_code, 200)
+        rsp_json = http_rsp.json()
+        attrs_json = rsp_json["attributes"]
+        self.assertEqual(len(attrs_json), 1)
+
         g21 = db.createGroup()
         db.createHardLink(g2_id, "g2.1", g21)
         db.flush()

From e4e0105c2843826723350ce86b64ef0b255248d0 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 5 Sep 2025 12:07:11 +0100
Subject: [PATCH 068/129] fix key name for creationPropertyList

---
 src/h5json/hdf5db.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 5f51c972..9bbfa702 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -729,9 +729,9 @@ def createGroup(self, cpl=None):
         grp_id = createObjId("groups", root_id=self.root_id)
         group_json = {"attributes": {}, "links": {}}
         if cpl:
-            group_json["cpl"] = cpl
+            group_json["creationProperties"] = cpl
         else:
-            group_json["cpl"] = {}
+            group_json["creationProperties"] = {}
         group_json["created"] = time.time()
         self.db[grp_id] = group_json
         self._new_objects.add(grp_id)
@@ -756,7 +756,7 @@ def createCommittedType(self, datatype, cpl=None):
 
         type_json = getTypeItem(dt)  # get canonical json description of datatype
 
-        ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl}
+        ctype_json = {"type": type_json, "attributes": {}, "creationProperties": cpl}
         ctype_json["created"] = time.time()
         self.db[ctype_id] = ctype_json
         self._new_objects.add(ctype_id)
@@ -795,9 +795,9 @@ def createDataset(
 
         dset_json = {"shape": shape_json, "type": type_json, "attributes": {}}
         if cpl:
-            dset_json["cpl"] = cpl
+            dset_json["creationProperties"] = cpl
         else:
-            dset_json["cpl"] = {}
+            dset_json["creationProperties"] = {}
 
         dset_id = createObjId("datasets", root_id=self.root_id)
         self.db[dset_id] = dset_json

From 3260929d20b2ea194b28ae71851559b788a13f22 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Sep 2025 11:06:44 +0100
Subject: [PATCH 069/129] use client create time for new link if provided

---
 src/h5json/hsdsstore/hsds_writer.py |  2 +-
 test/unit/hdf5db_test.py            | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
index 45e24fc0..9166937a 100644
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -376,7 +376,7 @@ def updateLinks(self, grp_ids):
                         items[grp_id] = {"links": {}}
                     links = items[grp_id]["links"]
                     link_class = link_json["class"]
-                    new_link = {"class": link_class}
+                    new_link = {"class": link_class, "created": created}
                     # convert to hsds representation
                     if link_class == "H5L_TYPE_HARD":
                         new_link["id"] = link_json["id"]
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 7a882259..8dc0e99b 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -332,6 +332,29 @@ def testCreateVlenReferenceAttribute(self):
 
         db.close()
 
+    def testAttributeCreateOrder(self):
+        titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")
+        cpl = {"CreateOrder": True}
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        for title in titles:
+            db.createAttribute(g1_id, title, title)
+        g2_id = db.createGroup(cpl=cpl)
+        db.createHardLink(root_id, "g2", g2_id)
+        for title in titles:
+            db.createAttribute(g2_id, title, title)
+        print("g1 attributes:", db.getAttributes(g1_id))
+        print("g2 attributes:", db.getAttributes(g2_id))
+        self.assertEqual(sorted(db.getAttributes(g1_id)), sorted(titles))
+        #self.assertEqual(db.getAttributes(g2_id), titles)
+        db.close()
+
+
+
+
+
     def testCommittedType(self):
         db = Hdf5db(app_logger=self.log)
         root_id = db.open()

From 1ea0bff5cf4fd45e2bba0e073814fa6a6ed9a5de Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Sep 2025 16:17:23 +0100
Subject: [PATCH 070/129] make reference type simple wrapper for str uuid

---
 src/h5json/h5py_util.py  | 109 ---------------------------------------
 src/h5json/hdf5db.py     |   9 ++--
 src/h5json/hdf5dtype.py  |  47 +++++++++--------
 src/h5json/objid.py      |  23 +++++++++
 test/unit/hdf5db_test.py |   8 +--
 test/unit/objid_test.py  |   3 +-
 6 files changed, 56 insertions(+), 143 deletions(-)
 delete mode 100644 src/h5json/h5py_util.py

diff --git a/src/h5json/h5py_util.py b/src/h5json/h5py_util.py
deleted file mode 100644
index ebe2dbdb..00000000
--- a/src/h5json/h5py_util.py
+++ /dev/null
@@ -1,109 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-
-import h5py
-import numpy as np
-
-from . import hdf5dtype
-
-
-def is_reference(val):
-    """ Return True if the type or value is a Reference """
-
-    if isinstance(val, object) and val.__class__.__name__ == "Reference":
-        return True
-    elif isinstance(val, type) and val.__name__ == "Reference":
-        return True
-    else:
-        return False
-
-
-def is_regionreference(val):
-    """ Return True if the type or value is a RegionReference """
-
-    if isinstance(val, object) and val.__class__.__name__ == "RegionReference":
-        return True
-    elif isinstance(val, type) and val.__name__ == "RegionReference":
-        return True
-
-    return False
-
-
-def has_reference(dtype):
-    """ return True if the dtype (or a sub-type) is a Reference type """
-    has_ref = False
-    if not isinstance(dtype, np.dtype):
-        return False
-    if len(dtype) > 0:
-        for name in dtype.fields:
-            item = dtype.fields[name]
-            if has_reference(item[0]):
-                has_ref = True
-                break
-    elif dtype.metadata and "ref" in dtype.metadata:
-        basedt = dtype.metadata["ref"]
-        has_ref = is_reference(basedt)
-    elif dtype.metadata and "vlen" in dtype.metadata:
-        basedt = dtype.metadata["vlen"]
-        has_ref = has_reference(basedt)
-    return has_ref
-
-
-def convert_dtype(srcdt, to_h5py=True):
-    """Return a dtype based on input dtype, converting any Reference types from
-    h5py style to h5json and vice-versa.
-    """
-
-    if len(srcdt) > 0:
-        fields = []
-        for name in srcdt.fields:
-            item = srcdt.fields[name]
-            # item is a tuple of dtype and integer offset
-            field_dt = convert_dtype(item[0], to_h5py=to_h5py)
-            fields.append((name, field_dt))
-        tgt_dt = np.dtype(fields)
-    else:
-        # check if this a "special dtype"
-        if srcdt.metadata and "ref" in srcdt.metadata:
-            ref = srcdt.metadata["ref"]
-            if is_reference(ref):
-                if to_h5py:
-                    tgt_dt = h5py.special_dtype(ref=h5py.Reference)
-                else:
-                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.Reference)
-            elif is_regionreference(ref):
-                if to_h5py:
-                    tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
-                else:
-                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.RegionReference)
-            else:
-                msg = f"Unexpected ref type: {srcdt}"
-                raise TypeError(msg)
-        elif srcdt.metadata and "vlen" in srcdt.metadata:
-            src_vlen = srcdt.metadata["vlen"]
-            if isinstance(src_vlen, np.dtype):
-                tgt_base = convert_dtype(src_vlen, to_h5py=to_h5py)
-            else:
-                tgt_base = src_vlen
-            if to_h5py:
-                tgt_dt = h5py.special_dtype(vlen=tgt_base)
-            else:
-                tgt_dt = hdf5dtype.special_dtype(vlen=tgt_base)
-        elif srcdt.kind == "U":
-            # use vlen for unicode strings
-            if to_h5py:
-                tgt_dt = h5py.special_dtype(vlen=str)
-            else:
-                tgt_dt = hdf5dtype.special_dtype(vlen=str)
-        else:
-            tgt_dt = srcdt
-    return tgt_dt
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 9bbfa702..91884f57 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -15,7 +15,7 @@
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
 from .array_util import jsonToArray, bytesArrayToList
 from .dset_util import resize_dataset
-from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId
+from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId, getHashTagForId
 from . import selections
 from .apiversion import _apiver
 from .h5reader import H5Reader, H5NullReader
@@ -279,11 +279,12 @@ def getObjectById(self, obj_id, refresh=False):
         """ return object with given id """
         self.log.debug(f"getObjectById {obj_id}")
         self._checkReader()
-        if obj_id not in self.db or refresh:
+        tag = getHashTagForId(obj_id)
+        if tag not in self.db or refresh:
             # load the obj from the reader
             obj_json = self.reader.getObjectById(obj_id)
-            self.db[obj_id] = obj_json
-        obj_json = self.db[obj_id]
+            self.db[tag] = obj_json
+        obj_json = self.db[tag]
 
         return obj_json
 
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index bbef116d..8799836a 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -13,6 +13,8 @@
 import weakref
 import numpy as np
 
+from .objid import getHashTagForId
+
 
 numpy_integer_types = (np.int8, np.uint8, np.int16, np.int16, np.int32, np.uint32, np.int64, np.uint64)
 numpy_float_types = (np.float16, np.float32, np.float64)
@@ -28,42 +30,43 @@ def id(self):
         """Low-level identifier appropriate for this object"""
         return self._id
 
-    @property
-    def objref(self):
-        """Weak reference to object"""
-        return self._objref  # return weak ref to ref'd object
-
     def __init__(self, bind):
         """Create a new reference by binding to
-        a group/dataset/committed type
+        a uuid
         """
-        self._id = bind._id
-        self._objref = weakref.ref(bind)
+        if not bind:
+            self._id = None
+        else:
+            if isinstance(bind, bytes):
+                bind = bind.decode()
+
+            if not isinstance(bind, str):
+                raise TypeError("Expected string id")
+
+            self._id = getHashTagForId(bind)
 
     def __repr__(self):
         # TBD: this is not consistent with hsds or h5py...
-        if not isinstance(self._id.id, str):
-            raise TypeError("Expected string id")
-        item = None
-
-        collection_type = self._id.collection_type
-        item = f"{collection_type}/{self._id.id}"
-        return item
+        return f"<HDF5 object reference: {self._id}>"
 
     def tolist(self):
-        if type(self._id.id) is not str:
+        if type(self._id) is not str:
             raise TypeError("Expected string id")
-        if self._id.objtype_code == "d":
+        if not self._id:
+            return [("",),]
+
+        objtype_code = self._id[0]
+        if objtype_code == "d":
             return [
-                ("datasets/" + self._id.id),
+                ("datasets/" + self._id),
             ]
-        elif self._id.objtype_code == "g":
+        elif objtype_code == "g":
             return [
-                ("groups/" + self._id.id),
+                ("groups/" + self._id),
             ]
-        elif self._id.objtype_code == "t":
+        elif objtype_code == "t":
             return [
-                ("datatypes/" + self._id.id),
+                ("datatypes/" + self._id),
             ]
         else:
             raise TypeError("Unexpected id type")
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
index 57b5316c..fa82e0ef 100644
--- a/src/h5json/objid.py
+++ b/src/h5json/objid.py
@@ -130,6 +130,29 @@ def getCollectionForId(obj_id):
     return collection
 
 
+def getHashTagForId(id):
+    """ Return canonical <collection_char>-<UUID> """
+
+    if not isinstance(id, str):
+        raise ValueError("Expected string type")
+
+    if not id:
+        raise ValueError("Empty id")
+
+    parts = id.split("/")
+    tag = parts[-1]
+
+    # add a prefix tag if not already present
+    if len(tag) < UUID_LEN:
+        raise ValueError(f"unexpected uuid: {tag}")
+    if tag[1] != '-':
+        if len(parts) != 2:
+            raise ValueError(f"unexpected obj id: {id}")
+        collection = parts[0]
+        tag = _getPrefixForCollection(collection) + '-' + tag
+    return tag
+
+
 def isRootObjId(id):
     """returns true if this is a root id (only for v2 schema)"""
     if not isSchema2Id(id):
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 8dc0e99b..1eca8e2a 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -345,16 +345,10 @@ def testAttributeCreateOrder(self):
         db.createHardLink(root_id, "g2", g2_id)
         for title in titles:
             db.createAttribute(g2_id, title, title)
-        print("g1 attributes:", db.getAttributes(g1_id))
-        print("g2 attributes:", db.getAttributes(g2_id))
         self.assertEqual(sorted(db.getAttributes(g1_id)), sorted(titles))
-        #self.assertEqual(db.getAttributes(g2_id), titles)
+        self.assertEqual(tuple(db.getAttributes(g2_id)), titles)
         db.close()
 
-
-
-
-
     def testCommittedType(self):
         db = Hdf5db(app_logger=self.log)
         root_id = db.open()
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
index d74ec102..7104e9bc 100755
--- a/test/unit/objid_test.py
+++ b/test/unit/objid_test.py
@@ -12,7 +12,7 @@
 import unittest
 
 from h5json.objid import isRootObjId, isValidUuid, validateUuid
-from h5json.objid import createObjId, getCollectionForId, getUuidFromId
+from h5json.objid import createObjId, getCollectionForId, getUuidFromId, getHashTagForId
 from h5json.objid import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id
 
 
@@ -203,6 +203,7 @@ def testGetDataTypeId(self):
             self.assertTrue(isValidUuid(test_id))
             self.assertEqual(getCollectionForId(test_id), "datatypes")
             self.assertEqual(getUuidFromId(test_id), test_uuid)
+            self.assertEqual(getHashTagForId(test_id), "t-" + test_uuid)
 
 
 if __name__ == "__main__":

From 46ff5face0fef3c83b6636535659f64b4fad27ac Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Sep 2025 16:21:55 +0100
Subject: [PATCH 071/129] fix syntax for ci yaml

---
 .github/workflows/ci.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ba618d56..dcfdc512 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -83,8 +83,9 @@ jobs:
 
       - name: Run tests
         shell: bash
-        HS_ENDPOINT: http://localhost:5101
-        HS_USERNAME: test_user1
-        HS_PASSWORD: test
+        env:
+          HS_ENDPOINT: http://localhost:5101
+          HS_USERNAME: test_user1
+          HS_PASSWORD: test
         run: |
           python testall.py

From e88f85f5ad5734df4bf885cbea201657b59c8365 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Sep 2025 16:25:50 +0100
Subject: [PATCH 072/129] remove python 3.9 support

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dcfdc512..554cb6d5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
     runs-on: ${{ matrix.os }}
 
     steps:

From 51f2a9b4441152358ed6f55d8b1ae8e77bd19aef Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Sep 2025 16:28:07 +0100
Subject: [PATCH 073/129] revert h5py_util.py

---
 src/h5json/h5py_util.py | 109 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 src/h5json/h5py_util.py

diff --git a/src/h5json/h5py_util.py b/src/h5json/h5py_util.py
new file mode 100644
index 00000000..ebe2dbdb
--- /dev/null
+++ b/src/h5json/h5py_util.py
@@ -0,0 +1,109 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import h5py
+import numpy as np
+
+from . import hdf5dtype
+
+
+def is_reference(val):
+    """ Return True if the type or value is a Reference """
+
+    if isinstance(val, object) and val.__class__.__name__ == "Reference":
+        return True
+    elif isinstance(val, type) and val.__name__ == "Reference":
+        return True
+    else:
+        return False
+
+
+def is_regionreference(val):
+    """ Return True if the type or value is a RegionReference """
+
+    if isinstance(val, object) and val.__class__.__name__ == "RegionReference":
+        return True
+    elif isinstance(val, type) and val.__name__ == "RegionReference":
+        return True
+
+    return False
+
+
+def has_reference(dtype):
+    """ return True if the dtype (or a sub-type) is a Reference type """
+    has_ref = False
+    if not isinstance(dtype, np.dtype):
+        return False
+    if len(dtype) > 0:
+        for name in dtype.fields:
+            item = dtype.fields[name]
+            if has_reference(item[0]):
+                has_ref = True
+                break
+    elif dtype.metadata and "ref" in dtype.metadata:
+        basedt = dtype.metadata["ref"]
+        has_ref = is_reference(basedt)
+    elif dtype.metadata and "vlen" in dtype.metadata:
+        basedt = dtype.metadata["vlen"]
+        has_ref = has_reference(basedt)
+    return has_ref
+
+
+def convert_dtype(srcdt, to_h5py=True):
+    """Return a dtype based on input dtype, converting any Reference types from
+    h5py style to h5json and vice-versa.
+    """
+
+    if len(srcdt) > 0:
+        fields = []
+        for name in srcdt.fields:
+            item = srcdt.fields[name]
+            # item is a tuple of dtype and integer offset
+            field_dt = convert_dtype(item[0], to_h5py=to_h5py)
+            fields.append((name, field_dt))
+        tgt_dt = np.dtype(fields)
+    else:
+        # check if this a "special dtype"
+        if srcdt.metadata and "ref" in srcdt.metadata:
+            ref = srcdt.metadata["ref"]
+            if is_reference(ref):
+                if to_h5py:
+                    tgt_dt = h5py.special_dtype(ref=h5py.Reference)
+                else:
+                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.Reference)
+            elif is_regionreference(ref):
+                if to_h5py:
+                    tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
+                else:
+                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.RegionReference)
+            else:
+                msg = f"Unexpected ref type: {srcdt}"
+                raise TypeError(msg)
+        elif srcdt.metadata and "vlen" in srcdt.metadata:
+            src_vlen = srcdt.metadata["vlen"]
+            if isinstance(src_vlen, np.dtype):
+                tgt_base = convert_dtype(src_vlen, to_h5py=to_h5py)
+            else:
+                tgt_base = src_vlen
+            if to_h5py:
+                tgt_dt = h5py.special_dtype(vlen=tgt_base)
+            else:
+                tgt_dt = hdf5dtype.special_dtype(vlen=tgt_base)
+        elif srcdt.kind == "U":
+            # use vlen for unicode strings
+            if to_h5py:
+                tgt_dt = h5py.special_dtype(vlen=str)
+            else:
+                tgt_dt = hdf5dtype.special_dtype(vlen=str)
+        else:
+            tgt_dt = srcdt
+    return tgt_dt

From e7452ca7ce9c9bd30e9710825504a5166793bb11 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Sep 2025 18:03:14 +0100
Subject: [PATCH 074/129] use uuid as representation of Reference type

---
 src/h5json/hdf5dtype.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index 8799836a..c0ed2884 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -46,8 +46,8 @@ def __init__(self, bind):
             self._id = getHashTagForId(bind)
 
     def __repr__(self):
-        # TBD: this is not consistent with hsds or h5py...
-        return f"<HDF5 object reference: {self._id}>"
+        # return canonical uuid
+        return f"{self._id}"
 
     def tolist(self):
         if type(self._id) is not str:

From 5b6f33db42665b24f88a7512b3f6db43a4035b2b Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Sep 2025 11:17:59 +0100
Subject: [PATCH 075/129] fix len ref in hsds_reader

---
 src/h5json/hsdsstore/hsds_reader.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
index 819126a6..e7dfa26d 100644
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -10,10 +10,8 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import logging
-import time
-import numpy as np
 
-from ..objid import getCollectionForId, getUuidFromId, createObjId
+from ..objid import getCollectionForId, getUuidFromId
 
 from ..hdf5dtype import createDataType
 from ..array_util import jsonToArray, bytesToArray
@@ -277,7 +275,7 @@ def getDatasetValues(self, dset_id, sel=None, dtype=None):
             params["fields"] = ":".join(mtype.names)
 
         MAX_SELECT_QUERY_LEN = 100
-        if len(query_param) > MAX_SELECT_QUERY_LEN:
+        if query_param and len(query_param) > MAX_SELECT_QUERY_LEN:
             # use a post method to avoid possible long query strings
             try:
                 rsp = self.http_conn.POST(req, body=params, format="binary")

From 8e6d14a28aa1fd2ef31193d2321b3612a61c18d6 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Sep 2025 13:33:50 +0100
Subject: [PATCH 076/129] fix for reading unpersisted dataset values

---
 src/h5json/hdf5db.py          | 20 ++++++++++++++++++-
 src/h5json/selections.py      | 36 +++++++++++++++++++++++++++++------
 test/unit/hsds_writer_test.py | 23 +++++++++++++---------
 3 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 91884f57..b0c069d0 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -569,7 +569,25 @@ def getDatasetValues(self, dset_id, sel):
             rank = len(dims)
 
         dtype = self.getDtype(dset_json)
-        arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
+
+        # determine if we need to make a read request or not
+        if dset_id in self._new_objects:
+            fetch = False
+        else:
+            fetch = True
+            # check against pending updates
+            if "updates" in dset_json:
+                updates = dset_json["updates"]
+                for (update_sel, update_val) in updates:
+                    if selections.contained(sel, update_sel):
+                        fetch = False
+                        break
+
+        # send a reader request unless an update already covers the sel area
+        if fetch:
+            arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
+        else:
+            arr = np.zeros(sel.shape, dtype=dtype)
 
         if "updates" in dset_json:
             # apply any non-flushed changes that intersect the current selection
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 1a051383..ec4ac649 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -116,8 +116,8 @@ def select(obj, args):
     return sel
 
 
-def intersect(s1, s2):
-    """ Return the intersection of two selections """
+def _check_bool_args(s1, s2):
+    """ verify argument for boolean operations """
     # TBD: this is currently only working for simple selections with stride 1
     valid_select_types = (H5S_SELECT_HYPERSLABS, H5S_SELECT_ALL)
     if not isinstance(s1, Selection):
@@ -131,15 +131,18 @@ def intersect(s1, s2):
     if s1.shape != s2.shape:
         raise ValueError("selections have incompatible shapes")
 
+
+def intersect(s1, s2):
+    """ Return the intersection of two selections """
+    # TBD: this is currently only working for simple selections with stride 1
+    _check_bool_args(s1, s2)
+
     slices = []
     rank = len(s1.shape)
     for dim in range(rank):
         start = max(s1.start[dim], s2.start[dim])
         stop = min(s1.start[dim] + s1.count[dim], s2.start[dim] + s2.count[dim])
-        msg = "stepped slices not currently supported"
-        if s1.step[dim] > 1:
-            raise ValueError(msg)
-        if s2.step[dim] > 1:
+        if s1.step[dim] > 1 or s2.step[dim] > 1:
             raise ValueError("stepped slices not currently supported")
         if start > stop:
             stop = start
@@ -149,6 +152,27 @@ def intersect(s1, s2):
     return select(s1.shape, slices)
 
 
+def contained(s1, s2):
+    """ return True if s1 is contained in s2, otherwise False """
+    _check_bool_args(s1, s2)
+
+    is_contained = True
+    rank = len(s1.shape)
+    for dim in range(rank):
+        if s1.step[dim] > 1 or s2.step[dim] > 1:
+            # TBD: do the right thing for stepped selections
+            # for now just return False
+            is_contained = False
+            break
+        if s1.start[dim] < s2.start[dim]:
+            is_contained = False
+            break
+        if s1.start[dim] + s1.count[dim] > s2.start[dim] + s2.count[dim]:
+            is_contained = False
+            break
+    return is_contained
+
+
 class Selection(object):
 
     """
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index ca3c2579..af12fa32 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -223,7 +223,7 @@ def testSimple(self):
         db.close()
 
     def testReaderWriter(self):
-        # try reading and writer to an HSDS domain
+        # try reading and writing to an HSDS domain
         # create a random string so we don't try to open an existing file
         filename = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))
         domain_path = "/home/test_user1/test/" + filename + ".h5"
@@ -234,14 +234,7 @@ def testReaderWriter(self):
         self.assertTrue(root_id)
         db.reader = HSDSReader(domain_path, app_logger=self.log)
         db.close()
-        """
-        db.writer = HSDSWriter(domain, **kwargs)
-        root_id = db.open()
-        db.close()
-        # now set the reader
-        db.reader = HSDSReader(domain, **kwargs)
-        db.open()
-        """
+
         root_id2 = db.open()
         self.assertEqual(root_id, root_id2)
         root_json = db.getObjectById(root_id)
@@ -250,6 +243,18 @@ def testReaderWriter(self):
         self.assertTrue(root_json["created"] > 0)
         self.assertTrue(db.writer.lastModified is None)  # no flush yet
 
+        # create a scalar dataset
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        arr = np.zeros((), dtype=np.int32)
+        arr[()] = 42
+        sel_all = selections.select((), ...)
+        db.setDatasetValues(dset_id, sel_all, arr)
+
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr[()], 42)
+
+        db.close()
+
     def testH5PyToHS(self):
         # test reading from HDF5 file and writing to HSDS
 

From 556176749c41b519fa8965486ccc931c1034109c Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Sep 2025 16:35:15 +0100
Subject: [PATCH 077/129] fix for created and lastModified keys

---
 src/h5json/dset_util.py       |  1 -
 src/h5json/hdf5db.py          | 11 ++++++-----
 test/unit/hsds_writer_test.py |  8 ++++++++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index e1a44a59..37d67f1e 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -40,7 +40,6 @@ def resize_dataset(dset_json, shape):
             raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")
 
     shape_json["dims"] = list(shape)
-    dset_json["modified"] = time.time()
 
 
 def getDims(dset_json):
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index b0c069d0..1dbaf43b 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -129,9 +129,7 @@ def deleted_objects(self):
 
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
-        if self.is_new(obj_id):
-            # object hasn't been initially written yet, just return
-            return
+
         if obj_id not in self.db:
             self.log.error("make dirty called on deleted object")
             raise KeyError(f"obj_id: {obj_id} not found")
@@ -140,7 +138,9 @@ def make_dirty(self, obj_id):
             return
         obj_json = self.db[obj_id]
         obj_json["lastModified"] = time.time()
-        self._dirty_objects.add(obj_id)
+        if not self.is_new(obj_id):
+            # object hasn't been initially written yet, add to dirt_object set
+            self._dirty_objects.add(obj_id)
 
     def flush(self):
         """ write out any changes """
@@ -646,7 +646,7 @@ def resizeDataset(self, dset_id, shape):
 
         dset_json = self.getObjectById(dset_id)  # will throw exception if not found
         if resize_dataset(dset_json, shape):
-            self._dirty_objects.add(dset_id)
+            self._make_dirty(dset_id)
 
     def deleteObject(self, obj_id):
         """ Delete the given object """
@@ -817,6 +817,7 @@ def createDataset(
             dset_json["creationProperties"] = cpl
         else:
             dset_json["creationProperties"] = {}
+        dset_json["created"] = time.time()
 
         dset_id = createObjId("datasets", root_id=self.root_id)
         self.db[dset_id] = dset_json
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index af12fa32..d731836a 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -245,10 +245,18 @@ def testReaderWriter(self):
 
         # create a scalar dataset
         dset_id = db.createDataset(shape=(), dtype=np.int32)
+        dset_json = db.getObjectById(dset_id)
+        self.assertTrue("created" in dset_json)
+        dset_create_time = dset_json["created"]
+        self.assertTrue(dset_create_time > 0)
+
         arr = np.zeros((), dtype=np.int32)
         arr[()] = 42
         sel_all = selections.select((), ...)
         db.setDatasetValues(dset_id, sel_all, arr)
+        dset_json = db.getObjectById(dset_id)
+        self.assertTrue("lastModified" in dset_json)
+        self.assertTrue(dset_json["lastModified"] > dset_create_time)
 
         arr = db.getDatasetValues(dset_id, sel_all)
         self.assertEqual(arr[()], 42)

From 924ee00100e8e19f00160f543adee5004827e2d5 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Sep 2025 18:23:24 +0100
Subject: [PATCH 078/129] fix for scalar datasets

---
 src/h5json/hdf5db.py          | 11 ++++++++++-
 test/unit/hsds_writer_test.py | 29 ++++++++++++++++++++++++-----
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 1dbaf43b..933c1ce4 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -603,7 +603,11 @@ def getDatasetValues(self, dset_id, sel):
                     stop = start + sel_inter.count[dim]
                     slices.append(slice(start, stop, 1))
                 slices = tuple(slices)
-                arr[slices] = update_val
+                # TBD: needs updating to work in the general case!
+                if slices == ():
+                    arr[slices] = update_val[slices]
+                else:
+                    arr[slices] = update_val
 
         return arr
 
@@ -620,6 +624,11 @@ def setDatasetValues(self, dset_id, sel, arr):
             raise ValueError("Only hyperslab selections are currently supported")
         if not isinstance(arr, np.ndarray):
             raise TypeError("Expected ndarray for data value")
+        tgt_dt = self.getDtype(dset_json)
+        src_dt = arr.dtype
+        if src_dt != tgt_dt:
+            raise TypeError("arr.dtype doesn't match dataset dtype")
+
         if shape_json["class"] == "H5S_NULL":
             raise ValueError("writing to null space dataset not supported")
         if shape_json["class"] == "H5S_SCALAR":
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
index d731836a..ecdedf02 100644
--- a/test/unit/hsds_writer_test.py
+++ b/test/unit/hsds_writer_test.py
@@ -244,23 +244,42 @@ def testReaderWriter(self):
         self.assertTrue(db.writer.lastModified is None)  # no flush yet
 
         # create a scalar dataset
-        dset_id = db.createDataset(shape=(), dtype=np.int32)
-        dset_json = db.getObjectById(dset_id)
+        dsetA_id = db.createDataset(shape=(), dtype=np.int32)
+        dset_json = db.getObjectById(dsetA_id)
         self.assertTrue("created" in dset_json)
         dset_create_time = dset_json["created"]
         self.assertTrue(dset_create_time > 0)
 
+        db.createHardLink(root_id, "dset_a", dsetA_id)
+
         arr = np.zeros((), dtype=np.int32)
         arr[()] = 42
         sel_all = selections.select((), ...)
-        db.setDatasetValues(dset_id, sel_all, arr)
-        dset_json = db.getObjectById(dset_id)
+        db.setDatasetValues(dsetA_id, sel_all, arr)
+
+        dset_json = db.getObjectById(dsetA_id)
         self.assertTrue("lastModified" in dset_json)
         self.assertTrue(dset_json["lastModified"] > dset_create_time)
 
-        arr = db.getDatasetValues(dset_id, sel_all)
+        arr = db.getDatasetValues(dsetA_id, sel_all)
         self.assertEqual(arr[()], 42)
 
+        # create a scalar dataset with string
+        dt_str = special_dtype(vlen=str)
+        dsetB_id = db.createDataset(shape=(), dtype=dt_str)
+        dset_json = db.getObjectById(dsetB_id)
+        db.createHardLink(root_id, "dset_b", dsetB_id)
+
+        arr = np.zeros((), dtype=dt_str)
+        arr[()] = "hello world"
+        db.setDatasetValues(dsetB_id, sel_all, arr)
+
+        arr = db.getDatasetValues(dsetB_id, sel_all)
+
+        e = arr[()]
+        self.assertEqual(e, "hello world")
+        self.assertTrue(isinstance(e, str))
+
         db.close()
 
     def testH5PyToHS(self):

From 1f90429438b59ac92a4a97c99d10bbd6fce295e5 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 12 Sep 2025 17:05:24 +0100
Subject: [PATCH 079/129] move hsds plugins to h5pyd

---
 pyproject.toml                      |   1 -
 src/h5json/hdf5db.py                |   2 +-
 src/h5json/hsdsstore/hsds_reader.py | 322 -----------
 src/h5json/hsdsstore/hsds_writer.py | 631 ----------------------
 src/h5json/hsdsstore/httpconn.py    | 804 ----------------------------
 src/h5json/openid.py                | 437 ---------------
 testall.py                          |   2 -
 7 files changed, 1 insertion(+), 2198 deletions(-)
 delete mode 100644 src/h5json/hsdsstore/hsds_reader.py
 delete mode 100644 src/h5json/hsdsstore/hsds_writer.py
 delete mode 100644 src/h5json/hsdsstore/httpconn.py
 delete mode 100644 src/h5json/openid.py

diff --git a/pyproject.toml b/pyproject.toml
index d911700a..11302438 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,6 @@ packages = [
     "h5json",
     "h5json.jsonstore",
     "h5json.h5pystore",
-    "h5json.hsdsstore",
     "h5json.schema",
     "h5json.apps",
 ]
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 933c1ce4..02753ec5 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -277,11 +277,11 @@ def _checkWriter(self):
 
     def getObjectById(self, obj_id, refresh=False):
         """ return object with given id """
-        self.log.debug(f"getObjectById {obj_id}")
         self._checkReader()
         tag = getHashTagForId(obj_id)
         if tag not in self.db or refresh:
             # load the obj from the reader
+            self.log.debug(f"getObjectById - fetching {obj_id} from reader")
             obj_json = self.reader.getObjectById(obj_id)
             self.db[tag] = obj_json
         obj_json = self.db[tag]
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
deleted file mode 100644
index e7dfa26d..00000000
--- a/src/h5json/hsdsstore/hsds_reader.py
+++ /dev/null
@@ -1,322 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-import logging
-
-from ..objid import getCollectionForId, getUuidFromId
-
-from ..hdf5dtype import createDataType
-from ..array_util import jsonToArray, bytesToArray
-from .. import selections
-from ..h5reader import H5Reader
-from .httpconn import HttpConn
-
-
-class HSDSReader(H5Reader):
-    """
-    This class can be used by HDF5DB to read content from an hdf5-json file
-    """
-
-    def __init__(
-        self,
-        domain_path,
-        app_logger=None,
-        endpoint=None,
-        username=None,
-        password=None,
-        bucket=None,
-        api_key=None,
-        use_session=True,
-        expire_time=0,
-        max_objects=0,
-        max_age=0,
-        retries=3,
-        timeout=30.0,
-    ):
-        if app_logger:
-            self.log = app_logger
-        else:
-            self.log = logging.getLogger()
-
-        self.log.debug("HSDSReader init(")
-
-        kwargs = {}
-        self.log.debug(f"    domain_path: {domain_path}")
-        if endpoint:
-            self.log.debug(f"    endpoint: {endpoint}")
-            kwargs["endpoint"] = endpoint
-        if username:
-            self.log.debug(f"    username: {username}")
-            kwargs["username"] = username
-        if password:
-            self.log.debug(f"    password: {'*' * len(password)}")
-            kwargs["password"] = password
-        if bucket:
-            self.log.debug(f"    bucket: {bucket}")
-            kwargs["bucket"] = bucket
-        if api_key:
-            self.log.debug(f"    apI_key: {'*' * len(api_key)}")
-            kwargs["api_key"] = api_key
-        if use_session:
-            self.log.debug(f"    use_session: {use_session}")
-            kwargs["user_session"] = use_session
-
-        if expire_time:
-            self.log.debug(f"    expire_time: {expire_time}")
-            kwargs["expire_time"] = expire_time
-        if max_objects:
-            self.log.debug(f"    max_objects: {max_objects}")
-            kwargs["max_objects"] = max_objects
-        if max_age:
-            self.log.debug(f"    max_age: {max_age}")
-            kwargs["max_age"] = max_age
-        if retries:
-            self.log.debug(f"    retries: {retries}")
-            kwargs["retries"] = retries
-        if timeout:
-            self.log.debug(f"    timeout: {timeout}")
-            kwargs["timeout"] = timeout
-        # save these for when we create the connection
-        self._http_kwargs = kwargs
-        self._http_conn = None
-        self._stats = {"created": 0, "lastModified": 0, "owner": ""}
-
-        super().__init__(domain_path, app_logger=app_logger)
-
-    def open(self):
-        if self._http_conn and not self._http_conn.isClosed():
-            return self._root_id  # open already called
-
-        if self._http_conn:
-            http_conn = self._http_conn
-        else:
-            kwargs = self._http_kwargs
-            http_conn = HttpConn(self.filepath, **kwargs)
-
-        http_conn.open()
-
-        hsds_info = http_conn.serverInfo()
-        self.log.debug(f"got hsds info: {hsds_info}")
-
-        # try to do a GET from the domain
-        req = "/"
-        params = {}
-        """
-        if max_objects is None or max_objects > 0:
-            # get object meta objects
-            # TBD: have hsds support a max limit of objects to return
-            params["getobjs"] = 1
-        params["include_attrs"] = 1
-        params["include_links"] = 1
-        """
-
-        rsp = http_conn.GET(req, params=params)
-
-        if rsp.status_code != 200:
-            # file must exist
-            http_conn.close()
-            raise IOError(rsp.status_code, rsp.reason)
-
-        domain_json = rsp.json()
-        self.log.debug(f"got domain_json: {domain_json}")
-
-        # update stats
-        for key in ("created", "lastModified", "owner", "limits", "version", "compressors"):
-            if key in domain_json:
-                self._stats[key] = domain_json[key]
-
-        if "root" not in domain_json:
-            http_conn.close()
-            raise IOError(404, "Location is a folder, not a file")
-
-        root_id = domain_json["root"]
-        self._root_id = root_id
-
-        """
-        if "domain_objs" in root_json:
-            domain_objs = root_json["domain_objs"]
-            objdb.load(domain_objs)
-        """
-
-        self._http_conn = http_conn
-
-        return self._root_id
-
-    @property
-    def http_conn(self):
-        return self._http_conn
-
-    def close(self):
-        if self._http_conn:
-            self._http_conn.close()
-
-    def isClosed(self):
-        if not self._http_conn:
-            return True
-        else:
-            return self._http_conn.isClosed()
-
-    def get_root_id(self):
-        """ Return root id """
-        return self._root_id
-
-    def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_values=False):
-        """ return object with given id """
-
-        collection = getCollectionForId(obj_id)
-
-        req = f"/{collection}/{obj_id}"
-        self.log.debug("sending req: {req}")
-
-        params = {}
-        if include_attrs:
-            params["include_attrs"] = 1
-        if include_links:
-            params["include_links"] = 1
-
-        rsp = self.http_conn.GET(req, params=params)
-
-        if rsp.status_code != 200:
-            raise IOError(rsp.status_code, rsp.reason)
-
-        obj_json = rsp.json()
-        # remove any unneeded keys
-        redundant_keys = ("hrefs", "root", "domain", "bucket", "linkCount", "attributeCount")
-        for key in redundant_keys:
-            if key in obj_json:
-                del obj_json[key]
-
-        self.log.debug(f"got json for id: {obj_id}: {obj_json}")
-        return obj_json
-
-    def getAttribute(self, obj_id, name, includeData=True):
-        """
-        Get attribute given an object id and name
-        returns: JSON object
-        """
-        self.log.debug(f"getAttribute({obj_id}), [{name}], include_data={includeData})")
-        collection = getCollectionForId(obj_id)
-        req = f"/{collection}/{obj_id}/attributes/{name}"
-
-        params = {}
-        params["IncludeData"] = 1 if includeData else 0
-
-        rsp = self.http_conn.GET(req, params=params)
-
-        if rsp.status_code in (404, 410):
-            self.log.warning(f"attribute {name} not found")
-            return None
-
-        if rsp.status_code != 200:
-            self.log.error(f"GET {req} failed with status_code: {rsp.status_code}")
-            raise IOError(rsp.status_code, rsp.reason)
-        attr_json = rsp.json()
-
-        if "hrefs" in attr_json:
-            del attr_json["hrefs"]
-
-        return attr_json
-
-    def getDtype(self, obj_json):
-        """ Return the dtype for the type given by obj_json """
-        if "type" not in obj_json:
-            raise KeyError("no type item found")
-        type_item = obj_json["type"]
-        if isinstance(type_item, str) and type_item.startswith("datatypes/"):
-            # this is a reference to a committed type
-            ctype_id = "t-" + getUuidFromId(type_item)
-            ctype_json = self.getObjectById(ctype_id)
-            if "type" not in ctype_json:
-                raise KeyError(f"Unexpected datatype: {ctype_json}")
-            # Use the ctype's item json
-            type_item = ctype_json["type"]
-        dtype = createDataType(type_item)
-        return dtype
-
-    def getDatasetValues(self, dset_id, sel=None, dtype=None):
-        """
-        Get values from dataset identified by obj_id.
-        If a slices list or tuple is provided, it should have the same
-        number of elements as the rank of the dataset.
-        """
-
-        self.log.debug(f"getDatasetValues({dset_id}), sel={sel}")
-        collection = getCollectionForId(dset_id)
-        if collection != "datasets":
-            msg = f"unexpected id: {dset_id} for getDatasetValues"
-            self.log.warning(msg)
-            return ValueError(msg)
-
-        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
-            query_param = None  # just return the entire array
-        elif isinstance(sel, (selections.SimpleSelection, selections.FancySelection)):
-            query_param = sel.getQueryParam()
-        else:
-            raise NotImplementedError(f"selection type: {type(sel)} not supported")
-
-        mtype = dtype  # TBD - support read time dtype
-        mshape = sel.mshape
-
-        req = f"/{collection}/{dset_id}/value"
-        params = {}
-
-        if query_param:
-            params["select"] = query_param
-
-        if mtype.names != dtype.names:
-            params["fields"] = ":".join(mtype.names)
-
-        MAX_SELECT_QUERY_LEN = 100
-        if query_param and len(query_param) > MAX_SELECT_QUERY_LEN:
-            # use a post method to avoid possible long query strings
-            try:
-                rsp = self.http_conn.POST(req, body=params, format="binary")
-            except IOError as ioe:
-                self.log.info(f"got IOError: {ioe.errno}")
-                raise IOError(f"Error retrieving data: {ioe.errno}")
-        else:
-            # make a http GET
-            try:
-                rsp = self.http_conn.GET(req, params=params, format="binary")
-            except IOError as ioe:
-                self.log.info(f"got IOError: {ioe.errno}")
-                raise IOError(ioe.errno, "Error retrieving data")
-
-        if rsp.status_code != 200:
-            self.log.info(f"got http error: {rsp.status_code}")
-            raise IOError(rsp.status_code, "Error retrieving data")
-
-        if rsp.is_binary:
-            # got binary response
-            self.log.info(f"binary response, {len(rsp.text)} bytes")
-            arr = bytesToArray(rsp.text, mtype, mshape)
-        else:
-            # got JSON response
-            # need some special conversion for compound types --
-            # each element must be a tuple, but the JSON decoder
-            # gives us a list instead.
-            self.log.info("json response")
-
-            data = rsp.json()["value"]
-            # self.log.debug(data)
-
-            arr = jsonToArray(mshape, mtype, data)
-            self.log.debug(f"jsonToArray returned: {arr}")
-
-        return arr
-
-    def getStats(self):
-        """ return a dictionary object with at minimum the following keys:
-            'created': creation time
-            'lastModified': modificationTime
-            'owner': owner name
-        """
-        return self._stats
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
deleted file mode 100644
index 9166937a..00000000
--- a/src/h5json/hsdsstore/hsds_writer.py
+++ /dev/null
@@ -1,631 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-import logging
-import time
-
-from ..objid import getCollectionForId
-
-from ..hdf5dtype import isVlen
-from ..array_util import arrayToBytes, bytesArrayToList
-from ..dset_util import getNumElements, getDims
-from .. import selections
-from ..h5writer import H5Writer
-from .httpconn import HttpConn
-
-
-class HSDSWriter(H5Writer):
-    """
-    This class can be used by HDF5DB to read content from an hdf5-json file
-    """
-
-    def __init__(
-        self,
-        domain_path,
-        append=False,
-        no_data=False,
-        app_logger=None,
-        endpoint=None,
-        username=None,
-        password=None,
-        bucket=None,
-        api_key=None,
-        use_session=True,
-        expire_time=0,
-        max_objects=0,
-        max_age=0,
-        retries=3,
-        timeout=30.0,
-        track_order=False,
-        owner=None,
-        linked_domain=None
-
-    ):
-        if app_logger:
-            self.log = app_logger
-        else:
-            self.log = logging.getLogger()
-
-        if append:
-            self._init = False
-        else:
-            self._init = True
-
-        if no_data:
-            self._no_data = True
-        else:
-            self._no_data = False
-
-        self.log.debug("HSDSWriter init")
-
-        kwargs = {}
-        self.log.debug(f"    domain_path: {domain_path}")
-        self.log.debug(f"    append: {append}")
-        if endpoint:
-            self.log.debug(f"    endpoint: {endpoint}")
-            kwargs["endpoint"] = endpoint
-        if username:
-            self.log.debug(f"    username: {username}")
-            kwargs["username"] = username
-        if password:
-            self.log.debug(f"    password: {'*' * len(password)}")
-            kwargs["password"] = password
-        if bucket:
-            self.log.debug(f"    bucket: {bucket}")
-            kwargs["bucket"] = bucket
-        if api_key:
-            self.log.debug(f"    apI_key: {'*' * len(api_key)}")
-            kwargs["api_key"] = api_key
-        if use_session:
-            self.log.debug(f"    use_session: {use_session}")
-            kwargs["user_session"] = use_session
-        if expire_time:
-            self.log.debug(f"    expire_time: {expire_time}")
-            kwargs["expire_time"] = expire_time
-        if max_objects:
-            self.log.debug(f"    max_objects: {max_objects}")
-            kwargs["max_objects"] = max_objects
-        if max_age:
-            self.log.debug(f"    max_age: {max_age}")
-            kwargs["max_age"] = max_age
-        if retries:
-            self.log.debug(f"    retries: {retries}")
-            kwargs["retries"] = retries
-        if timeout:
-            self.log.debug(f"    timeout: {timeout}")
-            kwargs["timeout"] = timeout
-        self._http_kwargs = kwargs  # save for when we create the connection
-
-        super().__init__(domain_path, app_logger=app_logger)
-
-        self._http_conn = None
-        self._root_id = None
-        self._append = append
-        self._track_order = track_order
-        self._owner = owner
-        self._linked_domain = linked_domain
-        self._last_flush_time = 0
-        self._stats = {"created": 0, "lastModified": 0, "owner": ""}
-
-    def open(self):
-        """ setup domain for writing """
-        if not self._db_ref:
-            # no db set yet
-            raise IOError("DB not set")
-
-        if self._http_conn and not self._http_conn.isClosed():
-            return self._root_id
-
-        if not self._http_conn:
-            kwargs = self._http_kwargs
-            kwargs["retries"] = 1  # tbd: test setting
-            http_conn = HttpConn(self.filepath, **kwargs)
-            if self._append:
-                http_conn._mode = "a"
-                self.log.debug("hsdswriter - set http_conn mode to a")
-            self._http_conn = http_conn
-
-        http_conn = self._http_conn
-        self.log.debug("hsdswriter - open http conn")
-        http_conn.open()
-
-        hsds_info = self._http_conn.serverInfo()
-        self.log.debug(f"got hsds info: {hsds_info}")
-
-        # fetch the domain json
-
-        # try to do a GET from the domain
-        req = "/"
-        params = {}
-        """
-        if max_objects is None or max_objects > 0:
-            # get object meta objects
-            # TBD: have hsds support a max limit of objects to return
-            params["getobjs"] = 1
-            params["include_attrs"] = 1
-            params["include_links"] = 1
-        """
-
-        domain_json = None
-        rsp = http_conn.GET(req, params=params)
-        self.log.debug(f"hsdswriter initial get status_code: {rsp.status_code}")
-
-        if rsp.status_code not in (200, 404, 410):
-            msg = f"Got status code: {rsp.status_code} on initial domain get"
-            self.log.warning(msg)
-            raise IOError(msg)
-
-        if rsp.status_code == 200:
-            if self._append:
-                # domain exists already
-                domain_json = rsp.json()
-                if "root" not in domain_json:
-                    # this a folder not a domain
-                    self.log.warning(f"folder: {self.filepath} has no root property")
-                    http_conn.close()
-                    raise IOError(404, "Location is a folder, not a file")
-            else:
-                # not append - delete existing domain
-                self.log.info("hsds_writer - delete domain")
-                self.log.info(f"sending delete request for {self.filepath}")
-                delete_rsp = http_conn.DELETE(req, params=params)
-                if delete_rsp.status_code not in (200, 410):
-                    # failed to delete
-                    http_conn.close()
-                    raise IOError(rsp.status_code, rsp.reason)
-
-        if not domain_json:
-            # domain doesn't exist, create it
-            self.log.debug("hsds_writer create domain")
-            body = {}
-            if self.db.root_id:
-                # initialize domain using the db's root_id
-                body["root_id"] = self.db.root_id
-            if self._owner:
-                body["owner"] = self._owner
-            if self._linked_domain:
-                body["linked_domain"] = self._linked_domain
-            if self._track_order:
-                create_props = {"CreateOrder": 1}
-                group_body = {"creationProperties": create_props}
-                body["group"] = group_body
-            rsp = http_conn.PUT(req, params=params, body=body)
-            if rsp.status_code != 201:
-                http_conn.close()
-                raise IOError(rsp.status_code, rsp.reason)
-            domain_json = rsp.json()
-            self.log.info(f"got rsp on PUT domain: {domain_json}")
-            if "root" not in domain_json:
-                http_conn.close()
-                raise IOError(404, "Unexpected error")
-
-        self.log.debug(f"got domain_json: {domain_json}")
-
-        if "root" not in domain_json:
-            http_conn.close()
-            raise IOError(404, "Location is a folder, not a file")
-
-        root_id = domain_json["root"]
-        self.log.debug(f"hsds_writer got root_id: {root_id}")
-
-        self._root_id = root_id
-
-        # update stats
-        for key in ("created", "lastModified", "owner", "limits", "version", "compressors"):
-            if key in domain_json:
-                self._stats[key] = domain_json[key]
-
-        return self._root_id
-
-    @property
-    def http_conn(self):
-        return self._http_conn
-
-    def getDatasetSize(self, dset_id):
-        """ Return the size of the given dataset """
-
-        dset_json = self.db.getObjectById(dset_id)
-        num_elements = getNumElements(dset_json)
-        dtype = self.db.getDtype(dset_json)
-        if isVlen(dtype):
-            item_size = 1024  # random guess at size of variable length types
-        else:
-            item_size = dtype.itemsize
-        return num_elements * item_size
-
-    def createObjects(self, obj_ids):
-        """ create the objects referenced in obj_ids """
-
-        MAX_INIT_SIZE = 4096  # max size to include init values in dataset creation
-
-        def multiPost(items):
-            self.log.debug(f"hsds_writer> POST request {collection} for {len(items)} objects")
-            for item in items:
-                self.log.debug(f"hsds_writer> POST item: {item}")
-            post_rsp = self.http_conn.POST("/" + collection, items)
-            self.log.debug(f"hsds_writer> POST post_rsp.status_code: {post_rsp.status_code}")
-            items.clear()
-
-        self.log.debug(f"hsds_writer> createObjects, {len(obj_ids)} objects")
-        MAX_OBJECTS_PER_REQUEST = 300
-        collections = ("groups", "datasets", "datatypes")
-        col_items = {}
-        dset_value_update_ids = set()
-        for collection in collections:
-            col_items[collection] = []
-
-        for obj_id in obj_ids:
-            if obj_id == self._root_id:
-                continue  # this was created when the domain was
-            collection = getCollectionForId(obj_id)
-            obj_json = self.db.getObjectById(obj_id)
-            item = {"id": obj_id}
-            self.log.debug(f"create id: {obj_id}")
-            for key in obj_json:  # ("links", "attributes"):
-                if key == "updates":
-                    # not part of the obj json
-                    continue
-                if key == "attributes":
-                    # will update attribute later
-                    continue
-                if key == "links":
-                    # links will also be updated later
-                    continue
-                if key == "shape":
-                    # just send the dims, not the shape json
-                    shape_json = obj_json["shape"]
-                    if shape_json["class"] == "H5S_SIMPLE":
-                        dims = shape_json["dims"]
-                        item[key] = dims
-                else:
-                    # just copy the key value directly
-                    item[key] = obj_json[key]
-
-            # initialize dataset values if provided and not too large
-            if collection == "datasets":
-                dset_dims = getDims(obj_json)  # will be None for null space datasets
-                dset_size = self.getDatasetSize(obj_id)  # number of bytes defined by the shape
-                init_arr = None  # data to be passed to post create method
-                updates = obj_json.get("updates")
-                if updates and len(updates) == 1 and dset_size < MAX_INIT_SIZE:
-                    sel, arr = updates[0]
-                    if sel.select_type == selections.H5S_SELECT_ALL:
-                        init_arr = arr
-                        updates.clear()  # reset the update list
-                if self._init and init_arr is None and dset_dims is not None:
-                    # get all values from dataset if small enough
-                    if dset_size < MAX_INIT_SIZE:
-                        sel_all = selections.select(dset_dims, ...)
-                        init_arr = self.db.getDatasetValues(obj_id, sel_all)
-                if init_arr is not None:
-                    value = bytesArrayToList(init_arr)
-                    item["value"] = value
-                elif updates or self._init:
-                    dset_value_update_ids.add(obj_id)  # will set dataset value below
-
-            # add to the list of new items for the given collection
-            items = col_items[collection]
-            items.append(item)
-
-            if len(items) == MAX_OBJECTS_PER_REQUEST:
-                multiPost(items)
-
-        # handle any remainder items
-        for collection in collections:
-            items = col_items[collection]
-            if items:
-                multiPost(items)
-
-        # write any initial dataset values
-        if dset_value_update_ids:
-            self.updateValues(dset_value_update_ids)
-
-    def deleteObjects(self, obj_ids):
-        """ remove the given obj ids from the HSDS store """
-
-        # no multi-delete operation yet, so delete one by one
-        for obj_id in obj_ids:
-            collection = getCollectionForId(obj_id)
-            req = f"/{collection}/{obj_id}"
-            http_rsp = self.http_conn.DELETE(req)
-            if http_rsp.status_code not in (200, 410):
-                self.log.error(f"got {http_rsp.status_code} for DELETE {req}")
-
-    def updateLinks(self, grp_ids):
-        """ update any modified links of the given objects """
-
-        self.log.debug("hsds_writer> updateLinks")
-        items = {}  # dict which will hold a map of grp ids to links to create
-        removals = {}  # map of grp_ids to link titles to be deleted
-        count = 0
-
-        for grp_id in grp_ids:
-            if getCollectionForId(grp_id) != "groups":
-                continue  # ignore datasets and datatypes
-            grp_json = self.db.getObjectById(grp_id)
-            grp_links = grp_json["links"]
-            link_titles = list(grp_links.keys())
-            for link_title in link_titles:
-                link_json = grp_links[link_title]
-                if "created" not in link_json:
-                    self.log.error(f"hsds_writer> expected created timestamp in link: {link_json}")
-                created = link_json["created"]
-                if "DELETED" in link_json:
-                    if created > self._last_flush_time:
-                        # link hasn't been created yet
-                        msg = f"hsds_writer> {grp_id}: link: {link_title} deleted before flush"
-                        self.log.debug(msg)
-                    else:
-                        # link has been persisted, remove
-                        if grp_id not in removals:
-                            removals[grp_id] = set()
-                        removals[grp_id].add(link_title)
-                elif created > self._last_flush_time:
-                    self.log.debug(f"hsds_writer> {grp_id}: new link: {link_title}")
-                    count += 1
-                    # new link, add to our list
-                    if grp_id not in items:
-                        items[grp_id] = {"links": {}}
-                    links = items[grp_id]["links"]
-                    link_class = link_json["class"]
-                    new_link = {"class": link_class, "created": created}
-                    # convert to hsds representation
-                    if link_class == "H5L_TYPE_HARD":
-                        new_link["id"] = link_json["id"]
-                    elif link_class == "H5L_TYPE_SOFT":
-                        new_link["h5path"] = link_json["h5path"]
-                    elif link_class == "H5L_TYPE_EXTERNAL":
-                        new_link["h5path"] = link_json["h5path"]
-                        new_link["h5domain"] = link_json["file"]  # use h5domain for file key
-                    elif link_class == "H5L_TYPE_USER_DEFINED":
-                        self.log.warning(f"ignoring user-defined link: {link_title}")
-                        continue
-                    else:
-                        raise IOError(f"unexpected link class: {link_class}")
-                    links[link_title] = new_link
-                    self.log.debug(f"setting link {link_title} to {new_link}")
-                else:
-                    self.log.debug(f"link {link_title} has already been persisted")
-
-        if removals:
-            # TBD: hsds doesn't have a multiple object link deletion operation yet
-            # so make one request per object id
-            for grp_id in removals:
-                titles = removals[grp_id]
-                params = {"titles": "/".join(titles)}
-                del_rsp = self.http_conn.DELETE("/groups/" + grp_id + links, params=params)
-                if del_rsp.status_code != 200:
-                    self.log.error("failed to delete links for grp: {grp_id} titles: {titles}")
-                    raise IOError("hsds_writer failed to delete links")
-                else:
-                    self.log.debug(f"hsds_writer> {grp_id} deleted {len(titles)} links")
-                    self._lastModified = time.time()
-                    # remove links from link_json in db
-                    grp_json = self.db.getObjectById(grp_id)
-                    grp_links = grp_json["links"]
-                    for title in titles:
-                        del grp_links[title]
-
-        if items:
-            body = {"grp_ids": items}
-            put_rsp = self.http_conn.PUT("/groups/" + self._root_id + "/links", body=body)
-            if put_rsp.status_code not in (200, 201):
-                self.log.error(f"failed to update links for request: {body}")
-                raise IOError("hsds_writer unable to update links")
-            else:
-                self.log.debug(f"hsds_writer> {grp_id} {count} links updated")
-                self._lastModified = time.time()
-
-    def _deleteAttribute(self, obj_id, attr_name):
-        # delete the given attribute
-
-        col_name = getCollectionForId(obj_id)
-        req = f"/{col_name}/{obj_id}/attributes/{attr_name}"
-        http_rsp = self.http_conn.DELETE(req)
-        if http_rsp.status_code != 200:
-            self.log.error("failed to delete attribute for obj: {obj_id} name: {attr_name}")
-            raise IOError("hsds_writer failed to delete attribute")
-
-    def updateAttributes(self, obj_ids):
-        """ update any modified links of the given objects """
-
-        self.log.debug("hsds_writer> updateAttributes")
-        items = {}  # dict which will hold a map of objects ids to attributes to create
-        removals = {}  # map of obj_ids to attributes to be deleted
-        separator = '|'  # use this character to join attribute names for deletion
-
-        count = 0
-
-        for obj_id in obj_ids:
-            obj_json = self.db.getObjectById(obj_id)
-            obj_attrs = obj_json["attributes"]
-            for attr_name in obj_attrs:
-                attr_json = obj_attrs[attr_name]
-
-                if "created" not in attr_json:
-                    self.log.error(f"hsds_writer> expected created timestamp in attr: {attr_json}")
-                created = attr_json["created"]
-                if "DELETED" in attr_json:
-                    if created > self._last_flush_time:
-                        # attribute hasn't been created yet
-                        msg = f"hsds_writer> {obj_id}: attr: {attr_name} deleted before flush"
-                        self.log.debug(msg)
-                    else:
-                        # attribute has been persisted, remove
-                        if attr_name.find(separator) != -1:
-                            # need to delete individually
-                            self._deleteAttribute(obj_id, attr_name)
-                        else:
-                            # can delete in a batch
-                            if obj_id not in removals:
-                                removals[obj_id] = set()
-                            removals[obj_id].add(attr_name)
-                elif created > self._last_flush_time:
-                    self.log.debug(f"hsds_writer> {obj_id} attribute {attr_name} created")
-                    count += 1
-                    # new attribute, add to our list
-                    if obj_id not in items:
-                        items[obj_id] = {"attributes": {}}
-                    attrs = items[obj_id]["attributes"]
-                    attrs[attr_name] = attr_json
-                else:
-                    self.log.debug(f"hsds_writer> {obj_id}: attr: {attr_name} has already been deleted")
-
-        if removals:
-            # TBD: hsds doesn't have a multiple object attribute deletion operation yet
-            # so make one request per object id
-            # Delete with custom separator
-
-            for obj_id in removals:
-                attr_names = removals[obj_id]
-                params = {"attr_names": separator.join(attr_names)}
-                params["separator"] = separator
-                collection = getCollectionForId(obj_id)
-                req = f"/{collection}/{obj_id}/attributes"
-                rsp = self.http_conn.DELETE(req, params=params)
-                if rsp.status_code != 200:
-                    self.log.error("failed to delete attribute for obj: {obj_id}")
-                    raise IOError("hsds_writer failed to delete attributes")
-
-        if items:
-            body = {"obj_ids": items}
-            req = f"/groups/{self._root_id}/attributes"
-            put_rsp = self.http_conn.PUT(req, body=body)
-            if put_rsp.status_code not in (200, 201):
-                self.log.error(f"hsds_writer> put {req} failed, status: {put_rsp.status_code}")
-            else:
-                self.log.debug(f"hsds_writer> {count} attributes updated")
-                self._lastModified = time.time()
-
-    def updateValue(self, dset_id, sel, arr):
-        """ update the given dataset using selection and array """
-        self.log.debug("hsds_writer> updateValue")
-        params = {}
-        data = arrayToBytes(arr)
-        self.log.debug(f"writing binary data, {len(data)} bytes")
-
-        if sel.select_type != selections.H5S_SELECT_ALL:
-            select_param = sel.getQueryParam()
-            self.log.debug(f"got select query param: {select_param}")
-            params["select"] = select_param
-
-        req = f"/datasets/{dset_id}/value"
-        rsp = self.http_conn.PUT(req, body=data, params=params, format="binary")
-        if rsp.status_code != 200:
-            self.log.error(f"PUT {req} returned error: {rsp.status_code}")
-        else:
-            self.log.debug(f"PUT {len(data)} bytes successful")
-            self._lastModified = time.time()
-
-    def updateValues(self, dset_ids):
-        """ write any pending dataset values """
-
-        self.log.debug("hsds_writer> updateValues")
-        for dset_id in dset_ids:
-            if getCollectionForId(dset_id) != "datasets":
-                continue  # ignore groups and datatypes
-            dset_json = self.db.getObjectById(dset_id)
-            dset_dims = getDims(dset_json)
-            if dset_dims is None:
-                # no data to update
-                continue
-            if self._init:
-                # get all data for the dataset
-                # TBD: do this by chunks
-                sel_all = selections.select(dset_dims, ...)
-                arr = self.db.getDatasetValues(dset_id, sel_all)
-                if arr is not None:
-                    self.updateValue(dset_id, sel_all, arr)
-            else:
-                if "updates" not in dset_json:
-                    continue
-                updates = dset_json["updates"]
-                if updates:
-                    self.log.debug(f"hsds_writer> {dset_id} update count: {len(updates)}")
-                    for (sel, arr) in updates:
-                        self.updateValue(dset_id, sel, arr)
-                    updates.clear()
-
-    def flush(self):
-        """ Write dirty items """
-        if self.closed:
-            # no db set yet
-            self.log.warning("hsds_writer> flush called but no db")
-            return IOError("writer is closed")
-        if not self._http_conn:
-            self.log.warning("hsds_writer no http connection")
-            raise IOError("no http connection")
-        self.log.info("hsds_writer.flush()")
-        self.log.debug(f"    new object count: {len(self.db.new_objects)}")
-        self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
-        self.log.debug(f"    deleted object count: {len(self.db.deleted_objects)}")
-        root_id = self._root_id
-        dirty_ids = self.db.dirty_objects.copy()
-        if self._init:
-            # initialize objects
-            self.log.debug(f"hsds_writer> flush -- init is True self.db: {len(self.db.db)} objects")
-            self.db.readAll()
-            self.log.debug(f"hsds_writer>flush, init after readAll, {len(self.db.db)} objects")
-            obj_ids = set(self.db.db.keys())
-            obj_ids.remove(root_id)  # root group created when domain was
-            self.log.debug(f"init createObjects: {obj_ids}")
-            self.createObjects(obj_ids)
-            dirty_ids.update(obj_ids)
-            dirty_ids.add(root_id)  # add back root for attribute and link creation
-            if not self._no_data:
-                # initialize dataset values
-                pass
-                # self.updateValues(obj_ids)
-            self._init = False
-        elif self.db.new_objects:
-            self.log.debug(f"hsds_writer> {len(self.db.new_objects)} objects to create")
-            for obj_id in self.db.new_objects:
-                self.log.debug(f"hsds_writer> new obj id: {obj_id}")
-            self.createObjects(self.db.new_objects)
-            dirty_ids.update(self.db.new_objects)
-        else:
-            self.log.debug("no new objects to persist")
-
-        if dirty_ids:
-            self.log.debug(f"hsds_writer> dirty ids: {dirty_ids}")
-            self.updateLinks(dirty_ids)
-            self.updateAttributes(dirty_ids)
-            if not self._no_data:
-                self.updateValues(dirty_ids)
-
-        if self.db.deleted_objects:
-            self.log.debug(f"deleted ids: {self.db.deleted_objects}")
-            self.deleteObjects(self.db.deleted_objects)
-
-        self._last_flush_time = time.time()
-        self.log.debug("hsds_writer> flush successful")
-        # all objects written successfully
-        return True
-
-    def close(self):
-        # over-ride of H5Writer method
-        self.flush()
-
-    def isClosed(self):
-        """ return closed status """
-        return False if self._http_conn else True
-
-    def get_root_id(self):
-        """ Return root id """
-        return self._root_id
-
-    def getStats(self):
-        """ return a dictionary object with at minimum the following keys:
-            'created': creation time
-            'lastModified': modificationTime
-            'owner': owner name
-        """
-        return self._stats
diff --git a/src/h5json/hsdsstore/httpconn.py b/src/h5json/hsdsstore/httpconn.py
deleted file mode 100644
index dc2ff9b1..00000000
--- a/src/h5json/hsdsstore/httpconn.py
+++ /dev/null
@@ -1,804 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-
-from __future__ import absolute_import
-
-import os
-import sys
-import time
-import base64
-
-import requests
-import requests_unixsocket
-from requests import ConnectionError
-from requests.adapters import HTTPAdapter, Retry
-import json
-import logging
-
-from .. import openid
-from .. import config
-
-
-def eprint(*args, **kwargs):
-    print(*args, file=sys.stderr, **kwargs)
-
-
-DEFAULT_TIMEOUT = (
-    10,
-    1000,
-)  # #20  # 180  # seconds - allow time for hsds service to bounce
-
-"""
-def verifyCert(self):
-    # default to validate CERT for https requests, unless
-    # the H5PYD_VERIFY_CERT environment variable is set and True
-    #
-    # TBD: set default to True once the signing authority of data.hdfgroup.org is
-    # recognized
-    if "H5PYD_VERIFY_CERT" in os.environ:
-        verify_cert = os.environ["H5PYD_VERIFY_CERT"].upper()
-        if verify_cert.startswith('F'):
-            return False
-    return True
-"""
-
-
-def getAzureApiKey():
-    """construct API key for Active Directory if configured"""
-    # TBD: GoogleID?
-
-    api_key = None
-
-    # if Azure AD ids are set, pass them to HttpConn via api_key dict
-    cfg = config.get_config()  # pulls in state from a .hscfg file (if found).
-
-    ad_app_id = None  # Azure AD HSDS Server id
-    if "HS_AD_APP_ID" in os.environ:
-        ad_app_id = os.environ["HS_AD_APP_ID"]
-    elif "hs_ad_app_id" in cfg:
-        ad_app_id = cfg["hs_ad_app_id"]
-    ad_tenant_id = None  # Azure AD tenant id
-    if "HS_AD_TENANT_ID" in os.environ:
-        ad_tenant_id = os.environ["HS_AD_TENANT_ID"]
-    elif "hs_ad_tenant_id" in cfg:
-        ad_tenant_id = cfg["hs_ad_tenant_id"]
-
-    ad_resource_id = None  # Azure AD resource id
-    if "HS_AD_RESOURCE_ID" in os.environ:
-        ad_resource_id = os.environ["HS_AD_RESOURCE_ID"]
-    elif "hs_ad_resource_id" in cfg:
-        ad_resource_id = cfg["hs_ad_resource_id"]
-
-    ad_client_secret = None  # Azure client secret
-    if "HS_AD_CLIENT_SECRET" in os.environ:
-        ad_client_secret = os.environ["HS_AD_CLIENT_SECRET"]
-    elif "hs_ad_client_secret" in cfg:
-        ad_client_secret = cfg["hs_ad_client_secret"]
-
-    if ad_app_id and ad_tenant_id and ad_resource_id:
-        # contruct dict to pass to HttpConn
-        api_key = {
-            "AD_APP_ID": ad_app_id,
-            "AD_TENANT_ID": ad_tenant_id,
-            "AD_RESOURCE_ID": ad_resource_id,
-            "openid_provider": "azure",
-        }
-        # optional config
-        if ad_client_secret:
-            api_key["AD_CLIENT_SECRET"] = ad_client_secret
-    return api_key  # None if AAD not configured
-
-
-def getKeycloakApiKey():
-    # check for keycloak next
-    cfg = config.get_config()  # pulls in state from a .hscfg file (if found).
-    api_key = None
-    # check to see if we are configured for keycloak authentication
-    if "HS_KEYCLOAK_URI" in os.environ:
-        keycloak_uri = os.environ["HS_KEYCLOAK_URI"]
-    elif "hs_keycloak_uri" in cfg:
-        keycloak_uri = cfg["hs_keycloak_uri"]
-    else:
-        keycloak_uri = None
-    if "HS_KEYCLOAK_CLIENT_ID" in os.environ:
-        keycloak_client_id = os.environ["HS_KEYCLOAK_CLIENT_ID"]
-    elif "hs_keycloak_client_id" in cfg:
-        keycloak_client_id = cfg["hs_keycloak_client_id"]
-    else:
-        keycloak_client_id = None
-    if "HS_KEYCLOAK_REALM" in os.environ:
-        keycloak_realm = cfg["HS_KEYCLOAK_REALM"]
-    elif "hs_keycloak_realm" in cfg:
-        keycloak_realm = cfg["hs_keycloak_realm"]
-    else:
-        keycloak_realm = None
-
-    if keycloak_uri and keycloak_client_id and keycloak_uri:
-        api_key = {
-            "keycloak_uri": keycloak_uri,
-            "keycloak_client_id": keycloak_client_id,
-            "keycloak_realm": keycloak_realm,
-            "openid_provider": "keycloak",
-        }
-    return api_key
-
-
-class HttpResponse:
-    """ wrapper for http request responses """
-    def __init__(self, rsp, logger=None):
-        self._rsp = rsp
-        self._logger = logger
-        if logger is None:
-            self.log = logging
-        else:
-            self.log = logging.getLogger(logger)
-        self._text = None
-
-    @property
-    def status_code(self):
-        """ return response status code """
-        return self._rsp.status_code
-
-    @property
-    def reason(self):
-        """ return response reason """
-        return self._rsp.reason
-
-    @property
-    def content_type(self):
-        """ return content type """
-        rsp = self._rsp
-        if 'Content-Type' in rsp.headers:
-            content_type = rsp.headers['Content-Type']
-        else:
-            content_type = ""
-        return content_type
-
-    @property
-    def content_length(self):
-        """ Return length of response if available """
-        if 'Content-Length' in self._rsp.headers:
-            content_length = self._rsp.headers['Content-Length']
-        else:
-            content_length = None
-        return content_length
-
-    @property
-    def is_binary(self):
-        """ return True if the response indicates binary data """
-
-        if self.content_type == "application/octet-stream":
-            return True
-        else:
-            return False
-
-    @property
-    def is_json(self):
-        """ return true if response indicates json """
-
-        if self.content_type.startswith("application/json"):
-            return True
-        else:
-            return False
-
-    @property
-    def text(self):
-        """ getresponse content as bytes """
-
-        if not self._text:
-            rsp = self._rsp
-            if not self.is_binary:
-                # hex encoded response?
-                # this is returned by API Gateway for lambda responses
-                self._text = bytes.fromhex(rsp.text)
-            else:
-                if self.content_length:
-                    self.log.debug(f"got binary response, {self.content_length} bytes")
-                else:
-                    self.log.debug("got binary response, content_length unknown")
-
-                HTTP_CHUNK_SIZE = 4096
-                http_chunks = []
-                downloaded_bytes = 0
-                for http_chunk in rsp.iter_content(chunk_size=HTTP_CHUNK_SIZE):
-                    if http_chunk:  # filter out keep alive chunks
-                        self.log.debug(f"got http_chunk - {len(http_chunk)} bytes")
-                        downloaded_bytes += len(http_chunk)
-                        http_chunks.append(http_chunk)
-                if len(http_chunks) == 0:
-                    raise IOError("no data returned")
-                if len(http_chunks) == 1:
-                    # can return first and only chunk as response
-                    self._text = http_chunks[0]
-                else:
-                    msg = f"retrieved {len(http_chunks)} http_chunks "
-                    msg += f" {downloaded_bytes} total bytes"
-                    self.log.info(msg)
-                    self._text = bytearray(downloaded_bytes)
-                    index = 0
-                    for http_chunk in http_chunks:
-                        self._text[index:(index + len(http_chunk))] = http_chunk
-                        index += len(http_chunk)
-
-        return self._text
-
-    def json(self):
-        """ Return json from response"""
-
-        rsp = self._rsp
-
-        if not self.is_json:
-            raise IOError("response is not json")
-
-        rsp_json = json.loads(rsp.text)
-        self.log.debug(f"rsp_json - {len(rsp.text)} bytes")
-        return rsp_json
-
-
-class HttpConn:
-    """
-    Some utility methods based on equivalents in base class.
-    """
-
-    def __init__(
-        self,
-        domain_name,
-        endpoint=None,
-        username=None,
-        password=None,
-        bucket=None,
-        api_key=None,
-        mode="a",
-        expire_time=1.0,
-        max_objects=None,
-        max_age=1.0,
-        logger=None,
-        retries=3,
-        timeout=DEFAULT_TIMEOUT,
-        **kwds,
-    ):
-        self._domain = domain_name
-        self._mode = mode
-        self._domain_json = None
-        self._retries = retries
-        self._timeout = timeout
-        self._api_key = api_key
-        self._s = None  # Sessions
-        self._server_info = None
-        self._external_refs = []
-
-        self._logger = logger
-        if logger is None:
-            self.log = logging
-        else:
-            self.log = logging.getLogger(logger)
-        msg = f"HttpConn.init(domain: {domain_name}"
-        msg += f"expire_time: {expire_time:6.2f} sec retries: {retries}"
-        self.log.debug(msg)
-
-        if self._timeout != DEFAULT_TIMEOUT:
-            self.log.info(f"HttpConn.init - timeout = {self._timeout}")
-        if not endpoint:
-            if "HS_ENDPOINT" in os.environ:
-                endpoint = os.environ["HS_ENDPOINT"]
-
-        if not endpoint:
-            msg = "no endpoint set"
-            raise ValueError(msg)
-
-        self._endpoint = endpoint
-
-        if not username:
-            if "HS_USERNAME" in os.environ:
-                username = os.environ["HS_USERNAME"]
-        if isinstance(username, str) and (not username or username.upper() == "NONE"):
-            username = None
-        self._username = username
-
-        if not password:
-            if "HS_PASSWORD" in os.environ:
-                password = os.environ["HS_PASSWORD"]
-        if isinstance(password, str) and (not password or password.upper() == "NONE"):
-            password = None
-        self._password = password
-
-        if not bucket:
-            if "HS_BUCKET" in os.environ:
-                bucket = os.environ["HS_BUCKET"]
-            if isinstance(bucket, str) and (not bucket or bucket.upper() == "NONE"):
-                bucket = None
-        self._bucket = bucket
-
-        if api_key is None and "HS_API_KEY" in os.environ:
-            api_key = os.environ["HS_API_KEY"]
-        if isinstance(api_key, str) and (not api_key or api_key.upper() == "NONE"):
-            api_key = None
-        if not api_key:
-            api_key = getAzureApiKey()
-        if not api_key:
-            api_key = getKeycloakApiKey()
-
-        # Convert api_key to OpenIDHandler
-        if isinstance(api_key, dict):
-            # Maintain Azure-defualt backwards compatibility, but allow
-            # both environment variable and kwarg override.
-            provider = api_key.get("openid_provider", "azure")
-            if provider == "azure":
-                self.log.debug("creating OpenIDHandler for Azure")
-                self._api_key = openid.AzureOpenID(endpoint, api_key)
-            elif provider == "google":
-                self.log.debug("creating OpenIDHandler for Google")
-
-                config = api_key.get("client_secret", None)
-                scopes = api_key.get("scopes", None)
-                self._api_key = openid.GoogleOpenID(
-                    endpoint, config=config, scopes=scopes
-                )
-            elif provider == "keycloak":
-                self.log.debug("creating OpenIDHandler for Keycloak")
-
-                # for Keycloak, pass in username and password
-                self._api_key = openid.KeycloakOpenID(
-                    endpoint, config=api_key, username=username, password=password
-                )
-            else:
-                self.log.error(f"Unknown openid provider: {provider}")
-
-    def getHeaders(self, username=None, password=None, headers=None):
-
-        if headers is None:
-            headers = {}
-
-        # This should be the default - but explicitly set anyway
-        if "Accept-Encoding" not in headers:
-            headers['Accept-Encoding'] = "deflate, gzip"
-
-        elif "Authorization" in headers:
-            return headers  # already have auth key
-        if username is None:
-            username = self._username
-        if password is None:
-            password = self._password
-
-        if self._api_key:
-            self.log.debug("using api key")
-            # use OpenId handler to get a bearer token
-            token = ""
-
-            # Get a token, possibly refreshing if needed.
-            if isinstance(self._api_key, openid.OpenIDHandler):
-                token = self._api_key.token
-
-            # Token was provided as a string.
-            elif isinstance(self._api_key, str):
-                token = self._api_key
-
-            if token:
-                auth_string = b"Bearer " + token.encode("ascii")
-                headers["Authorization"] = auth_string
-        elif username is not None and password is not None:
-            self.log.debug(f"use basic auth with username: {username}")
-            auth_string = username + ":" + password
-            auth_string = auth_string.encode("utf-8")
-            auth_string = base64.b64encode(auth_string)
-            auth_string = b"Basic " + auth_string
-            headers["Authorization"] = auth_string
-        else:
-            self.log.debug("no auth header")
-            # no auth header
-            pass
-
-        return headers
-
-    def serverInfo(self):
-        if self._server_info:
-            return self._server_info
-
-        if self._endpoint is None:
-            raise IOError("object not initialized")
-
-        # make an about request
-        rsp = self.GET("/about")
-        if rsp.status_code != 200:
-            raise IOError(rsp.status_code, rsp.reason)
-        server_info = rsp.json()
-        if server_info:
-            self._server_info = server_info
-        return server_info
-
-    def server_version(self):
-        server_info = self.serverInfo()
-        if "hsds_version" in server_info:
-            server_version = server_info["hsds_version"]
-        else:
-            # no standard way to get version for other implements...
-            server_version = None
-        return server_version
-
-    def verifyCert(self):
-        # default to validate CERT for https requests, unless
-        # the H5PYD_VERIFY_CERT environment variable is set and True
-        #
-        # TBD: set default to True once the signing authority of data.hdfgroup.org is
-        # recognized
-        if "H5PYD_VERIFY_CERT" in os.environ:
-            verify_cert = os.environ["H5PYD_VERIFY_CERT"].upper()
-            if verify_cert.startswith("F"):
-                return False
-        return True
-
-    def GET(self, req, format="json", params=None, headers=None):
-        if self._endpoint is None:
-            raise IOError("object not initialized")
-        if not self._s:
-            raise IOError("http session is closed")
-        # check that domain is defined (except for some specific requests)
-        if req not in ("/domains", "/about", "/info", "/") and self._domain is None:
-            raise IOError(f"no domain defined: req: {req}")
-
-        rsp = None
-
-        headers = self.getHeaders(headers=headers)
-
-        if params is None:
-            params = {}
-        if "domain" not in params:
-            params["domain"] = self._domain
-        if "bucket" not in params and self._bucket:
-            params["bucket"] = self._bucket
-        if self._api_key and not isinstance(self._api_key, dict):
-            params["api_key"] = self._api_key
-        domain = params["domain"]
-        self.log.debug(f"GET: {req} [{domain}] bucket: {self._bucket}")
-
-        if format == "binary":
-            headers["accept"] = "application/octet-stream"
-
-        self.log.info(f"GET: {self._endpoint + req} [{params['domain']}] timeout: {self._timeout}")
-
-        for k in params:
-            if k != "domain":
-                v = params[k]
-                self.log.debug(f"GET params {k}:{v}")
-
-        try:
-            s = self._s
-            stream = True  # tbd  - config for no streaming?
-            ts = time.time()
-            rsp = s.get(
-                self._endpoint + req,
-                params=params,
-                headers=headers,
-                stream=stream,
-                timeout=self._timeout,
-                verify=self.verifyCert(),
-            )
-            elapsed = time.time() - ts
-            self.log.info(f"status: GET {rsp.status_code}, elapsed: {elapsed:.4f}")
-        except ConnectionError as ce:
-            self.log.error(f"connection error: {ce}")
-            raise IOError("Connection Error")
-        except Exception as e:
-            self.log.error(f"got {type(e)} exception: {e}")
-            raise IOError("Unexpected exception")
-
-        if rsp.status_code != 200:
-            self.log.warning(f"GET {req} returned status: {rsp.status_code}")
-
-        return HttpResponse(rsp)
-
-    def PUT(self, req, body=None, format="json", params=None, headers=None):
-        if self._endpoint is None:
-            raise IOError("object not initialized")
-        if self._domain is None:
-            raise IOError("no domain defined")
-        if not self._s:
-            raise IOError("http session is closed")
-
-        if params:
-            self.log.info(f"PUT params: {params}")
-        else:
-            params = {}
-
-        if "domain" not in params:
-            params["domain"] = self._domain
-        if "bucket" not in params and self._bucket:
-            params["bucket"] = self._bucket
-        if self._api_key:
-            params["api_key"] = self._api_key
-
-        # verify the file was open for modification
-        if self._mode == "r":
-            raise IOError("Unable to create group (No write intent on file)")
-
-        # try to do a PUT to the domain
-
-        headers = self.getHeaders(headers=headers)
-
-        if format == "binary":
-            headers["Content-Type"] = "application/octet-stream"
-            # binary write
-            data = body
-        else:
-            headers["Content-Type"] = "application/json"
-            data = json.dumps(body)
-
-        self.log.info(f"PUT: {req} format: {format} [{len(data)} bytes]")
-
-        try:
-            s = self._s
-            ts = time.time()
-            rsp = s.put(
-                self._endpoint + req,
-                data=data,
-                headers=headers,
-                params=params,
-                verify=self.verifyCert(),
-            )
-            elapsed = time.time() - ts
-            self.log.info(f"status: PUT {rsp.status_code}, elapsed: {elapsed:.4f}")
-        except ConnectionError as ce:
-            self.log.error(f"connection error: {ce}")
-            raise IOError("Connection Error")
-
-        if rsp.status_code == 201 and req == "/":
-            self.log.info("clearing domain_json cache")
-            self._domain_json = None
-        if rsp.status_code not in (200, 201):
-            self.log.warning(f"got status code: {rsp.status_code} for PUT {req}")
-        self.log.info(f"PUT returning: {rsp}")
-
-        return HttpResponse(rsp)
-
-    def POST(self, req, body=None, format="json", params=None, headers=None):
-        if self._endpoint is None:
-            raise IOError("object not initialized")
-        if self._domain is None:
-            raise IOError("no domain defined")
-        if not self._s:
-            raise IOError("http session is closed")
-
-        if params is None:
-            params = {}
-        if "domain" not in params:
-            params["domain"] = self._domain
-        if "bucket" not in params and self._bucket:
-            params["bucket"] = self._bucket
-        if self._api_key:
-            params["api_key"] = self._api_key
-
-        # verify we have write intent (unless this is a dataset point selection)
-        if req.startswith("/datasets/") and req.endswith("/value"):
-            point_sel = True
-        else:
-            point_sel = False
-        if self._mode == "r" and not point_sel:
-            raise IOError("Unable perform request (No write intent on file)")
-
-        # try to do a POST to the domain
-
-        headers = self.getHeaders(headers=headers)
-
-        if isinstance(body, bytes):
-            headers["Content-Type"] = "application/octet-stream"
-            data = body
-        else:
-            # assume json
-            try:
-                data = json.dumps(body)
-            except TypeError:
-                msg = f"Unable to convert {body} to json"
-                self.log.error(msg)
-                raise IOError("JSON encoding error")
-        if format == "binary":
-            # receive data as binary
-            headers["accept"] = "application/octet-stream"
-
-        self.log.info("POST: " + req)
-
-        try:
-            s = self._s
-            ts = time.time()
-            rsp = s.post(
-                self._endpoint + req,
-                data=data,
-                headers=headers,
-                params=params,
-                verify=self.verifyCert(),
-            )
-            elapsed = time.time() - ts
-            self.log.info(f"status: POST {rsp.status_code}, elapsed: {elapsed:.4f}")
-        except ConnectionError as ce:
-            self.log.warning(f"connection error: {ce}")
-            raise IOError(str(ce))
-
-        if rsp.status_code not in (200, 201):
-            self.log.error(f"got status_code: {rsp.status_code} for DELETE: {req}")
-
-        return HttpResponse(rsp)
-
-    def DELETE(self, req, params=None, headers=None):
-        if self._endpoint is None:
-            raise IOError("object not initialized")
-        if not self._s:
-            raise IOError("http session is closed")
-
-        if req not in ("/domains", "/") and self._domain is None:
-            raise IOError("no domain defined")
-        if params is None:
-            params = {}
-        if "domain" not in params:
-            params["domain"] = self._domain
-        if "bucket" not in params and self._bucket:
-            params["bucket"] = self._bucket
-        if self._api_key:
-            params["api_key"] = self._api_key
-
-        # verify we have write intent
-        if self._mode == "r":
-            raise IOError("Unable perform request (No write intent on file)")
-
-        # try to do a DELETE of the resource
-        headers = self.getHeaders(headers=headers)
-
-        self.log.info("DEL: " + req)
-        try:
-            ts = time.time()
-            rsp = self._s.delete(
-                self._endpoint + req,
-                headers=headers,
-                params=params,
-                verify=self.verifyCert(),
-            )
-            self.log.info(f"status: {rsp.status_code}")
-            elapsed = time.time() - ts
-            self.log.info(f"status: DELETE {rsp.status_code}, elapsed: {elapsed:.4f}")
-        except ConnectionError as ce:
-            self.log.error(f"connection error: {ce}")
-            raise IOError("Connection Error")
-
-        if rsp.status_code == 200 and req == "/":
-            self.log.info("clearing domain_json cache")
-            self._domain_json = None
-
-        if rsp.status_code != 200:
-            self.log.warning(f"got status_code: {rsp.status_code} for DELETE {req}")
-
-        return HttpResponse(rsp)
-
-    def add_external_ref(self, fid):
-        # this is used by the group class to keep references to external links open
-        if fid.__class__.__name__ != "FileID":
-            raise TypeError("add_external_ref, expected FileID type")
-        self._external_refs.append(fid)
-
-    def open(self):
-        self.log.debug("http_conn.open")
-        if self._s:
-            return  # already open
-
-        retries = self._retries
-        backoff_factor = 1
-        status_forcelist = (500, 502, 503, 504)
-        if self._endpoint.startswith("http+unix://"):
-            self.log.debug(f"create unixsocket session: {self._endpoint}")
-            s = requests_unixsocket.Session()
-        else:
-            # regular request session
-            s = requests.Session()
-
-            retry = Retry(
-                total=retries,
-                read=retries,
-                connect=retries,
-                backoff_factor=backoff_factor,
-                status_forcelist=status_forcelist,
-            )
-            kwargs = {"max_retries": retry, "pool_connections": 16, "pool_maxsize": 16}
-            s.mount("http://", HTTPAdapter(**kwargs))
-            s.mount("https://", HTTPAdapter(**kwargs))
-        self.log.debug("Httpconn set self._s")
-        self._s = s
-
-    def close(self):
-        if self._s:
-            self.log.debug("http_conn.close")
-            self._s.close()
-            self._s = None
-
-    def isClosed(self):
-        if self._s is None:
-            return True
-        else:
-            return False
-
-    @property
-    def domain(self):
-        return self._domain
-
-    @property
-    def username(self):
-        return self._username
-
-    @property
-    def endpoint(self):
-        return self._endpoint
-
-    @property
-    def password(self):
-        return self._password
-
-    @property
-    def mode(self):
-        return self._mode
-
-    @property
-    def domain_json(self):
-        if self._domain_json is None:
-            rsp = self.GET("/")
-            if rsp.status_code != 200:
-                raise IOError(rsp.reason)
-            # assume JSON
-            self._domain_json = rsp.json()
-        return self._domain_json
-
-    @property
-    def root_uuid(self):
-        domain_json = self.domain_json
-        if "root" not in domain_json:
-            raise IOError("Unexpected response")
-        root_uuid = domain_json["root"]
-        return root_uuid
-
-    @property
-    def compressors(self):
-        compressors = []
-        if "compressors" in self.domain_json:
-            compressors = self.domain_json["compressors"]
-        if not compressors:
-            compressors = [
-                "gzip",
-            ]
-        return compressors
-
-    @property
-    def modified(self):
-        """Last modified time of the domain as a datetime object."""
-        domain_json = self.domain_json
-        if "lastModified" not in domain_json:
-            raise IOError("Unexpected response")
-        last_modified = domain_json["lastModified"]
-        return last_modified
-
-    @property
-    def created(self):
-        """Creation time of the domain"""
-        domain_json = self.domain_json
-        if "created" not in domain_json:
-            raise IOError("Unexpected response")
-        created = domain_json["created"]
-        return created
-
-    @property
-    def owner(self):
-        """username of creator of domain"""
-        domain_json = self.domain_json
-        username = None
-        if "owner" in domain_json:
-            # currently this is only available for HSDS
-            username = domain_json["owner"]
-        return username
-
-    @property
-    def logging(self):
-        """return name of logging handler"""
-        return self.log
diff --git a/src/h5json/openid.py b/src/h5json/openid.py
deleted file mode 100644
index af38d94a..00000000
--- a/src/h5json/openid.py
+++ /dev/null
@@ -1,437 +0,0 @@
-import os
-import sys
-import json
-import requests
-import time
-from abc import ABC, abstractmethod
-from datetime import datetime
-
-from . import config as hsconfig
-
-
-def eprint(*args, **kwargs):
-    print(*args, file=sys.stderr, **kwargs)
-
-
-# Azure
-try:
-    import adal
-except ModuleNotFoundError:
-    pass  # change this to the eprint below to see the import error
-    # eprint()"Unable to import azure auth packages")
-
-# Google
-try:
-    from google_auth_oauthlib.flow import InstalledAppFlow as GoogleInstalledAppFlow
-    from google.auth.transport.requests import Request as GoogleRequest
-    from google.oauth2.credentials import Credentials as GoogleCredentials
-    from google.oauth2 import id_token as GoogleIDToken
-except ModuleNotFoundError:
-    pass  # change this to the eprint below to see the import error
-    # eprint("Unable to import google auth packages")
-
-
-class OpenIDHandler(ABC):
-
-    def __init__(self, endpoint, use_token_cache=True, username=None, password=None):
-        """Initialize the token."""
-
-        # Location of the token cache.
-        self._token_cache_file = os.path.expanduser('~/.hstokencfg')
-        self._endpoint = endpoint
-        self._username = username
-        self._password = password
-
-        # The _token attribute should be a dict with at least the following keys:
-        #
-        # accessToken - The OpenID token to send.
-        # refreshToken - The refresh token (optional).
-        # expiresOn - The unix timestamp when the token expires (optional).
-
-        if not use_token_cache or not os.path.isfile(self._token_cache_file):
-            self._token = None
-        else:
-            if username:
-                file_key = username + '@' + endpoint
-            else:
-                file_key = endpoint
-            with open(self._token_cache_file, 'r') as token_file:
-                self._token = json.load(token_file).get(file_key, None)
-
-    @abstractmethod
-    def acquire(self):
-        """Acquire a new token from the provider."""
-        pass
-
-    @abstractmethod
-    def refresh(self):
-        """Refresh an existing token with the provider."""
-        pass
-
-    @property
-    def username(self):
-        """ Return username if known """
-        return self._username
-
-    @property
-    def expired(self):
-        """Return if the token is expired."""
-        t = self._token
-        # add some buffer to account for clock skew
-        return t is not None and 'expiresOn' in t and time.time() + 10.0 >= t['expiresOn']
-
-    @property
-    def token(self):
-        """Return the token if valid, otherwise get a new one."""
-
-        if self.expired:
-            self.refresh()
-            if self._token:
-                self.write_token_cache()
-
-        if self._token is None:
-            self.acquire()
-            self.write_token_cache()
-
-        return self._token['accessToken']
-
-    def write_token_cache(self):
-        """Write the token to a file cache."""
-
-        cache_exists = os.path.isfile(self._token_cache_file)
-
-        if self._username:
-            file_key = self._username + '@' + self._endpoint
-        else:
-            file_key = self._endpoint
-
-        # Create a new cache file.
-        if not cache_exists and self._token is not None:
-            with open(self._token_cache_file, 'w') as token_file:
-                json.dump({file_key: self._token}, token_file)
-
-        # Update an exisiting cache file.
-        elif cache_exists:
-            with open(self._token_cache_file, 'r+') as token_file:
-                cache = json.loads(token_file.read())
-
-                # Store valid tokens.
-                if self._token is not None:
-                    cache[file_key] = self._token
-
-                # Delete invalid tokens.
-                elif file_key in cache:
-                    del cache[file_key]
-
-                token_file.seek(0)
-                token_file.truncate(0)
-                json.dump(cache, token_file)
-
-
-class AzureOpenID(OpenIDHandler):
-
-    AUTHORITY_URI = 'https://login.microsoftonline.com'  # login endpoint for AD auth
-
-    def __init__(self, endpoint, config=None):
-        """Store configuration."""
-
-        # Configuration manager
-        hs_config = hsconfig.get_config()
-
-        # Config is a dictionary.
-        if isinstance(config, dict):
-            self.config = config
-
-        # Maybe client_secrets are in environment variables?
-        else:
-
-            self.config = {
-                'AD_APP_ID': hs_config.get("hs_ad_app_id", None),
-                'AD_TENANT_ID': hs_config.get("hs_ad_tenant_id", None),
-                'AD_RESOURCE_ID': hs_config.get("hs_ad_resource_id", None),
-                'AD_CLIENT_SECRET': hs_config.get("hs_ad_client_secret", None)
-            }
-
-        if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']:
-            use_token_cache = False
-        else:
-            use_token_cache = True
-
-        super().__init__(endpoint, use_token_cache=use_token_cache)
-
-    def write_token_cache(self):
-        if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']:
-            pass  # don't use token cache for unattended authentication
-        else:
-            super().write_token_cache()
-
-    def acquire(self):
-        """Acquire a new Azure token."""
-
-        if "adal" not in sys.modules:
-            msg = "adal module not found, run: pip install -e . '.[azure]'"
-            raise ModuleNotFoundError(msg)
-
-        app_id = self.config["AD_APP_ID"]
-        resource_id = self.config["AD_RESOURCE_ID"]
-        tenant_id = self.config["AD_TENANT_ID"]
-        client_secret = self.config.get("AD_CLIENT_SECRET", None)
-        authority_uri = self.AUTHORITY_URI + '/' + tenant_id
-
-        # Try to get a token using different oauth flows.
-        context = adal.AuthenticationContext(authority_uri, enable_pii=True, api_version=None)
-
-        try:
-            if client_secret is not None:
-                code = context.acquire_token_with_client_credentials(resource_id, app_id, client_secret)
-            else:
-                code = context.acquire_user_code(resource_id, app_id)
-
-        except Exception as e:
-            eprint(f"unable to process AD token: {e}")
-            self._token = None
-            self.write_token_cache()
-            raise
-
-        if "message" in code:
-            eprint(code["message"])
-            mgmt_token = context.acquire_token_with_device_code(resource_id, code, app_id)
-
-        elif "accessToken" in code:
-            mgmt_token = code
-
-        else:
-            eprint("Could not authenticate with AD")
-
-        # Only store some fields.
-        self._token = {
-            'accessToken': mgmt_token['accessToken'],
-            'refreshToken': mgmt_token.get('refreshToken', None),
-            'tenantId': mgmt_token.get('tenantId', tenant_id),
-            'clientId': mgmt_token.get('_clientId', app_id),
-            'resource': mgmt_token.get('resource', resource_id)
-        }
-
-        # Parse time to timestamp.
-        if 'expiresOn' in mgmt_token:
-            expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f')
-            self._token['expiresOn'] = expire_dt.timestamp()
-
-    def refresh(self):
-        """Try to renew an Azure token."""
-
-        try:
-
-            # This will work for device code flow, but not with client
-            # credentials. If we have the secret, we can just request a new
-            # token anyways.
-
-            authority_uri = self.AUTHORITY_URI + '/' + self._token['tenantId']
-            context = adal.AuthenticationContext(authority_uri, api_version=None)
-            mgmt_token = context.acquire_token_with_refresh_token(self._token['refreshToken'],
-                                                                  self._token['clientId'],
-                                                                  self._token['resource'],
-                                                                  None)
-
-            # New token does not have all the metadata.
-            self._token['accessToken'] = mgmt_token['accessToken']
-            self._token['refreshToken'] = mgmt_token['refreshToken']
-
-            # Parse time to timestamp.
-            if 'expiresOn' in mgmt_token:
-                expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f')
-                self._token['expiresOn'] = expire_dt.timestamp()
-
-        except Exception:
-            self._token = None
-
-
-class GoogleOpenID(OpenIDHandler):
-
-    def __init__(self, endpoint, config=None, scopes=None):
-        """Store configuration."""
-
-        if "google.oauth2" not in sys.modules:
-            msg = "google.oauth2 module not found, run: pip install -e . '.[google]'"
-            raise ModuleNotFoundError(msg)
-
-        # Configuration manager
-        hs_config = hsconfig.get_config()
-
-        if scopes is None:
-            scopes = hs_config.get('hs_google_scopes', 'openid').split()
-        self.scopes = scopes
-
-        # Config is a client_secrets dictionary.
-        if isinstance(config, dict):
-            self.config = config
-
-        # Config points to a client_secrets.json file.
-        elif isinstance(config, str) and os.path.isfile(config):
-            with open(config, 'r') as f:
-                self.config = json.loads(f.read())
-
-        # Maybe client_secrets are in environment variables?
-        else:
-            self.config = {
-                'installed': {
-                    'project_id': hs_config.get('hs_google_project_id', None),
-                    'client_id': hs_config.get('hs_google_client_id', None),
-                    'client_secret': hs_config.get('hs_google_client_secret', None),
-                    'auth_uri': 'https://accounts.google.com/o/oauth2/auth',
-                    'token_uri': 'https://oauth2.googleapis.com/token',
-                    'auth_provider_x509_cert_url': 'https://www.googleapis.com/oauth2/v1/certs',
-                    'redirect_uris': ['urn:ietf:wg:oauth:2.0:oob', 'http://localhost']
-                }
-            }
-
-        super().__init__(endpoint)
-
-    def _parse(self, creds):
-        """Parse credentials."""
-
-        # NOTE: In Google OpenID, if a client is set up for InstalledAppFlow
-        # then the client_secret is not actually treated as a secret. Acquire
-        # will ALWAYS prompt for user input before granting a token.
-
-        token = {
-            'accessToken': creds.id_token,
-            'refreshToken': creds.refresh_token,
-            'tokenUri': creds.token_uri,
-            'clientId': creds.client_id,
-            'clientSecret': creds.client_secret,
-            'scopes': creds.scopes
-        }
-
-        # The expiry field that is in creds is for the OAuth token, not the
-        # OpenID token. We need to validate the OpenID tokenn to get the exp.
-        idinfo = GoogleIDToken.verify_oauth2_token(creds.id_token, GoogleRequest())
-        if 'exp' in idinfo:
-            token['expiresOn'] = idinfo['exp']
-
-        return token
-
-    def acquire(self):
-        """Acquire a new Google token."""
-
-        flow = GoogleInstalledAppFlow.from_client_config(self.config,
-                                                         scopes=self.scopes)
-        creds = flow.run_console()
-        self._token = self._parse(creds)
-
-    def refresh(self):
-        """Try to renew a token."""
-
-        try:
-
-            token = self._token
-            creds = GoogleCredentials(token=None,
-                                      refresh_token=token['refreshToken'],
-                                      scopes=token['scopes'],
-                                      token_uri=token['tokenUri'],
-                                      client_id=token['clientId'],
-                                      client_secret=token['clientSecret'])
-
-            creds.refresh(GoogleRequest())
-            self._token = self._parse(creds)
-
-        except Exception:
-            self._token = None
-
-
-class KeycloakOpenID(OpenIDHandler):
-
-    def __init__(self, endpoint, config=None, scopes=None, username=None, password=None):
-        """Store configuration."""
-
-        # Configuration manager
-        hs_config = hsconfig.get_config()
-
-        if scopes is None:
-            scopes = hs_config.get('hs_keycloak_scopes', 'openid').split()
-        self.scopes = scopes
-
-        # Config is a client_secrets dictionary.
-        if isinstance(config, dict):
-            self.config = config
-
-        # Config points to a client_secrets.json file.
-        elif isinstance(config, str) and os.path.isfile(config):
-            with open(config, 'r') as f:
-                self.config = json.loads(f.read())
-
-        # Maybe configs are in environment variables?
-        else:
-            self.config = {
-                'keycloak_client_id': hs_config.get('hs_keycloak_client_id', None),
-                'keycloak_client_secret': hs_config.get('hs_keycloak_client_secret', None),
-                'keycloak_realm': hs_config.get('hs_keycloak_realm', None),
-                'keycloak_uri': hs_config.get('hs_keycloak_uri', None)
-            }
-
-        super().__init__(endpoint, username=username, password=password)
-
-    def _getKeycloakUrl(self):
-        if not self.config['keycloak_uri']:
-            raise KeyError("keycloak_uri not set")
-        if not self.config['keycloak_realm']:
-            raise KeyError("Keycloak realm not set")
-        if not self.config['keycloak_client_id']:
-            raise KeyError("keycloak client_id not set")
-
-        url = self.config['keycloak_uri']
-        url += "/realms/"
-        url += self.config['keycloak_realm']
-        url += "/protocol/openid-connect/token"
-
-        return url
-
-    def _parse(self, creds):
-        """Parse credentials."""
-
-        # validate json returned by keycloak
-        if "token_type" not in creds:
-            raise IOError("Unexpected Keycloak JWT, no token_type")
-        if creds["token_type"].lower() != "bearer":
-            raise IOError("Unexpected Keycloak JWT, expected Bearer token")
-
-        token = {}
-        if "access_token" not in creds:
-            raise IOError("Unexpected Keycloak JWT, no access_token")
-        token["accessToken"] = creds["access_token"]
-        if "refesh_token" in creds:
-            token["refreshToken"] = creds["refresh_token"]
-        if "expires_in" in creds:
-            now = time.time()
-            token['expiresOn'] = now + creds["expires_in"]
-
-        # TBD: client_secret
-        # TBD: scopes
-        # TBD: client_id
-
-        return token
-
-    def acquire(self):
-        """Acquire a new Keycloak token."""
-        keycloak_url = self._getKeycloakUrl()
-
-        headers = {"Content-Type": "application/x-www-form-urlencoded"}
-        body = {}
-        body["username"] = self._username
-        body["password"] = self._password
-        body["grant_type"] = "password"
-        body["client_id"] = self.config.get("keycloak_client_id")
-        rsp = requests.post(keycloak_url, data=body, headers=headers)
-
-        if rsp.status_code not in (200, 201):
-            print(f"POST error: {rsp.status_code}")
-            raise IOError(f"Keycloak response: {rsp.status_code}")
-
-        creds = rsp.json()  # TBD: catch json format errors?
-        self._token = self._parse(creds)
-
-    def refresh(self):
-        """Try to renew a token."""
-        # TBD
-        # unclear if refresh is supported without a client secret
-        self._token = None
diff --git a/testall.py b/testall.py
index a33cb327..1cb36136 100755
--- a/testall.py
+++ b/testall.py
@@ -24,8 +24,6 @@
     "h5json_writer_test",
     "h5py_reader_test",
     "h5py_writer_test",
-    "hsds_reader_test",
-    "hsds_writer_test",
 ]
 
 use_hsds = True

From c60e1c9c3e6b2c386564149311168f74e9586c94 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 12 Sep 2025 18:14:40 +0100
Subject: [PATCH 080/129] moved hsds reader/writer tests to h5pyd

---
 test/unit/hsds_reader_test.py | 145 -------------
 test/unit/hsds_writer_test.py | 370 ----------------------------------
 2 files changed, 515 deletions(-)
 delete mode 100644 test/unit/hsds_reader_test.py
 delete mode 100644 test/unit/hsds_writer_test.py

diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
deleted file mode 100644
index ce75d540..00000000
--- a/test/unit/hsds_reader_test.py
+++ /dev/null
@@ -1,145 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-import unittest
-import logging
-import random
-import string
-import numpy as np
-from h5json import Hdf5db
-from h5json.hsdsstore.hsds_reader import HSDSReader
-from h5json import selections
-
-
-class HSDSReaderTest(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        super(HSDSReaderTest, self).__init__(*args, **kwargs)
-        # main
-
-        self.log = logging.getLogger()
-        if len(self.log.handlers) > 0:
-            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
-        else:
-            lhStdout = None
-
-        self.log.setLevel(logging.DEBUG)
-        handler = logging.FileHandler("./hsds_reader_test.log")
-        # add handler to logger
-        self.log.addHandler(handler)
-
-        if lhStdout is not None:
-            self.log.removeHandler(lhStdout)
-
-    def testSimple(self):
-        filepath = "/home/test_user1/test/tall.h5"
-        kwargs = {"app_logger": self.log}
-        db = Hdf5db(**kwargs)
-        hsds_reader = HSDSReader(filepath, **kwargs)
-        db.reader = hsds_reader
-        root_id = db.open()
-
-        # check domain stats
-        stats = db.reader.getStats()
-        self.assertTrue(stats["created"] > 0)
-        self.assertTrue(stats["lastModified"] > 0)
-        self.assertTrue(stats["owner"])
-        self.assertTrue("compressors" in stats)
-        self.assertTrue(len(stats["compressors"]) > 0)
-        self.assertTrue("limits" in stats)
-        self.assertTrue(len(stats["limits"]) > 0)
-
-        db.close()
-        self.assertTrue(db.closed)
-        obj_id = db.open()
-        self.assertEqual(obj_id, root_id)
-
-        root_json = db.getObjectById(root_id)
-        self.assertTrue("id" in root_json)
-
-        root_attrs = root_json["attributes"]
-        self.assertEqual(len(root_attrs), 2)
-        self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
-
-        root_links = root_json["links"]
-        self.assertEqual(len(root_links), 2)
-        self.assertEqual(list(root_links.keys()), ["g1", "g2"])
-        g1_link = root_links["g1"]
-        self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
-        g1_id = g1_link["id"]
-        self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
-
-        dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
-        dset_json = db.getObjectById(dset111_id)
-        dset_type = dset_json["type"]
-        self.assertEqual(dset_type["class"], "H5T_INTEGER")
-        self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
-
-        dset_attrs = dset_json["attributes"]
-        self.assertEqual(len(dset_attrs), 2)
-        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
-        dset_shape = dset_json["shape"]
-        self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-        self.assertEqual(dset_shape["dims"], [10, 10])
-
-        # got the 5th row of the dataset
-        sel_row = selections.select((10, 10), (5, slice(0, 10)))
-        row = db.getDatasetValues(dset111_id, sel_row)
-        self.assertTrue(isinstance(row, np.ndarray))
-        self.assertEqual(row.shape, (10,))
-        for i in range(10):
-            v = row[i]
-            self.assertEqual(v, i * 5)
-
-        sel_all = selections.select((10, 10), ...)
-        arr = db.getDatasetValues(dset111_id, sel_all)
-        self.assertTrue(isinstance(arr, np.ndarray))
-        self.assertEqual(arr.shape, (10, 10))
-        for i in range(10):
-            for j in range(10):
-                v = arr[i, j]
-                self.assertEqual(v, i * j)
-
-        # try adding an attribute
-        db.createAttribute(dset111_id, "attr3", value=42)
-        dset_json = db.getObjectById(dset111_id)
-        dset_attrs = dset_json["attributes"]
-        self.assertEqual(len(dset_attrs), 3)
-        self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
-        attr3_json = dset_attrs["attr3"]
-        attr3_shape = attr3_json["shape"]
-        self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
-        attr3_type = attr3_json["type"]
-        self.assertEqual(attr3_type["class"], "H5T_INTEGER")
-        self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
-        attr3_value = attr3_json["value"]
-        self.assertEqual(attr3_value, 42)
-
-        db.close()
-
-    def testNoFile(self):
-        # create a random string so we don't try to open an existing file
-        filename = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))
-        filepath = "/home/test_user1/test/" + filename
-        kwargs = {"app_logger": self.log}
-        db = Hdf5db(**kwargs)
-        hsds_reader = HSDSReader(filepath, **kwargs)
-        db.reader = hsds_reader
-        try:
-            db.open()
-            self.assertTrue(False)
-        except IOError as ioe:
-            self.assertEqual(ioe.errno, 404)
-
-
-if __name__ == "__main__":
-    # setup test files
-
-    unittest.main()
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
deleted file mode 100644
index ecdedf02..00000000
--- a/test/unit/hsds_writer_test.py
+++ /dev/null
@@ -1,370 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-import unittest
-import logging
-import random
-import string
-import requests
-import numpy as np
-from h5json import Hdf5db
-from h5json.hsdsstore.httpconn import HttpConn
-from h5json.hsdsstore.hsds_writer import HSDSWriter
-from h5json.hsdsstore.hsds_reader import HSDSReader
-from h5json.h5pystore.h5py_reader import H5pyReader
-from h5json.hdf5dtype import special_dtype, Reference
-from h5json import selections
-
-
-class HSDSWriterTest(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        super(HSDSWriterTest, self).__init__(*args, **kwargs)
-        # main
-        self.session = requests.Session()
-
-        # create logger
-        logfname = "hsds_writer_test.log"
-        loglevel = logging.DEBUG
-        logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
-        self.log = logging.getLogger()
-        self.log.info("init!")
-
-    def testSimple(self):
-
-        domain_path = "hdf5://home/test_user1/test/writer_test.h5"
-
-        db = Hdf5db(app_logger=self.log)
-        db.writer = HSDSWriter(domain_path, app_logger=self.log)
-        root_id = db.open()
-
-        stats = db.writer.getStats()
-        for k in ("created", "lastModified", "owner"):
-            self.assertTrue(k in stats)
-        http_conn = HttpConn(domain_path, mode='r', retries=1)
-        http_conn.open()
-
-        db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
-        db.createAttribute(root_id, "attr2", 42)
-
-        g1_id = db.createGroup()
-        db.createHardLink(root_id, "g1", g1_id)
-        db.createAttribute(g1_id, "a1", "hello")
-        g2_id = db.createGroup()
-        db.createHardLink(root_id, "g2", g2_id)
-
-        # validate - get the root group and check counts
-        http_rsp = http_conn.GET(f"/groups/{root_id}")
-        self.assertEqual(http_rsp.status_code, 200)
-        root_json = http_rsp.json()
-        # attribute count should still be zero (hasn't been flushed yet)
-        self.assertEqual(root_json["attributeCount"], 0)
-        # same for link count
-        self.assertEqual(root_json["linkCount"], 0)
-        self.assertTrue(db.writer.lastModified is None)  # no write yet
-        db.flush()
-        self.assertTrue(db.writer.lastModified > 0)  # timestamp should be updated
-
-        # validate - get the root group again and see if counts are updated
-        http_rsp = http_conn.GET(f"/groups/{root_id}")
-        self.assertEqual(http_rsp.status_code, 200)
-        root_json = http_rsp.json()
-        # attribute count should still be zero (hasn't been flushed yet)
-        self.assertEqual(root_json["attributeCount"], 2)
-        # same for link count
-        self.assertEqual(root_json["linkCount"], 2)
-
-        g1_1_id = db.createGroup()
-        db.createHardLink(g1_id, "g1.1", g1_1_id)
-        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
-        arr = np.zeros((10, 10), dtype=np.int32)
-        for i in range(10):
-            for j in range(10):
-                arr[i, j] = i * j
-        sel_all = selections.select((10, 10), ...)
-        db.setDatasetValues(dset_111_id, sel_all, arr)
-        db.flush()
-
-        # validate - get the dataset and check values
-        http_rsp = http_conn.GET(f"/datasets/{dset_111_id}/value")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        self.assertTrue("value" in rsp_json)
-        rsp_value = rsp_json["value"]
-        self.assertEqual(len(rsp_value), 10)
-        for i in range(10):
-            row = rsp_value[i]
-            self.assertEqual(len(row), 10)
-            for j in range(10):
-                self.assertEqual(row[j], i * j)
-
-        db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
-        db.createSoftLink(g2_id, "slink", "somewhere")
-        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
-        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
-        db.flush()
-
-        # create a link, then delete before flushing
-        tmp_grp_id = db.createGroup("tmp_group")
-        db.createHardLink(g1_1_id, "tmp_group", tmp_grp_id)
-        db.deleteLink(g1_1_id, "tmp_group")
-        db.flush()
-
-        # validate - check that links got updated
-        http_rsp = http_conn.GET(f"/groups/{g2_id}/links")
-        self.assertEqual(http_rsp.status_code, 200)
-        g2links_json = http_rsp.json()
-        self.assertTrue("links" in g2links_json)
-        g2links = g2links_json["links"]
-        self.assertTrue(len(g2links), 2)  # custom link will be ignored
-
-        db.createAttribute(g1_id, "a1", "hello")
-        db.createAttribute(g1_id, "a2", "bye-bye")
-        db.flush()
-
-        # validate - check that attributes got created
-        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        attrs_json = rsp_json["attributes"]
-        self.assertEqual(len(attrs_json), 2)
-
-        # delete an attribute
-        db.deleteAttribute(g1_id, "a1")
-        db.flush()
-
-        # validate - check that the attribute got deleted
-        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        attrs_json = rsp_json["attributes"]
-        self.assertEqual(len(attrs_json), 1)
-
-        # create an attribute that happens to use the separator character
-        db.createAttribute(g1_id, "a|z", "goofy")
-        db.flush()
-
-        # validate - check that attributes got created
-        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        attrs_json = rsp_json["attributes"]
-        self.assertEqual(len(attrs_json), 2)
-
-        # delete an attribute
-        db.deleteAttribute(g1_id, "a|z")
-        db.flush()
-
-        # validate - check that the attribute got deleted
-        http_rsp = http_conn.GET(f"/groups/{g1_id}/attributes")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        attrs_json = rsp_json["attributes"]
-        self.assertEqual(len(attrs_json), 1)
-
-        g21 = db.createGroup()
-        db.createHardLink(g2_id, "g2.1", g21)
-        db.flush()
-
-        # update one element of the dataset
-        sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
-        arr = np.zeros((), dtype=np.int32)
-        arr[()] = 42
-        db.setDatasetValues(dset_111_id, sel, arr)
-        db.flush()
-
-        # validate - check that just the one element is modified
-        http_rsp = http_conn.GET(f"/datasets/{dset_111_id}/value")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        self.assertTrue("value" in rsp_json)
-        rsp_value = rsp_json["value"]
-        self.assertEqual(len(rsp_value), 10)
-        for i in range(10):
-            row = rsp_value[i]
-            self.assertEqual(len(row), 10)
-            for j in range(10):
-                if i == 4 and j == 4:
-                    expected = 42
-                else:
-                    expected = i * j
-                self.assertEqual(row[j], expected)
-
-        # create a scalar dataset
-        dset_112_id = db.createDataset(shape=(), dtype=np.int32)
-        arr = np.zeros((), dtype=np.int32)
-        arr[()] = 42
-        sel_all = selections.select((), ...)
-        db.setDatasetValues(dset_112_id, sel_all, arr)
-        db.createHardLink(g1_id, "dset1.1.2", dset_112_id)
-        db.flush()
-
-        # validate - get the scalar dataset value
-        http_rsp = http_conn.GET(f"/datasets/{dset_112_id}/value")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        self.assertTrue("value" in rsp_json)
-        rsp_value = rsp_json["value"]
-        self.assertEqual(rsp_value, 42)
-
-        # create a dataset and try to read from it
-        dset_222_id = db.createDataset(shape=(10, 10), dtype=np.int32)
-        sel_all = selections.select((10, 10), ...)
-        arr = db.getDatasetValues(dset_222_id, sel_all)
-        self.assertTrue((arr == 0).all())
-
-        db.close()
-
-    def testReaderWriter(self):
-        # try reading and writing to an HSDS domain
-        # create a random string so we don't try to open an existing file
-        filename = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))
-        domain_path = "/home/test_user1/test/" + filename + ".h5"
-        db = Hdf5db(app_logger=self.log)
-        db.writer = HSDSWriter(domain_path, app_logger=self.log)
-        self.assertEqual(db.writer.filepath, domain_path)
-        root_id = db.open()
-        self.assertTrue(root_id)
-        db.reader = HSDSReader(domain_path, app_logger=self.log)
-        db.close()
-
-        root_id2 = db.open()
-        self.assertEqual(root_id, root_id2)
-        root_json = db.getObjectById(root_id)
-        self.assertTrue("id" not in root_json)
-        self.assertTrue("created" in root_json)
-        self.assertTrue(root_json["created"] > 0)
-        self.assertTrue(db.writer.lastModified is None)  # no flush yet
-
-        # create a scalar dataset
-        dsetA_id = db.createDataset(shape=(), dtype=np.int32)
-        dset_json = db.getObjectById(dsetA_id)
-        self.assertTrue("created" in dset_json)
-        dset_create_time = dset_json["created"]
-        self.assertTrue(dset_create_time > 0)
-
-        db.createHardLink(root_id, "dset_a", dsetA_id)
-
-        arr = np.zeros((), dtype=np.int32)
-        arr[()] = 42
-        sel_all = selections.select((), ...)
-        db.setDatasetValues(dsetA_id, sel_all, arr)
-
-        dset_json = db.getObjectById(dsetA_id)
-        self.assertTrue("lastModified" in dset_json)
-        self.assertTrue(dset_json["lastModified"] > dset_create_time)
-
-        arr = db.getDatasetValues(dsetA_id, sel_all)
-        self.assertEqual(arr[()], 42)
-
-        # create a scalar dataset with string
-        dt_str = special_dtype(vlen=str)
-        dsetB_id = db.createDataset(shape=(), dtype=dt_str)
-        dset_json = db.getObjectById(dsetB_id)
-        db.createHardLink(root_id, "dset_b", dsetB_id)
-
-        arr = np.zeros((), dtype=dt_str)
-        arr[()] = "hello world"
-        db.setDatasetValues(dsetB_id, sel_all, arr)
-
-        arr = db.getDatasetValues(dsetB_id, sel_all)
-
-        e = arr[()]
-        self.assertEqual(e, "hello world")
-        self.assertTrue(isinstance(e, str))
-
-        db.close()
-
-    def testH5PyToHS(self):
-        # test reading from HDF5 file and writing to HSDS
-
-        file_path = "data/hdf5/tall.h5"
-        domain_path = "hdf5://home/test_user1/test/hsds_writer_test_tall.h5"
-
-        db = Hdf5db(app_logger=self.log)
-        db.reader = H5pyReader(file_path)
-        db.writer = HSDSWriter(domain_path)
-        root_id = db.open()
-        root_json = db.getObjectById(root_id)
-        db.flush()
-
-        # validate - get the root group and see if counts are correct
-        http_conn = HttpConn(domain_path, mode='r', retries=1)
-        http_conn.open()
-        http_rsp = http_conn.GET(f"/groups/{root_id}")
-        self.assertEqual(http_rsp.status_code, 200)
-        root_json = http_rsp.json()
-        self.assertEqual(root_json["id"], root_id)
-        # attribute count should still be zero (hasn't been flushed yet)
-        self.assertEqual(root_json["attributeCount"], 2)
-        # same for link count
-        self.assertEqual(root_json["linkCount"], 2)
-
-        # get the g1 hard link
-        http_rsp = http_conn.GET(f"/groups/{root_id}/links/g1")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        g1_link = rsp_json["link"]
-        g1_id = g1_link["id"]
-
-        # get the g1 group json
-        http_rsp = http_conn.GET(f"/groups/{g1_id}")
-        self.assertEqual(http_rsp.status_code, 200)
-        g1_json = http_rsp.json()
-        self.assertEqual(g1_json["attributeCount"], 0)
-        self.assertEqual(g1_json["linkCount"], 2)
-
-        # get the g1.1 link
-        http_rsp = http_conn.GET(f"/groups/{g1_id}/links/g1.1")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        g1_1_link = rsp_json["link"]
-        g1_1_id = g1_1_link["id"]
-
-        # Get the g1.1 json
-        http_rsp = http_conn.GET(f"/groups/{g1_1_id}")
-        self.assertEqual(http_rsp.status_code, 200)
-        g1_json = http_rsp.json()
-        self.assertEqual(g1_json["attributeCount"], 0)
-        self.assertEqual(g1_json["linkCount"], 2)
-
-        # get the dset1.1.1 link
-        http_rsp = http_conn.GET(f"/groups/{g1_1_id}/links/dset1.1.1")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        dset1_1_1_link = rsp_json["link"]
-        dset1_1_1_id = dset1_1_1_link["id"]
-
-        # get the dset1.1.1 json
-        http_rsp = http_conn.GET(f"/datasets/{dset1_1_1_id}")
-        self.assertEqual(http_rsp.status_code, 200)
-        dset1_1_1_json = http_rsp.json()
-        dset1_1_1_shape = dset1_1_1_json["shape"]
-        self.assertEqual(dset1_1_1_shape["class"], "H5S_SIMPLE")
-
-        # get the dset1_1_1 data
-        http_rsp = http_conn.GET(f"/datasets/{dset1_1_1_id}/value")
-        self.assertEqual(http_rsp.status_code, 200)
-        rsp_json = http_rsp.json()
-        dset1_1_1_value = rsp_json["value"]
-        self.assertEqual(len(dset1_1_1_value), 10)
-        for i in range(10):
-            row = dset1_1_1_value[i]
-            self.assertEqual(len(row), 10)
-            for j in range(10):
-                self.assertEqual(row[j], i * j)
-
-        db.close()
-
-
-if __name__ == "__main__":
-    # setup test files
-
-    unittest.main()

From 29ae2370f6de0a66891edf13378c70aa01aa717e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 16 Sep 2025 18:13:44 +0100
Subject: [PATCH 081/129] fix for getDatasetValues

---
 src/h5json/hdf5db.py          |  2 +-
 test/unit/h5py_reader_test.py | 21 ++++++++++++++++++++-
 test/unit/hdf5db_test.py      | 27 +++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 02753ec5..c8442aff 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -587,7 +587,7 @@ def getDatasetValues(self, dset_id, sel):
         if fetch:
             arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
         else:
-            arr = np.zeros(sel.shape, dtype=dtype)
+            arr = np.zeros(sel.mshape, dtype=dtype)
 
         if "updates" in dset_json:
             # apply any non-flushed changes that intersect the current selection
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index 8f76543c..e4cc9c7d 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -13,8 +13,11 @@
 
 import logging
 import time
+import numpy as np
+
 from h5json import Hdf5db
 from h5json.h5pystore.h5py_reader import H5pyReader
+from h5json import selections
 
 
 class H5pyReaderTest(unittest.TestCase):
@@ -70,7 +73,23 @@ def testSimple(self):
             self.assertTrue(k in attr1_json)
         dset_shape = dset_json["shape"]
         self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
-        self.assertEqual(dset_shape["dims"], [10, 10])
+        dims = dset_shape["dims"]
+        self.assertEqual(dims, [10, 10])
+        dims = tuple(dims)
+
+        # read one element from a dataset
+        sel = selections.select(dims, (slice(4, 5), slice(5, 6)))
+        arr = db.getDatasetValues(dset111_id, sel)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(arr.shape, (1, 1))
+        self.assertEqual(arr[0, 0], 20)
+
+        # read one row
+        sel = selections.select(dims, (slice(4, 5), slice(0, 10)))
+        arr = db.getDatasetValues(dset111_id, sel)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(arr.shape, (1, 10))
+        self.assertEqual(list(arr[0]), list(range(0, 40, 4)))
 
         # try adding an attribute
         db.createAttribute(dset111_id, "attr3", value=42)
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 1eca8e2a..04df3156 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -448,6 +448,33 @@ def testSimpleDataset(self):
 
         db.close()
 
+    def testBoolDataset(self):
+        shape = (10,)
+        dtype = np.dtype(bool)
+
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        sel_first = selections.select(shape, slice(0, 1))
+        arr = db.getDatasetValues(dset_id, sel_first)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, (1,))
+        self.assertEqual(arr[0], False)
+
+        # update one element
+        sel_second = selections.select(shape, slice(1, 2))
+        db.setDatasetValues(dset_id, sel_second, np.array([True,], dtype=dtype))
+
+        # read back three elements
+        sel_three = selections.select(shape, slice(0, 3))
+        arr = db.getDatasetValues(dset_id, sel_three)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, (3,))
+        self.assertEqual(list(arr[...]), [False, True, False])
+
+        db.close()
+
     def testScalarDataset(self):
         dtype = np.int32
 

From b904ea51733f5901be77d07dfb9f946183014a4a Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 16 Sep 2025 19:02:42 +0100
Subject: [PATCH 082/129] fix for datasets with fillvalue

---
 src/h5json/hdf5db.py     | 13 ++++++++++++-
 test/unit/hdf5db_test.py | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index c8442aff..5f9714a4 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -570,6 +570,11 @@ def getDatasetValues(self, dset_id, sel):
 
         dtype = self.getDtype(dset_json)
 
+        if "creationProperties" in dset_json:
+            cpl = dset_json["creationProperties"]
+        else:
+            cpl = {}
+
         # determine if we need to make a read request or not
         if dset_id in self._new_objects:
             fetch = False
@@ -587,7 +592,13 @@ def getDatasetValues(self, dset_id, sel):
         if fetch:
             arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
         else:
-            arr = np.zeros(sel.mshape, dtype=dtype)
+            if "fillValue" in cpl:
+                fillValue = cpl["fillValue"]
+                # TBD: fix for compound types
+                arr = np.zeros(sel.mshape, dtype=dtype)
+                arr[...] = fillValue
+            else:
+                arr = np.zeros(sel.mshape, dtype=dtype)
 
         if "updates" in dset_json:
             # apply any non-flushed changes that intersect the current selection
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 04df3156..63030ef2 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -526,6 +526,24 @@ def testResizableDataset(self):
 
         db.close()
 
+    def testFillValueDataset(self):
+        dtype = np.uint32
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        cpl = {"fillValue": 0xdeadbeef}
+        dset_id = db.createDataset((), dtype=dtype, cpl=cpl)
+        db.createHardLink(root_id, "dset", dset_id)
+        dset_json = db.getObjectById(dset_id)
+        self.assertTrue("creationProperties" in dset_json)
+        cpl = dset_json["creationProperties"]
+        self.assertTrue("fillValue" in cpl)
+        self.assertEqual(cpl["fillValue"], 0xdeadbeef)
+        sel_all = selections.select((), ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, ())
+        self.assertEqual(arr[()], 0xdeadbeef)
+
 
 if __name__ == "__main__":
     # setup test files

From 65b94c12af0a7cdb1726a60432c7ac4114b48c73 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 17 Sep 2025 17:32:20 +0100
Subject: [PATCH 083/129] added dset_util functions

---
 src/h5json/dset_util.py             | 629 ++++++++++++++++++++++++++--
 src/h5json/filters.py               | 246 ++++++++++-
 src/h5json/h5pystore/h5py_reader.py |  14 +-
 testall.py                          |   9 -
 4 files changed, 849 insertions(+), 49 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 37d67f1e..34d5d0d2 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -10,36 +10,27 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 
-import time
+import math
 import numpy as np
+from .hdf5dtype import getItemSize
+from .objid import isValidUuid
+from . import config
 
+CHUNK_MIN = 512 * 1024  # Soft lower limit (512k)
+CHUNK_MAX = 2048 * 1024  # Hard upper limit (2M)
+DEFAULT_TYPE_SIZE = 128  # Type size case when it is variable
 
-def resize_dataset(dset_json, shape):
-    shape_json = dset_json["shape"]
-    shape_class = shape_json["class"]
-    if shape_class != "H5S_SIMPLE":
-        raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
-    if len(shape_json["dims"]) != len(shape):
-        raise ValueError("Resize shape parameter doesn't match dataset's rank")
-    if "maxdims" not in shape_json:
-        raise ValueError("Dataset is not resizable")
-    dims = shape_json["dims"]
-    maxdims = shape_json["maxdims"]
 
-    if shape_json["dims"] == list(shape):
-        # no change, just return
-        return
-    for i in range(len(dims)):
-        extent = shape[i]
-        if extent < 0:
-            raise ValueError("dimensions can't be negative")
-        if maxdims[i] == "H5S_UNLIMITED":
-            # any positive extent is ok
-            continue
-        if extent > maxdims[i]:
-            raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")
+def getShapeClass(data_shape):
+    """ Return shape class of the given data shape """
 
-    shape_json["dims"] = list(shape)
+    if not isinstance(data_shape, dict):
+        raise TypeError("expected dict object")
+
+    if "class" not in data_shape:
+        raise KeyError("expected 'class' key for data shape")\
+
+    return data_shape["class"]
 
 
 def getDims(dset_json):
@@ -65,6 +56,50 @@ def getNumElements(dset_json):
     return int(np.prod(getDims(dset_json)))
 
 
+def getRank(data_shape):
+    """ Return rank of given data shape_json """
+
+    shape_class = getShapeClass(data_shape)
+
+    if shape_class == "H5S_NULL":
+        return 0
+    elif shape_class == "H5S_SCALAR":
+        return 0
+    elif shape_class == "H5S_SIMPLE":
+        if "dims" not in data_shape:
+            raise KeyError("expected dims key for H5S_SIMPLE data shape")
+        return len(data_shape["dims"])
+    else:
+        raise ValueError(f"unexpected data shape class: {shape_class}")
+
+
+def getDsetRank(dset_json):
+    """Get rank returning 0 for scalar or NULL data shapes"""
+    data_shape = dset_json["shape"]
+    return getRank(data_shape)
+
+
+def isNullSpace(dset_json):
+    """Return true if this dataset is a null data space"""
+    shape_class = getShapeClass(dset_json["shape"])
+    if shape_class == "H5S_NULL":
+        return True
+    else:
+        return False
+
+
+def isScalarSpace(dset_json):
+    """ return true if this is a scalar dataset """
+
+    data_shape = dset_json["shape"]
+    shape_class = getShapeClass(data_shape)
+    if shape_class == "H5S_NULL":
+        return False
+
+    rank = getRank(data_shape)
+    return True if rank == 0 else False
+
+
 def getDatasetLayout(dset_json):
     """ Return layout json from creation property list or layout json """
     layout = None
@@ -89,3 +124,547 @@ def getDatasetLayoutClass(dset_json):
     else:
         layout_class = None
     return layout_class
+
+
+CHUNK_LAYOUT_CLASSES = (
+    "H5D_CHUNKED",
+    "H5D_CHUNKED_REF",
+    "H5D_CHUNKED_REF_INDIRECT",
+    "H5D_CONTIGUOUS_REF",
+)
+
+
+def get_dset_size(shape_json, typesize):
+    """Return the size of the dataspace.  For
+    any unlimited dimensions, assume a value of 1.
+    (so the return size will be the absolute minimum)
+    """
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return typesize  # just return size for one item
+    if typesize == "H5T_VARIABLE":
+        typesize = DEFAULT_TYPE_SIZE  # just take a guess at the item size
+    dset_size = typesize
+    shape = shape_json["dims"]
+    rank = len(shape)
+
+    for n in range(rank):
+        if shape[n] == 0:
+            # extendable extent with value of 0
+            continue  # assume this is one
+        dset_size *= shape[n]
+    return dset_size
+
+
+def resize_dataset(dset_json, shape):
+    shape_json = dset_json["shape"]
+    shape_class = shape_json["class"]
+    if shape_class != "H5S_SIMPLE":
+        raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
+    if len(shape_json["dims"]) != len(shape):
+        raise ValueError("Resize shape parameter doesn't match dataset's rank")
+    if "maxdims" not in shape_json:
+        raise ValueError("Dataset is not resizable")
+    dims = shape_json["dims"]
+    maxdims = shape_json["maxdims"]
+
+    if shape_json["dims"] == list(shape):
+        # no change, just return
+        return
+    for i in range(len(dims)):
+        extent = shape[i]
+        if extent < 0:
+            raise ValueError("dimensions can't be negative")
+        if maxdims[i] == "H5S_UNLIMITED":
+            # any positive extent is ok
+            continue
+        if extent > maxdims[i]:
+            raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")
+
+    shape_json["dims"] = list(shape)
+
+
+def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None):
+    """
+    create a chunk layout for datasets use contiguous storage.
+    """
+    if not isinstance(item_size, int):
+        msg = "ContiguousLayout can only be used with fixed-length types"
+        raise ValueError(msg)
+
+    if chunk_min is None:
+        msg = "chunk_min not set"
+        raise ValueError(msg)
+    if chunk_max is None:
+        msg = "chunk_max not set"
+        raise ValueError(msg)
+
+    if chunk_max < chunk_min:
+        raise ValueError("chunk_max cannot be less than chunk_min")
+
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return (1,)  # just enough to store one item
+    dims = shape_json["dims"]
+    rank = len(dims)
+    if rank == 0:
+        raise ValueError("rank must be positive for Contiguous Layout")
+    for dim in dims:
+        if dim < 0:
+            raise ValueError("extents must be positive for Contiguous Layout")
+        if dim == 0:
+            # data shape with no elements, just return dims as layout
+            return dims
+
+    nsize = item_size
+    layout = [1,] * rank
+
+    for i in range(rank):
+        dim = rank - i - 1
+        extent = dims[dim]
+        if extent * nsize < chunk_max:
+            # just use the full extent as layout
+            layout[dim] = extent
+            nsize *= extent
+        else:
+            n = extent
+            while n > 1:
+                n = -(-n // 2)  # use negatives so we round up on odds
+                if n * nsize < chunk_max:
+                    break
+            layout[dim] = n
+            break  # just use 1's for the rest of the layout
+
+    return layout
+
+
+def getChunkSize(layout, type_size):
+    """Return chunk size given layout.
+    i.e. just the product of the values in the list.
+    """
+    if type_size == "H5T_VARIABLE":
+        type_size = DEFAULT_TYPE_SIZE
+
+    chunk_size = type_size
+    for n in layout:
+        if n <= 0:
+            raise ValueError("Invalid chunk layout")
+        chunk_size *= n
+    return chunk_size
+
+
+def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
+    """
+    Use chunk layout given in the creationPropertiesList (if defined and
+    layout is valid).
+    Return chunk_layout_json
+    """
+
+    rank = 0
+    space_dims = None
+    chunk_dims = None
+    max_dims = None
+
+    if "dims" in shape_json:
+        space_dims = shape_json["dims"]
+        rank = len(space_dims)
+
+    if "maxdims" in shape_json:
+        max_dims = shape_json["maxdims"]
+    if "dims" in layout:
+        chunk_dims = layout["dims"]
+
+    if chunk_dims:
+        # validate that the chunk_dims are valid and correlates with the
+        # dataset shape
+        if isinstance(chunk_dims, int):
+            chunk_dims = [
+                chunk_dims,
+            ]  # promote to array
+        if len(chunk_dims) != rank:
+            msg = "Layout rank does not match shape rank"
+            raise ValueError(msg)
+        for i in range(rank):
+            dim_extent = space_dims[i]
+            chunk_extent = chunk_dims[i]
+            if not isinstance(chunk_extent, int):
+                msg = "Layout dims must be integer or integer array"
+                raise ValueError(msg)
+            if chunk_extent <= 0:
+                msg = "Invalid layout value"
+                raise ValueError(msg)
+            if max_dims is None:
+                if chunk_extent > dim_extent:
+                    msg = "Invalid layout value"
+                    raise ValueError(reason=msg)
+            elif max_dims[i] != 0:
+                if chunk_extent > max_dims[i]:
+                    msg = "Invalid layout value for extensible dimension"
+                    raise ValueError(msg)
+            else:
+                pass  # allow any positive value for unlimited dimensions
+
+    if "class" not in layout:
+        msg = "class key not found in layout for creation property list"
+        raise ValueError(msg)
+
+    layout_class = layout["class"]
+
+    if layout_class == "H5D_CONTIGUOUS_REF":
+        # reference to a dataset in a traditional HDF5 files with
+        # contiguous storage
+        if item_size == "H5T_VARIABLE":
+            # can't be used with variable types...
+            msg = "Datasets with variable types cannot be used with "
+            msg += "reference layouts"
+            raise ValueError(msg)
+        if "file_uri" not in layout:
+            # needed for H5D_CONTIGUOUS_REF
+            msg = "'file_uri' key must be provided for "
+            msg += "H5D_CONTIGUOUS_REF layout"
+            raise ValueError(msg)
+        if "offset" not in layout:
+            # needed for H5D_CONTIGUOUS_REF
+            msg = "'offset' key must be provided for "
+            msg += "H5D_CONTIGUOUS_REF layout"
+            raise ValueError(msg)
+        if "size" not in layout:
+            # needed for H5D_CONTIGUOUS_REF
+            msg = "'size' key must be provided for "
+            msg += "H5D_CONTIGUOUS_REF layout"
+            raise ValueError(msg)
+        if "dims" in layout:
+            # used defined chunk layout not allowed for H5D_CONTIGUOUS_REF
+            msg = "'dims' key can not be provided for "
+            msg += "H5D_CONTIGUOUS_REF layout"
+            raise ValueError(msg)
+    elif layout_class == "H5D_CHUNKED_REF":
+        # reference to a dataset in a traditional HDF5 files with
+        # chunked storage
+        if item_size == "H5T_VARIABLE":
+            # can't be used with variable types..
+            msg = "Datasets with variable types cannot be used with "
+            msg += "reference layouts"
+            raise ValueError(msg)
+        if "file_uri" not in layout:
+            # needed for H5D_CHUNKED_REF
+            msg = "'file_uri' key must be provided for "
+            msg += "H5D_CHUNKED_REF layout"
+            raise ValueError(msg)
+        if "dims" not in layout:
+            # needed for H5D_CHUNKED_REF
+            msg = "'dimns' key must be provided for "
+            msg += "H5D_CHUNKED_REF layout"
+            raise ValueError(msg)
+        if "chunks" not in layout:
+            msg = "'chunks' key must be provided for "
+            msg += "H5D_CHUNKED_REF layout"
+            raise ValueError(msg)
+    elif layout_class == "H5D_CHUNKED_REF_INDIRECT":
+        # reference to a dataset in a traditional HDF5 files with chunked
+        # storage using an auxiliary dataset
+        if item_size == "H5T_VARIABLE":
+            # can't be used with variable types..
+            msg = "Datasets with variable types cannot be used with "
+            msg += "reference layouts"
+            raise ValueError(msg)
+        if "dims" not in layout:
+            # needed for H5D_CHUNKED_REF_INDIRECT
+            msg = "'dims' key must be provided for "
+            msg += "H5D_CHUNKED_REF_INDIRECT layout"
+            raise ValueError(msg)
+        if "chunk_table" not in layout:
+            msg = "'chunk_table' key must be provided for "
+            msg += "H5D_CHUNKED_REF_INDIRECT layout"
+            raise ValueError(msg)
+        chunk_table_id = layout["chunk_table"]
+        if not isValidUuid(chunk_table_id, "Dataset"):
+            msg = f"Invalid chunk table id: {chunk_table_id}"
+            raise ValueError(msg)
+
+    elif layout_class == "H5D_CHUNKED":
+        if "dims" not in layout:
+            msg = "dims key not found in layout for creation property list"
+            raise ValueError(msg)
+        if shape_json["class"] != "H5S_SIMPLE":
+            msg = "Bad Request: chunked layout not valid with shape class: "
+            msg += f"{shape_json['class']}"
+            raise ValueError(msg)
+    elif layout_class == "H5D_CONTIGUOUS":
+        if "dims" in layout:
+            msg = "dims key found in layout for creation property list "
+            msg += "for H5D_CONTIGUOUS storage class"
+            raise ValueError(msg)
+    elif layout_class == "H5D_COMPACT":
+        if "dims" in layout:
+            msg = "dims key found in layout for creation property list "
+            msg += "for H5D_COMPACT storage class"
+            raise ValueError(msg)
+    else:
+        msg = f"Unexpected layout: {layout_class}"
+        raise ValueError(msg)
+
+
+def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class="H5D_CHUNKED"):
+    """Compute an increased chunk shape with a size in bytes greater than chunk_min."""
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return (1,)  # just enough to store one item
+
+    layout = list(layout)
+    dims = shape_json["dims"]
+    rank = len(dims)
+    extendable_dims = 0  # number of dimensions that are extenable
+    maxdims = None
+    if "maxdims" in shape_json:
+        maxdims = shape_json["maxdims"]
+        for n in range(rank):
+            if maxdims[n] == 0 or maxdims[n] > dims[n]:
+                extendable_dims += 1
+
+    dset_size = get_dset_size(shape_json, typesize)
+    if dset_size <= chunk_min and extendable_dims == 0:
+        # just use the entire dataspace shape as one big chunk
+        return tuple(dims)
+
+    chunk_size = getChunkSize(layout, typesize)
+    if chunk_size >= chunk_min:
+        return tuple(layout)  # good already
+    while chunk_size < chunk_min:
+        # just adjust along extendable dimensions first
+        old_chunk_size = chunk_size
+        for n in range(rank):
+            dim = rank - n - 1  # start from last dim
+
+            if extendable_dims > 0:
+                if maxdims[dim] == 0:
+                    # infinitely extendable dimensions
+                    layout[dim] *= 2
+                    chunk_size = getChunkSize(layout, typesize)
+                    if chunk_size > chunk_min:
+                        break
+                elif maxdims[dim] > layout[dim]:
+                    # can only be extended so much
+                    layout[dim] *= 2
+                    if layout[dim] >= dims[dim]:
+                        layout[dim] = maxdims[dim]  # trim back
+                        extendable_dims -= 1  # one less extenable dimension
+
+                    chunk_size = getChunkSize(layout, typesize)
+                    if chunk_size > chunk_min:
+                        break
+                    else:
+                        pass  # ignore non-extensible for now
+            else:
+                # no extendable dimensions
+                if dims[dim] > layout[dim]:
+                    # can expand chunk along this dimension
+                    layout[dim] *= 2
+                    if layout[dim] > dims[dim]:
+                        layout[dim] = dims[dim]  # trim back
+                    chunk_size = getChunkSize(layout, typesize)
+                    if chunk_size > chunk_min:
+                        break
+                else:
+                    pass  # can't extend chunk along this dimension
+        if chunk_size <= old_chunk_size:
+            # stop iteration if we haven't increased the chunk size
+            break
+        elif chunk_size > chunk_min:
+            break  # we're good
+        else:
+            pass  # do another round
+    return tuple(layout)
+
+
+def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX, layout_class="H5D_CHUNKED"):
+    """Compute a reduced chunk shape with a size in bytes less than chunk_max."""
+    layout = list(layout)
+    chunk_size = getChunkSize(layout, typesize)
+    if chunk_size <= chunk_max:
+        return tuple(layout)  # good already
+    rank = len(layout)
+
+    while chunk_size > chunk_max:
+        # just adjust along extendable dimensions first
+        old_chunk_size = chunk_size
+        for dim in range(rank):
+            if layout[dim] > 1:
+                # tricky way to do  x // 2 with ceil
+                layout[dim] = -(-layout[dim] // 2)
+                chunk_size = getChunkSize(layout, typesize)
+                if chunk_size <= chunk_max:
+                    break
+            else:
+                pass  # can't shrink chunk along this dimension
+        if chunk_size >= old_chunk_size:
+            # reality check to see if we'll ever break out of the while loop
+            break
+        elif chunk_size <= chunk_max:
+            break  # we're good
+        else:
+            pass  # do another round
+    return tuple(layout)
+
+
+def guessChunk(shape_json, typesize):
+    """Guess an appropriate chunk layout for a dataset, given its shape and
+    the size of each element in bytes.  Will allocate chunks only as large
+    as MAX_SIZE.  Chunks are generally close to some power-of-2 fraction of
+    each axis, slightly favoring bigger values for the last index.
+
+    Undocumented and subject to change without warning.
+    """
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return (1,)  # just enough to store one item
+
+    if "maxdims" in shape_json:
+        shape = shape_json["maxdims"]
+    else:
+        shape = shape_json["dims"]
+
+    if typesize == "H5T_VARIABLE":
+        typesize = 128  # just take a guess at the item size
+
+    # For unlimited dimensions we have to guess. use 1024
+    shape = tuple((x if x != 0 else 1024) for i, x in enumerate(shape))
+
+    return shape
+
+
+def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, chunk_max=None):
+    """ Get the layout json given by creation_props.
+        Raise bad request error if invalid """
+
+    min_chunk_size = CHUNK_MIN  # int(config.get("min_chunk_size"))
+    max_chunk_size = CHUNK_MAX  # int(config.get("max_chunk_size"))
+
+    item_size = getItemSize(type_json)
+    if chunk_min is None:
+        chunk_min = 1000 * 1000
+    if chunk_max is None:
+        chunk_max = 4 * 1000 * 1000
+
+    if chunk_min > chunk_max:
+        msg = "chunk_max must be larger than chunk_min"
+        raise ValueError(msg)
+
+    layout = None
+    if "layout" in creation_props:
+        layout_props = creation_props["layout"]
+    else:
+        layout_props = None
+
+    if layout_props:
+        if "class" not in layout_props:
+            msg = "expected class key in layout props"
+            raise KeyError(msg)
+        layout_class = layout_props["class"]
+        if layout_class == "H5D_CONTIGUOUS":
+            # treat contiguous as chunked
+            layout_class = "H5D_CHUNKED"
+        else:
+            layout_class = layout_props["class"]
+    elif shape["class"] != "H5S_NULL":
+        layout_class = "H5D_CHUNKED"
+    else:
+        layout_class = None
+
+    if layout_class == "H5D_COMPACT":
+        layout = {"class": "H5D_COMPACT"}
+    elif layout_class:
+        # initialize to H5D_CHUNKED
+        layout = {"class": "H5D_CHUNKED"}
+    else:
+        # null space - no layout
+        layout = None
+
+    if layout_props and "dims" in layout_props:
+        chunk_dims = layout_props["dims"]
+    else:
+        chunk_dims = None
+
+    if layout_class == "H5D_CONTIGUOUS_REF":
+        kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size}
+        chunk_dims = getContiguousLayout(shape, item_size, **kwargs)
+        layout["dims"] = chunk_dims
+
+    if layout_class == "H5D_CHUNKED" and chunk_dims is None:
+        # do auto-chunking
+        chunk_dims = guessChunk(shape, item_size)
+
+    if layout_class == "H5D_CHUNKED":
+        chunk_size = getChunkSize(chunk_dims, item_size)
+
+        # adjust the chunk shape if chunk size is too small or too big
+        adjusted_chunk_dims = None
+        if chunk_size < min_chunk_size:
+            kwargs = {"chunk_min": min_chunk_size, "layout_class": layout_class}
+            adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape, **kwargs)
+        elif chunk_size > max_chunk_size:
+            kwargs = {"chunk_max": max_chunk_size}
+            adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **kwargs)
+        if adjusted_chunk_dims:
+            layout["dims"] = adjusted_chunk_dims
+        else:
+            layout["dims"] = chunk_dims  # don't need to adjust chunk size
+
+        # set partition_count if needed:
+        max_chunks_per_folder = int(config.get("max_chunks_per_folder"))
+        set_partition = False
+        if max_chunks_per_folder > 0:
+            if "dims" in shape and "dims" in layout:
+                set_partition = True
+
+        if set_partition:
+            chunk_dims = layout["dims"]
+            shape_dims = shape["dims"]
+            if "maxdims" in shape:
+                max_dims = shape["maxdims"]
+            else:
+                max_dims = None
+            num_chunks = 1
+            rank = len(shape_dims)
+            unlimited_count = 0
+            if max_dims:
+                for i in range(rank):
+                    if max_dims[i] == 0:
+                        unlimited_count += 1
+            for i in range(rank):
+                max_dim = 1
+                if max_dims:
+                    max_dim = max_dims[i]
+                    if max_dim == 0:
+                        # don't really know what the ultimate extent
+                        # could be, but assume 10^6 for total number of
+                        # elements and square-shaped array...
+                        MAX_ELEMENT_GUESS = 10.0 ** 6
+                        exp = 1 / unlimited_count
+                        max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp))
+                else:
+                    max_dim = shape_dims[i]
+                num_chunks *= math.ceil(max_dim / chunk_dims[i])
+
+            if num_chunks > max_chunks_per_folder:
+                partition_count = math.ceil(num_chunks / max_chunks_per_folder)
+                msg = f"set partition count to: {partition_count}, "
+                msg += f"num_chunks: {num_chunks}"
+                layout["partition_count"] = partition_count
+            else:
+                pass  # partition not needed
+
+    if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"):
+        chunk_size = getChunkSize(chunk_dims, item_size)
+
+        # nothing to do about inefficiently small chunks, but large chunks
+        # can be subdivided
+        if chunk_size < min_chunk_size:
+            pass  # too small
+        elif chunk_size > max_chunk_size:
+            pass  # too large
+        layout["dims"] = chunk_dims
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index cda38178..4e985b3f 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -12,6 +12,238 @@
 
 import h5py
 
+from .hdf5dtype import isVlen
+
+# List of registered filters.  Not all are supported by every reader and writer.
+#
+#
+# tuple of filter key, filter id, and options,
+FILTER_DEFS = (
+    ("H5Z_FILTER_NONE", 0, "none", ()),
+    ("H5Z_FILTER_DEFLATE", 1, "gzip", ("level",)),  # aka as "zlib" for blosc
+    ("H5Z_FILTER_SHUFFLE", 2, "shuffle", ()),
+    ("H5Z_FILTER_FLETCHER32", 3, "fletcher32", ()),
+    ("H5Z_FILTER_SZIP", 4, "szip", ("bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine")),
+    ("H5Z_FILTER_NBIT", 5, "nbit", ()),
+    ("H5Z_FILTER_SCALEOFFSET", 6, "scaleoffset", ("scaleType", "scaleOffset")),
+    ("H5Z_FILTER_LZF", 32000, "lzf", ()),
+    ("H5Z_FILTER_BLOSC", 32001, "blosclz", ()),
+    ("H5Z_FILTER_SNAPPY", 32003, "snappy", ()),
+    ("H5Z_FILTER_LZ4", 32004, "lz4", ()),
+    ("H5Z_FILTER_LZ4HC", 32005, "lz4hc", ()),
+    ("H5Z_FILTER_BITSHUFFLE", 32008, "bitshuffle", ()),
+    ("H5Z_FILTER_ZSTD", 32015, "zstd", ()),
+)
+
+HDF_FILTER_OPTION_ENUMS = {
+    "coding": {
+        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
+        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
+    },
+    "scaleType": {
+        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
+        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
+        h5py.h5z.SO_INT: "H5Z_SO_INT",
+    },
+}
+
+COMPRESSION_FILTER_IDS = (
+    "H5Z_FILTER_DEFLATE",
+    "H5Z_FILTER_SZIP",
+    "H5Z_FILTER_SCALEOFFSET",
+    "H5Z_FILTER_LZF",
+    "H5Z_FILTER_BLOSC",
+    "H5Z_FILTER_SNAPPY",
+    "H5Z_FILTER_LZ4",
+    "H5Z_FILTER_LZ4HC",
+    "H5Z_FILTER_ZSTD",
+)
+
+COMPRESSION_FILTER_NAMES = (
+    "gzip",
+    "szip",
+    "lzf",
+    "blosclz",
+    "snappy",
+    "lz4",
+    "lz4hc",
+    "zstd",
+)
+
+
+def getFilterItem(key):
+    """
+    Return filter code, id, and name, based on an id, a name or a code.
+    """
+
+    if key == "deflate":
+        key = "gzip"  # use gzip as equivalent
+    for item in FILTER_DEFS:
+        # check for a match by key, id, or alias (the first three elements)
+        for i in range(3):
+            if key == item[i]:
+                return {"class": item[0], "id": item[1], "name": item[2], "options": item[3]}
+    return None  # not found
+
+
+def getFiltersJson(create_props, supported_filters=None):
+    """ return standardized filter representation from creation properties
+        raise bad request if invalid """
+
+    # refer to https://hdf5-json.readthedocs.io/en/latest/bnf/\
+    # filters.html#grammar-token-filter_list
+
+    if "filters" not in create_props:
+        return {}  # null set
+
+    f_in = create_props["filters"]
+
+    if not isinstance(f_in, list):
+        msg = "Expected filters in creation_props to be a list"
+        raise TypeError(msg)
+
+    f_out = []
+    for filter in f_in:
+        if isinstance(filter, int) or isinstance(filter, str):
+            item = getFilterItem(filter)
+            if not item:
+                msg = f"filter {filter} not recognized"
+                raise ValueError(msg)
+
+            if item["name"] not in supported_filters:
+                msg = f"filter {filter} is not supported"
+                raise ValueError(msg)
+            f_out.append(item)
+        elif isinstance(filter, dict):
+            if "class" not in filter:
+                msg = "expected 'class' key for filter property"
+                raise KeyError(msg)
+            if filter["class"] != "H5Z_FILTER_USER":
+                item = getFilterItem(filter["class"])
+            elif "id" in filter:
+                item = getFilterItem(filter["id"])
+            elif "name" in filter:
+                item = getFilterItem(filter["name"])
+            else:
+                item = None
+            if not item:
+                msg = f"filter {filter['class']} not recognized"
+                raise ValueError(msg)
+            if "id" not in filter:
+                filter["id"] = item["id"]
+            elif item["id"] != filter["id"]:
+                msg = f"Expected {filter['class']} to have id: "
+                msg += f"{item['id']} but got {filter['id']}"
+                raise ValueError(msg)
+            if "name" not in filter:
+                filter["name"] = item["name"]
+            if filter["name"] not in supported_filters:
+                msg = f"filter {filter} is not supported"
+                raise KeyError(msg)
+
+            f_out.append(filter)
+        else:
+            msg = f"Unexpected type for filter: {filter}"
+            raise ValueError(msg)
+
+    # return standardized filter representation
+    return f_out
+
+
+def getFilters(dset_json):
+    """Return list of filters, or empty list"""
+    if "creationProperties" not in dset_json:
+        return []
+    creationProperties = dset_json["creationProperties"]
+    if "filters" not in creationProperties:
+        return []
+    filters = creationProperties["filters"]
+    return filters
+
+
+def getCompressionFilter(filters):
+    """Return compression filter from filters, or None"""
+    for filter in filters:
+        if "class" not in filter:
+            # expected class key - malformed filter def
+            continue
+        filter_class = filter["class"]
+        if filter_class in COMPRESSION_FILTER_IDS:
+            return filter
+        if all(
+            (
+                filter_class == "H5Z_FILTER_USER",
+                "name" in filter,
+                filter["name"] in COMPRESSION_FILTER_NAMES,
+            )
+        ):
+            return filter
+    return None
+
+
+def getShuffleFilter(filters):
+    """Return shuffle filter, or None"""
+    FILTER_CLASSES = ("H5Z_FILTER_SHUFFLE", "H5Z_FILTER_BITSHUFFLE")
+    for filter in filters:
+        if "class" not in filter:
+            # invalid filter def?
+            continue
+        filter_class = filter["class"]
+        if filter_class in FILTER_CLASSES:
+            return filter
+
+    return None
+
+
+def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None):
+    """Get list of filter operations to be used for this dataset"""
+    filter_map = app["filter_map"]
+
+    if dset_id in filter_map:
+        return filter_map[dset_id]
+
+    compressionFilter = getCompressionFilter(filters)
+
+    filter_ops = {}
+
+    shuffleFilter = getShuffleFilter(filters)
+
+    if shuffleFilter and not isVlen(dtype):
+        shuffle_name = shuffleFilter["name"]
+        if shuffle_name == "shuffle":
+            filter_ops["shuffle"] = 1  # use regular shuffle
+        elif shuffle_name == "bitshuffle":
+            filter_ops["shuffle"] = 2  # use bitshuffle
+        else:
+            filter_ops["shuffle"] = 0  # no shuffle
+    else:
+        filter_ops["shuffle"] = 0  # no shuffle
+
+    if compressionFilter:
+        if compressionFilter["class"] == "H5Z_FILTER_DEFLATE":
+            filter_ops["compressor"] = "zlib"  # blosc compressor
+        else:
+            if "name" in compressionFilter:
+                filter_ops["compressor"] = compressionFilter["name"]
+            else:
+                filter_ops["compressor"] = "lz4"  # default to lz4
+        if "level" not in compressionFilter:
+            filter_ops["level"] = 5  # medium level
+        else:
+            filter_ops["level"] = int(compressionFilter["level"])
+
+    if filter_ops:
+        # save the chunk shape and dtype
+        filter_ops["chunk_shape"] = chunk_shape
+        filter_ops["dtype"] = dtype
+        filter_map[dset_id] = filter_ops  # save
+
+        return filter_ops
+    else:
+        return None
+
+
+"""
 _HDF_FILTERS = {
     1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
     2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
@@ -30,17 +262,7 @@
     32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
 }
 
-_HDF_FILTER_OPTION_ENUMS = {
-    "coding": {
-        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
-        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
-    },
-    "scaleType": {
-        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
-        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
-        h5py.h5z.SO_INT: "H5Z_SO_INT",
-    },
-}
+
 
 # h5py supported filters
 _H5PY_FILTERS = {
@@ -53,3 +275,5 @@
 }
 
 _H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
+
+"""
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index bb32a6e9..28b82ddf 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -392,29 +392,35 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
             self.log.warning(f"Unknown layout value: {nLayout}")
 
         num_filters = plist.get_nfilters()
+        print("num_filters:", num_filters)
         filter_props = []
         if num_filters:
             for n in range(num_filters):
                 filter_info = plist.get_filter(n)
+                print("filter_info:", filter_info)
                 opt_values = filter_info[2]
                 filter_prop = {}
                 filter_id = filter_info[0]
                 filter_prop["id"] = filter_id
                 if filter_info[3]:
                     filter_prop["name"] = bytesArrayToList(filter_info[3])
-                if filter_id in filters._HDF_FILTERS:
-                    hdf_filter = filters._HDF_FILTERS[filter_id]
+                hdf_filter = filters.getFilterItem(filter_id)
+                if hdf_filter:
+                    print("got hdf filter:", hdf_filter)
+
                     filter_prop["class"] = hdf_filter["class"]
                     if "options" in hdf_filter:
                         filter_opts = hdf_filter["options"]
+                        print("got filter_opts:", filter_opts)
                         for i in range(len(filter_opts)):
                             if len(opt_values) <= i:
                                 break  # end of option values
                             opt_value = opt_values[i]
                             opt_value_enum = None
                             option_name = filter_opts[i]
-                            if option_name in filters._HDF_FILTER_OPTION_ENUMS:
-                                option_enums = filters._HDF_FILTER_OPTION_ENUMS[option_name]
+                            print(f"option_name: {option_name} opt_value: {opt_value}")
+                            if option_name in filters.HDF_FILTER_OPTION_ENUMS:
+                                option_enums = filters.HDF_FILTER_OPTION_ENUMS[option_name]
                                 if opt_value in option_enums:
                                     opt_value_enum = option_enums[opt_value]
                             if opt_value_enum:
diff --git a/testall.py b/testall.py
index 1cb36136..5911277e 100755
--- a/testall.py
+++ b/testall.py
@@ -26,15 +26,6 @@
     "h5py_writer_test",
 ]
 
-use_hsds = True
-for key in ("HS_ENDPOINT", "HS_USERNAME", "HS_PASSWORD"):
-    if key not in os.environ:
-        use_hsds = False
-        print(f"not including HSDS tests, no {key} environment set")
-        break
-
-if use_hsds:
-    unit_tests.append("hsds_reader_test")
 unit_tests = tuple(unit_tests)
 
 integ_tests = ("h5tojson_test", "jsontoh5_test")

From eb138bc31203c4855030941ed3a68e5535bbcda3 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 18 Sep 2025 13:23:55 +0100
Subject: [PATCH 084/129] added filter functions

---
 src/h5json/filters.py                 | 36 ---------------------------
 src/h5json/h5pystore/h5py_reader.py   |  6 -----
 src/h5json/h5pystore/h5py_writer.py   | 14 +++++++++++
 src/h5json/h5writer.py                | 10 ++++++++
 src/h5json/hdf5db.py                  |  9 +++++++
 src/h5json/jsonstore/h5json_writer.py |  5 ++++
 6 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index 4e985b3f..8268985f 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -241,39 +241,3 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None):
         return filter_ops
     else:
         return None
-
-
-"""
-_HDF_FILTERS = {
-    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
-    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
-    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
-    4: {
-        "class": "H5Z_FILTER_SZIP",
-        "alias": "szip",
-        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
-    },
-    5: {"class": "H5Z_FILTER_NBIT"},
-    6: {
-        "class": "H5Z_FILTER_SCALEOFFSET",
-        "alias": "scaleoffset",
-        "options": ["scaleType", "scaleOffset"],
-    },
-    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
-}
-
-
-
-# h5py supported filters
-_H5PY_FILTERS = {
-    "gzip": 1,
-    "shuffle": 2,
-    "fletcher32": 3,
-    "szip": 4,
-    "scaleoffset": 6,
-    "lzf": 32000,
-}
-
-_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
-
-"""
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index 28b82ddf..b4b4c184 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -392,12 +392,10 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
             self.log.warning(f"Unknown layout value: {nLayout}")
 
         num_filters = plist.get_nfilters()
-        print("num_filters:", num_filters)
         filter_props = []
         if num_filters:
             for n in range(num_filters):
                 filter_info = plist.get_filter(n)
-                print("filter_info:", filter_info)
                 opt_values = filter_info[2]
                 filter_prop = {}
                 filter_id = filter_info[0]
@@ -406,19 +404,15 @@ def _getHDF5DatasetCreationProperties(self, dset, type_class):
                     filter_prop["name"] = bytesArrayToList(filter_info[3])
                 hdf_filter = filters.getFilterItem(filter_id)
                 if hdf_filter:
-                    print("got hdf filter:", hdf_filter)
-
                     filter_prop["class"] = hdf_filter["class"]
                     if "options" in hdf_filter:
                         filter_opts = hdf_filter["options"]
-                        print("got filter_opts:", filter_opts)
                         for i in range(len(filter_opts)):
                             if len(opt_values) <= i:
                                 break  # end of option values
                             opt_value = opt_values[i]
                             opt_value_enum = None
                             option_name = filter_opts[i]
-                            print(f"option_name: {option_name} opt_value: {opt_value}")
                             if option_name in filters.HDF_FILTER_OPTION_ENUMS:
                                 option_enums = filters.HDF_FILTER_OPTION_ENUMS[option_name]
                                 if opt_value in option_enums:
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 2cb42c0b..dc62ed72 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -491,3 +491,17 @@ def getStats(self):
         stats["lastModified"] = stat_info.st_mtime
         stats['owner'] = stat_info.st_uid  # TBD: convert to username?
         return stats
+
+    def getFilters(self, compressors_only=False):
+        """ return list of filters supported by h5py  """
+
+        h5py_filters = ["H5Z_FILTER_DEFLATE",]
+
+        if not compressors_only:
+            h5py_filters.append("H5Z_FILTER_SHUFFLE")
+            h5py_filters.append("H5Z_FILTER_FLETCHER32")
+            h5py_filters.append("H5Z_FILTER_SZIP")
+            h5py_filters.append("H5Z_FILTER_NBIT")
+            h5py_filters.append("H5Z_FILTER_SCALEOFFSET")
+
+        return tuple(h5py_filters)
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index a4b9a522..422a0450 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -99,6 +99,11 @@ def getStats(self):
         """
         pass
 
+    @abstractmethod
+    def getFilters(self, compressors_only=False):
+        """ returns a list of filters supported by the writer """
+        pass
+
 
 class H5NullWriter(H5Writer):
     """
@@ -170,3 +175,8 @@ def getStats(self):
         stats["lastModified"] = 0
         stats['owner'] = ""
         return stats
+
+    def getFilters(self, compressors_only=False):
+        """ return empty list of filters  """
+
+        return ()
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 5f9714a4..18e4f3e0 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -15,6 +15,7 @@
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
 from .array_util import jsonToArray, bytesArrayToList
 from .dset_util import resize_dataset
+from .filters import getFiltersJson
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId, getHashTagForId
 from . import selections
 from .apiversion import _apiver
@@ -834,6 +835,14 @@ def createDataset(
 
         dset_json = {"shape": shape_json, "type": type_json, "attributes": {}}
         if cpl:
+            if "filters" in cpl:
+                if self.writer:
+                    supported_filters = self.writer.getSupportedFilters()
+                else:
+                    supported_filters = ()
+                # validate and normalize supplied filter property list
+                filters_json = getFiltersJson(cpl, supported_filters=supported_filters)
+                cpl["filters"] = filters_json
             dset_json["creationProperties"] = cpl
         else:
             dset_json["creationProperties"] = {}
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index 343c045f..f37ac415 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -308,3 +308,8 @@ def getStats(self):
         stats["lastModified"] = stat_info.st_mtime
         stats['owner'] = stat_info.st_uid  # TBD: convert to username?
         return stats
+
+    def getFilters(self, compressors_only=False):
+        """ return empty list of filters  """
+
+        return ()

From fec0a43ed71ca6e529be4e60a4f0836f8fff2e9b Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 18 Sep 2025 17:17:08 +0100
Subject: [PATCH 085/129] added more dset utility functions

---
 src/h5json/dset_util.py     | 207 ++++++++++++++++++-------
 src/h5json/filters.py       |  48 ------
 test/unit/dset_util_test.py | 299 ++++++++++++++++++++++++++++++++++++
 testall.py                  |   1 +
 4 files changed, 453 insertions(+), 102 deletions(-)
 create mode 100755 test/unit/dset_util_test.py

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 34d5d0d2..c3a24e87 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -33,19 +33,47 @@ def getShapeClass(data_shape):
     return data_shape["class"]
 
 
-def getDims(dset_json):
-    """ return extents of the dataset shape as a tuple """
-    shape_json = dset_json["shape"]
-    shape_class = shape_json["class"]
-    if shape_class == "H5S_NULL":
+def getShapeDims(shape):
+    """
+    Get dims from a given shape json.  Return [1,] for Scalar datasets,
+    None for null dataspaces
+    """
+    dims = None
+    if isinstance(shape, int):
+        dims = [shape, ]
+    elif isinstance(shape, list) or isinstance(shape, tuple):
+        dims = shape  # can use as is
+    elif isinstance(shape, str):
+        # only valid string value is H5S_NULL
+        if shape != "H5S_NULL":
+            raise ValueError("Invalid value for shape")
         dims = None
-    elif shape_class == "H5S_SCALAR":
-        dims = ()
-    elif shape_class == "H5S_SIMPLE":
-        dims = tuple(shape_json["dims"])
+    elif isinstance(shape, dict):
+        if shape.get("class") in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
+            # this is a shape_json obj
+            shape_json = shape
+        elif "shape" in shape:
+            # dataset or attribute json
+            shape_json = shape["shape"]
+        else:
+            raise ValueError(f"Unknown shape: {shape}")
+
+        if "class" not in shape_json:
+            raise ValueError("'class' key not found in shape")
+        shape_class = shape_json["class"]
+        if shape_class == "H5S_NULL":
+            dims = None
+        elif shape_class == "H5S_SCALAR":
+            dims = []
+        elif shape_class == "H5S_SIMPLE":
+            if "dims" not in shape_json:
+                raise ValueError("'dims' key expected for shape")
+            dims = shape_json["dims"]
+        else:
+            raise ValueError(f"Unknown shape: {shape_json}")
     else:
-        raise ValueError(f"Unexpected shape class: {shape_class}")
-    return dims
+        raise ValueError(f"Unexpected shape class: {type(shape)}")
+    return tuple(dims)
 
 
 def getNumElements(dset_json):
@@ -53,51 +81,37 @@ def getNumElements(dset_json):
         returns None for null shape, 1 for scalar shape, and product of
         extents otherwise """
 
-    return int(np.prod(getDims(dset_json)))
+    return int(np.prod(getShapeDims(dset_json)))
 
 
 def getRank(data_shape):
-    """ Return rank of given data shape_json """
-
-    shape_class = getShapeClass(data_shape)
+    """ Return rank of given data shape """
 
-    if shape_class == "H5S_NULL":
+    dims = getShapeDims(data_shape)
+    if dims is None:
         return 0
-    elif shape_class == "H5S_SCALAR":
-        return 0
-    elif shape_class == "H5S_SIMPLE":
-        if "dims" not in data_shape:
-            raise KeyError("expected dims key for H5S_SIMPLE data shape")
-        return len(data_shape["dims"])
     else:
-        raise ValueError(f"unexpected data shape class: {shape_class}")
-
+        return len(dims)
 
-def getDsetRank(dset_json):
-    """Get rank returning 0 for scalar or NULL data shapes"""
-    data_shape = dset_json["shape"]
-    return getRank(data_shape)
 
-
-def isNullSpace(dset_json):
+def isNullSpace(shape):
     """Return true if this dataset is a null data space"""
-    shape_class = getShapeClass(dset_json["shape"])
-    if shape_class == "H5S_NULL":
+
+    dims = getShapeDims(shape)
+    if dims is None:
         return True
     else:
         return False
 
 
-def isScalarSpace(dset_json):
+def isScalar(shape):
     """ return true if this is a scalar dataset """
 
-    data_shape = dset_json["shape"]
-    shape_class = getShapeClass(data_shape)
-    if shape_class == "H5S_NULL":
+    dims = getShapeDims(shape)
+    if dims is None or len(dims) > 0:
         return False
-
-    rank = getRank(data_shape)
-    return True if rank == 0 else False
+    else:
+        return True
 
 
 def getDatasetLayout(dset_json):
@@ -134,26 +148,31 @@ def getDatasetLayoutClass(dset_json):
 )
 
 
-def get_dset_size(shape_json, typesize):
+def getDatasetSize(shape_json, typesize):
     """Return the size of the dataspace.  For
     any unlimited dimensions, assume a value of 1.
     (so the return size will be the absolute minimum)
     """
-    if shape_json is None or shape_json["class"] == "H5S_NULL":
+
+    if isNullSpace(shape_json):
         return None
-    if shape_json["class"] == "H5S_SCALAR":
-        return typesize  # just return size for one item
+
     if typesize == "H5T_VARIABLE":
-        typesize = DEFAULT_TYPE_SIZE  # just take a guess at the item size
-    dset_size = typesize
-    shape = shape_json["dims"]
-    rank = len(shape)
+        dset_size = DEFAULT_TYPE_SIZE  # just take a guess at the item size
+    else:
+        dset_size = typesize
+
+    if isScalar(shape_json):
+        return dset_size  # just return size for one item
+
+    dims = getShapeDims(shape_json)
+    rank = len(dims)
 
     for n in range(rank):
-        if shape[n] == 0:
+        if dims[n] == 0:
             # extendable extent with value of 0
             continue  # assume this is one
-        dset_size *= shape[n]
+        dset_size *= dims[n]
     return dset_size
 
 
@@ -218,21 +237,21 @@ def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None):
             # data shape with no elements, just return dims as layout
             return dims
 
-    nsize = item_size
+    n_size = item_size
     layout = [1,] * rank
 
     for i in range(rank):
         dim = rank - i - 1
         extent = dims[dim]
-        if extent * nsize < chunk_max:
+        if extent * n_size < chunk_max:
             # just use the full extent as layout
             layout[dim] = extent
-            nsize *= extent
+            n_size *= extent
         else:
             n = extent
             while n > 1:
                 n = -(-n // 2)  # use negatives so we round up on odds
-                if n * nsize < chunk_max:
+                if n * n_size < chunk_max:
                     break
             layout[dim] = n
             break  # just use 1's for the rest of the layout
@@ -255,6 +274,86 @@ def getChunkSize(layout, type_size):
     return chunk_size
 
 
+def isExtensible(dims, maxdims):
+    """
+    Determine if the dataset can be extended
+    """
+    if maxdims is None or len(dims) == 0:
+        return False
+    rank = len(dims)
+    if len(maxdims) != rank:
+        raise ValueError("rank of maxdims does not match dataset")
+    for n in range(rank):
+        # TBD - shouldn't have H5S_UNLIMITED in any new files.
+        # Remove check once this is confirmed
+        if maxdims[n] in (0, "H5S_UNLIMITED") or maxdims[n] > dims[n]:
+            return True
+    return False
+
+
+def getDsetMaxDims(dset_json):
+    """
+    Get maxdims from a given shape.  Return [1,] for Scalar datasets
+
+    Use with H5S_NULL datasets will throw a ValueError
+    """
+    if "shape" not in dset_json:
+        msg = "No shape found in dset_json"
+        raise KeyError(msg)
+    shape_json = dset_json["shape"]
+    shape_class = getShapeClass(shape_json)
+    maxdims = None
+    if shape_class == "H5S_NULL":
+        msg = "Expected shape class other than H5S_NULL"
+        raise ValueError(msg)
+    elif shape_class == "H5S_SCALAR":
+        maxdims = [1,]
+    elif shape_class == "H5S_SIMPLE":
+        if "maxdims" in shape_json:
+            maxdims = shape_json["maxdims"]
+    else:
+        msg = f"Unexpected shape class: {shape_class}"
+        raise ValueError(msg)
+    return maxdims
+
+
+def getChunkDims(dset_json):
+    """ get chunk shape for given dset_json """
+
+    layout = getDatasetLayout(dset_json)
+    if layout and "dims" in layout:
+        return layout["dims"]
+    else:
+        # H5D_COMPACT and H5D_CONTIGUOUS will not have a dims key
+        # Check the layout dict in dset_json to see if it's
+        # defined there
+        if "layout" in dset_json:
+            layout = dset_json["layout"]
+            if "dims" in layout:
+                return layout["dims"]
+    return None
+
+
+def getChunkLayout(dset_json):
+    """Get chunk layout.  Throw 500 if used with non-H5D_CHUNKED layout"""
+    if "layout" not in dset_json:
+        msg = "No layout found in dset_json"
+        raise KeyError(msg)
+    layout_json = dset_json["layout"]
+    if "class" not in layout_json:
+        msg = f"Expected class key for layout: {layout_json}"
+        raise KeyError(msg)
+    layout_class = layout_json["class"]
+    if layout_class not in CHUNK_LAYOUT_CLASSES:
+        msg = f"Unexpected shape layout: {layout_class}"
+        raise ValueError(msg)
+    if "dims" not in layout_json:
+        msg = f"Expected dims key in layout: {layout_json}"
+        raise KeyError(msg)
+    layout = layout_json["dims"]
+    return layout
+
+
 def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
     """
     Use chunk layout given in the creationPropertiesList (if defined and
@@ -425,7 +524,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class=
             if maxdims[n] == 0 or maxdims[n] > dims[n]:
                 extendable_dims += 1
 
-    dset_size = get_dset_size(shape_json, typesize)
+    dset_size = getDatasetSize(shape_json, typesize)
     if dset_size <= chunk_min and extendable_dims == 0:
         # just use the entire dataspace shape as one big chunk
         return tuple(dims)
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index 8268985f..178a82bf 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -193,51 +193,3 @@ def getShuffleFilter(filters):
             return filter
 
     return None
-
-
-def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None):
-    """Get list of filter operations to be used for this dataset"""
-    filter_map = app["filter_map"]
-
-    if dset_id in filter_map:
-        return filter_map[dset_id]
-
-    compressionFilter = getCompressionFilter(filters)
-
-    filter_ops = {}
-
-    shuffleFilter = getShuffleFilter(filters)
-
-    if shuffleFilter and not isVlen(dtype):
-        shuffle_name = shuffleFilter["name"]
-        if shuffle_name == "shuffle":
-            filter_ops["shuffle"] = 1  # use regular shuffle
-        elif shuffle_name == "bitshuffle":
-            filter_ops["shuffle"] = 2  # use bitshuffle
-        else:
-            filter_ops["shuffle"] = 0  # no shuffle
-    else:
-        filter_ops["shuffle"] = 0  # no shuffle
-
-    if compressionFilter:
-        if compressionFilter["class"] == "H5Z_FILTER_DEFLATE":
-            filter_ops["compressor"] = "zlib"  # blosc compressor
-        else:
-            if "name" in compressionFilter:
-                filter_ops["compressor"] = compressionFilter["name"]
-            else:
-                filter_ops["compressor"] = "lz4"  # default to lz4
-        if "level" not in compressionFilter:
-            filter_ops["level"] = 5  # medium level
-        else:
-            filter_ops["level"] = int(compressionFilter["level"])
-
-    if filter_ops:
-        # save the chunk shape and dtype
-        filter_ops["chunk_shape"] = chunk_shape
-        filter_ops["dtype"] = dtype
-        filter_map[dset_id] = filter_ops  # save
-
-        return filter_ops
-    else:
-        return None
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
new file mode 100755
index 00000000..7c4556ea
--- /dev/null
+++ b/test/unit/dset_util_test.py
@@ -0,0 +1,299 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+
+from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, getContiguousLayout, expandChunk
+
+
+class DsetUtilTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(DsetUtilTest, self).__init__(*args, **kwargs)
+        # main
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testGuessChunk(self):
+
+        typesize = "H5T_VARIABLE"
+        logging.debug("hello")
+
+        shape = {"class": "H5S_NULL"}
+        layout = guessChunk(shape, typesize)
+        self.assertTrue(layout is None)
+
+        shape = {"class": "H5S_SCALAR"}
+        layout = guessChunk(shape, typesize)
+        self.assertEqual(layout, (1,))
+
+        shape = {"class": "H5S_SIMPLE", "dims": [100, 100]}
+        layout = guessChunk(shape, typesize)
+        self.assertTrue(len(layout), 2)
+        for i in range(2):
+            self.assertTrue(layout[i] >= 1)
+            self.assertTrue(layout[i] <= 100)
+
+        typesize = 8
+        layout = guessChunk(shape, typesize)
+        self.assertTrue(len(layout), 2)
+        for i in range(2):
+            self.assertTrue(layout[i] >= 1)
+            self.assertTrue(layout[i] <= 100)
+
+        shape = {"class": "H5S_SIMPLE", "dims": [5]}
+        layout = guessChunk(shape, typesize)
+        self.assertEqual(layout, (5,))
+
+        shape = {"class": "H5S_SIMPLE", "dims": [100, 100, 100]}
+        layout = guessChunk(shape, typesize)
+        self.assertTrue(len(layout), 3)
+        for i in range(3):
+            self.assertTrue(layout[i] >= 1)
+            self.assertTrue(layout[i] <= 100)
+
+        shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, 0]}
+        layout = guessChunk(shape, typesize)
+        self.assertTrue(len(layout), 2)
+        for i in range(2):
+            self.assertTrue(layout[i] >= 1)
+            self.assertTrue(layout[i] <= 1024)
+
+        shape = {"class": "H5S_SCALAR"}
+        layout = guessChunk(shape, typesize)
+        self.assertEqual(layout, (1,))
+
+        shape = {"class": "H5S_NULL"}
+        layout = guessChunk(shape, typesize)
+        self.assertEqual(layout, None)
+
+    def testShrinkChunk(self):
+        CHUNK_MIN = 500
+        CHUNK_MAX = 5000
+        typesize = 1
+        layout = (1, 2, 3)
+        shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX)
+        self.assertEqual(shrunk, layout)
+
+        layout = (100, 200, 300)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes > CHUNK_MAX)
+        shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX)
+        rank = len(layout)
+        for i in range(rank):
+            self.assertTrue(shrunk[i] >= 1)
+            self.assertTrue(shrunk[i] <= 1000 * (i + 1))
+        num_bytes = getChunkSize(shrunk, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        layout = (300, 200, 100)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes > CHUNK_MAX)
+        shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX)
+        rank = len(layout)
+        for i in range(rank):
+            self.assertTrue(shrunk[i] >= 1)
+            self.assertTrue(shrunk[i] <= 1000 * (3 - i))
+        num_bytes = getChunkSize(shrunk, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        CHUNK_MIN = 1 * 1024 * 1024
+        CHUNK_MAX = 4 * 1024 * 1024
+        typesize = 4
+        layout = (117, 201, 189, 1)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes > CHUNK_MAX)
+        shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX)
+        self.assertEqual(shrunk, (59, 101, 95, 1))
+        num_bytes = getChunkSize(shrunk, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+    def testExpandChunk(self):
+        CHUNK_MIN = 5000
+        CHUNK_MAX = 50000
+
+        typesize = 20
+        shape = {"class": "H5S_SIMPLE", "dims": [12, ], "maxdims": [20, ]}
+        layout = (20,)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        # chunk layout can't be larger than dataspace
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        self.assertEqual(expanded, (20,))
+
+        typesize = 1
+        shape = {"class": "H5S_SIMPLE", "dims": [10, 10, 10]}
+        layout = (10, 10, 10)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        # chunk layout can't be larger than dataspace
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        self.assertEqual(expanded, (10, 10, 10))
+
+        shape = {"class": "H5S_SIMPLE", "dims": [1000, 2000, 3000]}
+        layout = (10, 10, 10)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        shape = {"class": "H5S_SIMPLE", "dims": [1000,]}
+        layout = (10,)
+        num_bytes = getChunkSize(layout, "H5T_VARIABLE")
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, "H5T_VARIABLE", shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, "H5T_VARIABLE")
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [1000, 10, 1000],
+            "maxdims": [1000, 100, 1000],
+        }
+        layout = (10, 10, 10)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [1000, 0, 1000],
+            "maxdims": [1000, 100, 1000],
+        }
+        layout = (10, 10, 10)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [1000, 10, 1000],
+            "maxdims": [1000, 0, 1000],
+        }
+        layout = (10, 10, 10)
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+    def testGetContiguousLayout(self):
+        typesize = 4
+        chunk_min = 400
+        chunk_max = 800
+
+        kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max}
+
+        def get_num_bytes(dims):
+            num_bytes = typesize
+            for n in dims:
+                num_bytes *= n
+            return num_bytes
+
+        try:
+            shape = {"class": "H5S_SIMPLE", "dims": [100, 100]}
+            layout = getContiguousLayout(shape, "H5T_VARIABLE", **kwargs)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+        shape = {"class": "H5S_NULL"}
+        layout = getContiguousLayout(shape, typesize, **kwargs)
+        self.assertTrue(layout is None)
+
+        shape = {"class": "H5S_SCALAR"}
+        layout = getContiguousLayout(shape, typesize, **kwargs)
+        self.assertEqual(layout, (1,))
+
+        for extent in (1, 100, 10000):
+            dims = [
+                extent,
+            ]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 1)
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+
+            self.assertTrue(chunk_bytes <= chunk_max)
+
+        for extent in (1, 9, 90):
+            dims = [extent, extent]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 2)
+            for i in range(2):
+                self.assertTrue(layout[i] >= 1)
+                self.assertTrue(layout[i] <= extent)
+            self.assertEqual(layout[1], extent)
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+            self.assertTrue(chunk_bytes <= chunk_max)
+
+        for extent in (1, 10, 100):
+            dims = [extent, extent, 50]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 3)
+            for i in range(3):
+                self.assertTrue(layout[i] >= 1)
+                self.assertTrue(layout[i] <= dims[i])
+
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+            self.assertTrue(chunk_bytes <= chunk_max)
+
+        for extent in (1, 100, 1000):
+            dims = [extent, 4]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 2)
+            for i in range(2):
+                self.assertTrue(layout[i] >= 1)
+                self.assertTrue(layout[i] <= dims[i])
+
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+            self.assertTrue(chunk_bytes <= chunk_max)
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/testall.py b/testall.py
index 5911277e..04aa4798 100755
--- a/testall.py
+++ b/testall.py
@@ -19,6 +19,7 @@
     "array_util_test",
     "objid_test",
     "hdf5dtype_test",
+    "dset_util_test",
     "hdf5db_test",
     "h5json_reader_test",
     "h5json_writer_test",

From 5ab9b6580577aea840f428d3a910b6dfeeb835b1 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 3 Oct 2025 15:56:51 +0200
Subject: [PATCH 086/129] added shape_util.py

---
 src/h5json/config.py         | 213 ----------------------------------
 src/h5json/dset_util.py      | 219 +++++++----------------------------
 src/h5json/filters.py        |  37 ++++++
 src/h5json/shape_util.py     | 141 ++++++++++++++++++++++
 test/unit/dset_util_test.py  |   9 --
 test/unit/shape_util_test.py | 121 +++++++++++++++++++
 testall.py                   |   1 +
 7 files changed, 342 insertions(+), 399 deletions(-)
 delete mode 100755 src/h5json/config.py
 create mode 100644 src/h5json/shape_util.py
 create mode 100755 test/unit/shape_util_test.py

diff --git a/src/h5json/config.py b/src/h5json/config.py
deleted file mode 100755
index b7602ffd..00000000
--- a/src/h5json/config.py
+++ /dev/null
@@ -1,213 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
-# Utilities.  The full HSDS copyright notice, including                      #
-# terms governing use, modification, and redistribution, is contained in     #
-# the file COPYING, which can be found at the root of the source code        #
-# distribution tree.  If you do not have access to this file, you may        #
-# request a copy from help@hdfgroup.org.                                     #
-##############################################################################
-import os
-import json
-
-
-class Config:
-    """
-    User Config state
-    """
-    _cfg = {}  # global state
-
-    def __init__(self, config_file=None, **kwargs):
-        if Config._cfg:
-            return  # already initialized
-        if config_file:
-            self._config_file = config_file
-        elif os.path.isfile(".hscfg"):
-            self._config_file = ".hscfg"
-        else:
-            self._config_file = os.path.expanduser("~/.hscfg")
-        # process config file if found
-        if os.path.isfile(self._config_file):
-            line_number = 0
-            with open(self._config_file) as f:
-                for line in f:
-                    line_number += 1
-                    s = line.strip()
-                    if not s:
-                        continue
-                    if s[0] == '#':
-                        # comment line
-                        continue
-                    fields = s.split('=')
-                    if len(fields) < 2:
-                        print(f"config file: {self._config_file} line: {line_number} is not valid")
-                        continue
-                    k = fields[0].strip()
-                    v = fields[1].strip()
-                    if k == "complex_names":
-                        self.complex_names = v
-                    elif k == "bool_names":
-                        self.bool_names = v
-                    elif k == "track_order":
-                        self.track_order = v
-                    else:
-                        Config._cfg[k] = v
-
-        # add standard keys if not already picked up
-        for k in ("hs_endpoint", "hs_username", "hs_password", "hs_api_key"):
-            if k not in Config._cfg:
-                Config._cfg[k] = ""
-
-        # override any config values with environment variable if found
-        for k in Config._cfg.keys():
-            if k.upper() in os.environ:
-                Config._cfg[k] = os.environ[k.upper()]
-
-        # update any values that are passed in to the constructor
-        for k in kwargs.keys():
-            Config._cfg[k] = kwargs[k]
-
-        # finally, set defaults for any expected keys that are not already set
-        for k in ("hs_endpoint", "hs_username", "hs_endpoint"):
-            if k not in Config._cfg:
-                Config._cfg[k] = None
-        if "bool_names" not in Config._cfg:
-            Config._cfg["bool_names"] = (b"FALSE", b"TRUE")
-        if "complex_names" not in Config._cfg:
-            Config._cfg["complex_names"] = ("r", "i")
-        if "track_order" not in Config._cfg:
-            Config._cfg["track_order"] = False
-
-    def __getitem__(self, name):
-        """ Get a config item  """
-        if name not in Config._cfg:
-            if name.upper() in os.environ:
-                Config._cfg[name] = os.environ[name.upper()]
-            else:
-                return None
-        return Config._cfg[name]
-
-    def get(self, name, default):
-        """ return config value for name or default if None """
-        val = self.__getitem__(name)
-        if val is None:
-            return default
-        else:
-            return default
-
-    def __setitem__(self, name, obj):
-        """ set config item """
-        Config._cfg[name] = obj
-
-    def __delitem__(self, name):
-        """ Delete option. """
-        del Config._cfg[name]
-
-    def __len__(self):
-        return len(Config._cfg)
-
-    def __iter__(self):
-        """ Iterate over config names """
-        keys = Config._cfg.keys()
-        for key in keys:
-            yield key
-
-    def __contains__(self, name):
-        return name in Config._cfg
-
-    def __repr__(self):
-        return json.dumps(Config._cfg)
-
-    def keys(self):
-        return Config._cfg.keys()
-
-    @property
-    def hs_endpoint(self):
-        return Config._cfg.get("hs_endpoint")
-
-    @property
-    def hs_username(self):
-        return Config._cfg.get("hs_username")
-
-    @property
-    def hs_password(self):
-        return Config._cfg.get("hs_password")
-
-    @property
-    def hs_api_key(self):
-        return Config._cfg.get("hs_api_key")
-
-    @property
-    def bool_names(self):
-        if "bool_names" in Config._cfg:
-            names = Config._cfg["bool_names"]
-        else:
-            names = (b"FALSE", b"TRUE")
-        return names
-
-    @bool_names.setter
-    def bool_names(self, value):
-        if isinstance(value, str):
-            names = value.split(())
-            if len(names) < 2:
-                raise ValueError("bool_names must have two items")
-            elif len(names) == 2:
-                pass
-            else:
-                names = names[:2]  # just use the first two items
-        elif len(value) != 2:
-            raise ValueError("expected two-element list for bool_names")
-        else:
-            names = value
-        Config._cfg["bool_names"] = tuple(names)
-
-    @property
-    def complex_names(self):
-        if "complex_names" in Config._cfg:
-            names = Config._cfg["complex_names"]
-        else:
-            names = ("r", "i")
-        return names
-
-    @complex_names.setter
-    def complex_names(self, value):
-        if isinstance(value, str):
-            names = value.split()
-            if len(names) < 2:
-                raise ValueError("complex_names must have two items")
-            elif len(names) == 2:
-                pass
-            else:
-                names = names[:2]  # just use the first two items
-        elif len(value) != 2:
-            raise ValueError("complex_names must have two values")
-        else:
-            names = value
-
-        Config._cfg["complex_names"] = tuple(names)
-
-    @property
-    def track_order(self):
-        if "track_order" in Config._cfg:
-            track = Config._cfg["track_order"]
-        else:
-            track = False
-        return track
-
-    @track_order.setter
-    def track_order(self, value):
-        if isinstance(value, str):
-            tokens = value.split()
-            if len(tokens) == 0:
-                track = False
-            else:
-                track = bool(tokens[0])  # strip any comments
-        else:
-            track = bool(value)
-        Config._cfg["track_order"] = track
-
-
-def get_config(config_file=None, **kwargs):
-    return Config(config_file=config_file, **kwargs)
diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index c3a24e87..4327a044 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -11,107 +11,19 @@
 ##############################################################################
 
 import math
-import numpy as np
 from .hdf5dtype import getItemSize
+from .shape_util import getDataSize
 from .objid import isValidUuid
-from . import config
 
 CHUNK_MIN = 512 * 1024  # Soft lower limit (512k)
 CHUNK_MAX = 2048 * 1024  # Hard upper limit (2M)
-DEFAULT_TYPE_SIZE = 128  # Type size case when it is variable
 
-
-def getShapeClass(data_shape):
-    """ Return shape class of the given data shape """
-
-    if not isinstance(data_shape, dict):
-        raise TypeError("expected dict object")
-
-    if "class" not in data_shape:
-        raise KeyError("expected 'class' key for data shape")\
-
-    return data_shape["class"]
-
-
-def getShapeDims(shape):
-    """
-    Get dims from a given shape json.  Return [1,] for Scalar datasets,
-    None for null dataspaces
-    """
-    dims = None
-    if isinstance(shape, int):
-        dims = [shape, ]
-    elif isinstance(shape, list) or isinstance(shape, tuple):
-        dims = shape  # can use as is
-    elif isinstance(shape, str):
-        # only valid string value is H5S_NULL
-        if shape != "H5S_NULL":
-            raise ValueError("Invalid value for shape")
-        dims = None
-    elif isinstance(shape, dict):
-        if shape.get("class") in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
-            # this is a shape_json obj
-            shape_json = shape
-        elif "shape" in shape:
-            # dataset or attribute json
-            shape_json = shape["shape"]
-        else:
-            raise ValueError(f"Unknown shape: {shape}")
-
-        if "class" not in shape_json:
-            raise ValueError("'class' key not found in shape")
-        shape_class = shape_json["class"]
-        if shape_class == "H5S_NULL":
-            dims = None
-        elif shape_class == "H5S_SCALAR":
-            dims = []
-        elif shape_class == "H5S_SIMPLE":
-            if "dims" not in shape_json:
-                raise ValueError("'dims' key expected for shape")
-            dims = shape_json["dims"]
-        else:
-            raise ValueError(f"Unknown shape: {shape_json}")
-    else:
-        raise ValueError(f"Unexpected shape class: {type(shape)}")
-    return tuple(dims)
-
-
-def getNumElements(dset_json):
-    """ return the number of elements defined by the dataset's shape
-        returns None for null shape, 1 for scalar shape, and product of
-        extents otherwise """
-
-    return int(np.prod(getShapeDims(dset_json)))
-
-
-def getRank(data_shape):
-    """ Return rank of given data shape """
-
-    dims = getShapeDims(data_shape)
-    if dims is None:
-        return 0
-    else:
-        return len(dims)
-
-
-def isNullSpace(shape):
-    """Return true if this dataset is a null data space"""
-
-    dims = getShapeDims(shape)
-    if dims is None:
-        return True
-    else:
-        return False
-
-
-def isScalar(shape):
-    """ return true if this is a scalar dataset """
-
-    dims = getShapeDims(shape)
-    if dims is None or len(dims) > 0:
-        return False
-    else:
-        return True
+CHUNK_LAYOUT_CLASSES = (
+    "H5D_CHUNKED",
+    "H5D_CHUNKED_REF",
+    "H5D_CHUNKED_REF_INDIRECT",
+    "H5D_CONTIGUOUS_REF",
+)
 
 
 def getDatasetLayout(dset_json):
@@ -122,11 +34,7 @@ def getDatasetLayout(dset_json):
         cp = dset_json["creationProperties"]
         if "layout" in cp:
             layout = cp["layout"]
-    if not layout and "layout" in dset_json:
-        layout = dset_json["layout"]
-    if not layout:
-        # no layout for {dset_json
-        return None
+
     return layout
 
 
@@ -140,43 +48,8 @@ def getDatasetLayoutClass(dset_json):
     return layout_class
 
 
-CHUNK_LAYOUT_CLASSES = (
-    "H5D_CHUNKED",
-    "H5D_CHUNKED_REF",
-    "H5D_CHUNKED_REF_INDIRECT",
-    "H5D_CONTIGUOUS_REF",
-)
-
-
-def getDatasetSize(shape_json, typesize):
-    """Return the size of the dataspace.  For
-    any unlimited dimensions, assume a value of 1.
-    (so the return size will be the absolute minimum)
-    """
-
-    if isNullSpace(shape_json):
-        return None
-
-    if typesize == "H5T_VARIABLE":
-        dset_size = DEFAULT_TYPE_SIZE  # just take a guess at the item size
-    else:
-        dset_size = typesize
-
-    if isScalar(shape_json):
-        return dset_size  # just return size for one item
-
-    dims = getShapeDims(shape_json)
-    rank = len(dims)
-
-    for n in range(rank):
-        if dims[n] == 0:
-            # extendable extent with value of 0
-            continue  # assume this is one
-        dset_size *= dims[n]
-    return dset_size
-
-
 def resize_dataset(dset_json, shape):
+    """ Update shape dims to the given shape provided new shape is valid for maxdims """
     shape_json = dset_json["shape"]
     shape_class = shape_json["class"]
     if shape_class != "H5S_SIMPLE":
@@ -259,12 +132,10 @@ def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None):
     return layout
 
 
-def getChunkSize(layout, type_size):
+def getChunkSize(layout, type_size: int = 1):
     """Return chunk size given layout.
     i.e. just the product of the values in the list.
     """
-    if type_size == "H5T_VARIABLE":
-        type_size = DEFAULT_TYPE_SIZE
 
     chunk_size = type_size
     for n in layout:
@@ -284,8 +155,6 @@ def isExtensible(dims, maxdims):
     if len(maxdims) != rank:
         raise ValueError("rank of maxdims does not match dataset")
     for n in range(rank):
-        # TBD - shouldn't have H5S_UNLIMITED in any new files.
-        # Remove check once this is confirmed
         if maxdims[n] in (0, "H5S_UNLIMITED") or maxdims[n] > dims[n]:
             return True
     return False
@@ -301,7 +170,7 @@ def getDsetMaxDims(dset_json):
         msg = "No shape found in dset_json"
         raise KeyError(msg)
     shape_json = dset_json["shape"]
-    shape_class = getShapeClass(shape_json)
+    shape_class = shape_json["class"]
     maxdims = None
     if shape_class == "H5S_NULL":
         msg = "Expected shape class other than H5S_NULL"
@@ -311,6 +180,8 @@ def getDsetMaxDims(dset_json):
     elif shape_class == "H5S_SIMPLE":
         if "maxdims" in shape_json:
             maxdims = shape_json["maxdims"]
+        else:
+            maxdims = shape_json["dims"]
     else:
         msg = f"Unexpected shape class: {shape_class}"
         raise ValueError(msg)
@@ -335,18 +206,16 @@ def getChunkDims(dset_json):
 
 
 def getChunkLayout(dset_json):
-    """Get chunk layout.  Throw 500 if used with non-H5D_CHUNKED layout"""
-    if "layout" not in dset_json:
-        msg = "No layout found in dset_json"
-        raise KeyError(msg)
-    layout_json = dset_json["layout"]
-    if "class" not in layout_json:
-        msg = f"Expected class key for layout: {layout_json}"
-        raise KeyError(msg)
-    layout_class = layout_json["class"]
+    """Get chunk layout.  Return None for non-chunked layout"""
+
+    layout_class = getDatasetLayoutClass(dset_json)
+    if not layout_class:
+        return None
+
     if layout_class not in CHUNK_LAYOUT_CLASSES:
-        msg = f"Unexpected shape layout: {layout_class}"
-        raise ValueError(msg)
+        return None
+
+    layout_json = getDatasetLayout(dset_json)
     if "dims" not in layout_json:
         msg = f"Expected dims key in layout: {layout_json}"
         raise KeyError(msg)
@@ -379,9 +248,7 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
         # validate that the chunk_dims are valid and correlates with the
         # dataset shape
         if isinstance(chunk_dims, int):
-            chunk_dims = [
-                chunk_dims,
-            ]  # promote to array
+            chunk_dims = [chunk_dims, ]  # promote to array
         if len(chunk_dims) != rank:
             msg = "Layout rank does not match shape rank"
             raise ValueError(msg)
@@ -516,7 +383,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class=
     layout = list(layout)
     dims = shape_json["dims"]
     rank = len(dims)
-    extendable_dims = 0  # number of dimensions that are extenable
+    extendable_dims = 0  # number of dimensions that are extendable
     maxdims = None
     if "maxdims" in shape_json:
         maxdims = shape_json["maxdims"]
@@ -524,7 +391,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class=
             if maxdims[n] == 0 or maxdims[n] > dims[n]:
                 extendable_dims += 1
 
-    dset_size = getDatasetSize(shape_json, typesize)
+    dset_size = getDataSize(shape_json, typesize)
     if dset_size <= chunk_min and extendable_dims == 0:
         # just use the entire dataspace shape as one big chunk
         return tuple(dims)
@@ -550,7 +417,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class=
                     layout[dim] *= 2
                     if layout[dim] >= dims[dim]:
                         layout[dim] = maxdims[dim]  # trim back
-                        extendable_dims -= 1  # one less extenable dimension
+                        extendable_dims -= 1  # one less extendable dimension
 
                     chunk_size = getChunkSize(layout, typesize)
                     if chunk_size > chunk_min:
@@ -579,8 +446,9 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class=
     return tuple(layout)
 
 
-def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX, layout_class="H5D_CHUNKED"):
+def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX):
     """Compute a reduced chunk shape with a size in bytes less than chunk_max."""
+
     layout = list(layout)
     chunk_size = getChunkSize(layout, typesize)
     if chunk_size <= chunk_max:
@@ -636,18 +504,16 @@ def guessChunk(shape_json, typesize):
     return shape
 
 
-def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, chunk_max=None):
+def getLayoutJson(creation_props,
+                  shape=None,
+                  type_json=None,
+                  chunk_min=CHUNK_MIN,
+                  chunk_max=CHUNK_MAX,
+                  max_chunks_per_folder=0):
     """ Get the layout json given by creation_props.
-        Raise bad request error if invalid """
-
-    min_chunk_size = CHUNK_MIN  # int(config.get("min_chunk_size"))
-    max_chunk_size = CHUNK_MAX  # int(config.get("max_chunk_size"))
+        Raise value error if invalid """
 
     item_size = getItemSize(type_json)
-    if chunk_min is None:
-        chunk_min = 1000 * 1000
-    if chunk_max is None:
-        chunk_max = 4 * 1000 * 1000
 
     if chunk_min > chunk_max:
         msg = "chunk_max must be larger than chunk_min"
@@ -689,7 +555,7 @@ def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, ch
         chunk_dims = None
 
     if layout_class == "H5D_CONTIGUOUS_REF":
-        kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size}
+        kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max}
         chunk_dims = getContiguousLayout(shape, item_size, **kwargs)
         layout["dims"] = chunk_dims
 
@@ -702,11 +568,11 @@ def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, ch
 
         # adjust the chunk shape if chunk size is too small or too big
         adjusted_chunk_dims = None
-        if chunk_size < min_chunk_size:
-            kwargs = {"chunk_min": min_chunk_size, "layout_class": layout_class}
+        if chunk_size < chunk_min:
+            kwargs = {"chunk_min": chunk_min, "layout_class": layout_class}
             adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape, **kwargs)
-        elif chunk_size > max_chunk_size:
-            kwargs = {"chunk_max": max_chunk_size}
+        elif chunk_size > chunk_max:
+            kwargs = {"chunk_max": chunk_max}
             adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **kwargs)
         if adjusted_chunk_dims:
             layout["dims"] = adjusted_chunk_dims
@@ -714,7 +580,6 @@ def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, ch
             layout["dims"] = chunk_dims  # don't need to adjust chunk size
 
         # set partition_count if needed:
-        max_chunks_per_folder = int(config.get("max_chunks_per_folder"))
         set_partition = False
         if max_chunks_per_folder > 0:
             if "dims" in shape and "dims" in layout:
@@ -762,8 +627,8 @@ def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, ch
 
         # nothing to do about inefficiently small chunks, but large chunks
         # can be subdivided
-        if chunk_size < min_chunk_size:
+        if chunk_size < chunk_min:
             pass  # too small
-        elif chunk_size > max_chunk_size:
+        elif chunk_size > chunk_max:
             pass  # too large
         layout["dims"] = chunk_dims
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index 178a82bf..9164f1e8 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -193,3 +193,40 @@ def getShuffleFilter(filters):
             return filter
 
     return None
+
+
+def getFilterOps(filters, dtype=None):
+    """Get list of filter operations to be used for this dataset"""
+
+    compressionFilter = getCompressionFilter(filters)
+
+    filter_ops = {}
+
+    shuffleFilter = getShuffleFilter(filters)
+
+    if shuffleFilter and not isVlen(dtype):
+        shuffle_name = shuffleFilter["name"]
+        if shuffle_name == "shuffle":
+            filter_ops["shuffle"] = 1  # use regular shuffle
+        elif shuffle_name == "bitshuffle":
+            filter_ops["shuffle"] = 2  # use bitshuffle
+        else:
+            filter_ops["shuffle"] = 0  # no shuffle
+    else:
+        filter_ops["shuffle"] = 0  # no shuffle
+
+    """ return list of filter operations for this dataset """
+    if compressionFilter:
+        if compressionFilter["class"] == "H5Z_FILTER_DEFLATE":
+            filter_ops["compressor"] = "zlib"  # blosc compressor
+        else:
+            if "name" in compressionFilter:
+                filter_ops["compressor"] = compressionFilter["name"]
+            else:
+                filter_ops["compressor"] = "lz4"  # default to lz4
+        if "level" not in compressionFilter:
+            filter_ops["level"] = 5  # medium level
+        else:
+            filter_ops["level"] = int(compressionFilter["level"])
+
+    return filter_ops
diff --git a/src/h5json/shape_util.py b/src/h5json/shape_util.py
new file mode 100644
index 00000000..a3531cde
--- /dev/null
+++ b/src/h5json/shape_util.py
@@ -0,0 +1,141 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import numpy as np
+
+
+def getShapeClass(shape):
+    """ Return shape class of the given data shape """
+
+    if not isinstance(shape, dict):
+        raise TypeError("expected dict object")
+
+    if shape.get("class") in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
+        # this is a shape_json obj
+        shape_json = shape
+    elif "shape" in shape:
+        # dataset or attribute json
+        shape_json = shape["shape"]
+    else:
+        raise ValueError(f"Unknown shape: {shape}")
+
+    if "class" not in shape_json:
+        raise KeyError("expected 'class' key for data shape")\
+
+    return shape_json["class"]
+
+
+def getShapeDims(shape):
+    """
+    Get dims from a given shape json.  Return [1,] for Scalar datasets,
+    None for null data spaces
+    """
+    dims = None
+    if isinstance(shape, int):
+        dims = (shape, )
+    elif isinstance(shape, list):
+        dims = tuple(shape)
+    elif isinstance(shape, tuple):
+        dims = shape  # can use as is
+    elif isinstance(shape, str):
+        # only valid string value is H5S_NULL
+        if shape != "H5S_NULL":
+            raise ValueError("Invalid value for shape")
+        dims = None
+    elif isinstance(shape, dict):
+        if shape.get("class") in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
+            # this is a shape_json obj
+            shape_json = shape
+        elif "shape" in shape:
+            # dataset or attribute json
+            shape_json = shape["shape"]
+        else:
+            raise ValueError(f"Unknown shape: {shape}")
+
+        if "class" not in shape_json:
+            raise ValueError("'class' key not found in shape")
+        shape_class = shape_json["class"]
+        if shape_class == "H5S_NULL":
+            dims = None
+        elif shape_class == "H5S_SCALAR":
+            dims = ()
+        elif shape_class == "H5S_SIMPLE":
+            if "dims" not in shape_json:
+                raise ValueError("'dims' key expected for shape")
+            dims = tuple(shape_json["dims"])
+        else:
+            raise ValueError(f"Unknown shape: {shape_json}")
+    else:
+        raise ValueError(f"Unexpected shape class: {type(shape)}")
+    return dims
+
+
+def getNumElements(obj_json):
+    """ return the number of elements defined by the dataset's shape
+        returns None for null shape, 1 for scalar shape, and product of
+        extents otherwise """
+
+    dims = getShapeDims(obj_json)
+    if dims is None:
+        return 0
+    else:
+        return int(np.prod(dims))
+
+
+def getRank(shape):
+    """ Return rank of given data shape """
+
+    dims = getShapeDims(shape)
+    if dims is None:
+        return 0
+    else:
+        return len(dims)
+
+
+def isNullSpace(shape):
+    """Return true if this dataset is a null data space"""
+
+    shape_class = getShapeClass(shape)
+    if shape_class == "H5S_NULL":
+        return True
+    else:
+        return False
+
+
+def isScalar(shape):
+    """ return true if this is a scalar dataset """
+
+    shape_class = getShapeClass(shape)
+    if shape_class == "H5S_SCALAR":
+        return True
+    else:
+        return False
+
+
+def getDataSize(shape, type_size: int = 1):
+    """Return the size of the dataspace.  For
+    any unlimited dimensions, assume a value of 1.
+    (so the return size will be the absolute minimum)
+    """
+
+    if isinstance(shape, dict) and isNullSpace(shape):
+        return 0
+
+    if isinstance(shape, dict) and isScalar(shape):
+        return type_size  # just return size for one item
+
+    dims = getShapeDims(shape)
+
+    if dims is None:
+        return 0
+    else:
+        return type_size * int(np.prod(dims))
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 7c4556ea..c8b949ec 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -154,15 +154,6 @@ def testExpandChunk(self):
         self.assertTrue(num_bytes > CHUNK_MIN)
         self.assertTrue(num_bytes < CHUNK_MAX)
 
-        shape = {"class": "H5S_SIMPLE", "dims": [1000,]}
-        layout = (10,)
-        num_bytes = getChunkSize(layout, "H5T_VARIABLE")
-        self.assertTrue(num_bytes < CHUNK_MIN)
-        expanded = expandChunk(layout, "H5T_VARIABLE", shape, chunk_min=CHUNK_MIN)
-        num_bytes = getChunkSize(expanded, "H5T_VARIABLE")
-        self.assertTrue(num_bytes > CHUNK_MIN)
-        self.assertTrue(num_bytes < CHUNK_MAX)
-
         shape = {
             "class": "H5S_SIMPLE",
             "dims": [1000, 10, 1000],
diff --git a/test/unit/shape_util_test.py b/test/unit/shape_util_test.py
new file mode 100755
index 00000000..23c41edf
--- /dev/null
+++ b/test/unit/shape_util_test.py
@@ -0,0 +1,121 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+
+from h5json.shape_util import getShapeClass, getShapeDims, getNumElements, getRank
+from h5json.shape_util import isNullSpace, isScalar, getDataSize
+
+
+class ShapeUtilTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(ShapeUtilTest, self).__init__(*args, **kwargs)
+        # main
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testSimple(self):
+
+        type_json = {
+            "base": "H5T_STD_I32BE",
+            "class": "H5T_INTEGER"
+        }
+        vstr_json = {
+            "charSet": "H5T_CSET_ASCII",
+            "class": "H5T_STRING",
+            "length": "H5T_VARIABLE",
+            "strPad": "H5T_STR_NULLTERM"
+        }
+        null_shape_json = {"class": "H5S_NULL"}
+        null_shape_obj = {"type": type_json, "shape": null_shape_json}
+        scalar_shape_json = {"class": "H5S_SCALAR"}
+        scalar_shape_obj = {"type": type_json, "shape": scalar_shape_json}
+        vstr_scalar_shape_obj = {"type": vstr_json, "shape": scalar_shape_json}
+
+        simple_shape_json = {"class": "H5S_SIMPLE", "dims": [5, 7]}
+        simple_shape_obj = {"type": type_json, "shape": simple_shape_json}
+        vstr_simple_shape_obj = {"type": vstr_json, "shape": simple_shape_json}
+
+        self.assertEqual(getShapeClass(null_shape_json), "H5S_NULL")
+        self.assertEqual(getShapeClass(null_shape_obj), "H5S_NULL")
+        self.assertEqual(getShapeClass(scalar_shape_json), "H5S_SCALAR")
+        self.assertEqual(getShapeClass(scalar_shape_obj), "H5S_SCALAR")
+        self.assertEqual(getShapeClass(vstr_scalar_shape_obj), "H5S_SCALAR")
+        self.assertEqual(getShapeClass(simple_shape_json), "H5S_SIMPLE")
+        self.assertEqual(getShapeClass(simple_shape_obj), "H5S_SIMPLE")
+        self.assertEqual(getShapeClass(vstr_simple_shape_obj), "H5S_SIMPLE")
+
+        self.assertEqual(getShapeDims(null_shape_json), None)
+        self.assertEqual(getShapeDims(null_shape_obj), None)
+        self.assertEqual(getShapeDims(scalar_shape_json), ())
+        self.assertEqual(getShapeDims(scalar_shape_obj), ())
+        self.assertEqual(getShapeDims(vstr_scalar_shape_obj), ())
+        self.assertEqual(getShapeDims(simple_shape_json), (5, 7))
+        self.assertEqual(getShapeDims(simple_shape_obj), (5, 7))
+        self.assertEqual(getShapeDims(vstr_simple_shape_obj), (5, 7))
+        self.assertEqual(getShapeDims(12), (12,))
+
+        self.assertEqual(getRank(null_shape_json), 0)
+        self.assertEqual(getRank(null_shape_obj), 0)
+        self.assertEqual(getRank(scalar_shape_json), 0)
+        self.assertEqual(getRank(scalar_shape_obj), 0)
+        self.assertEqual(getRank(vstr_scalar_shape_obj), 0)
+        self.assertEqual(getRank(simple_shape_json), 2)
+        self.assertEqual(getRank(simple_shape_obj), 2)
+        self.assertEqual(getRank(vstr_simple_shape_obj), 2)
+        self.assertEqual(getRank((1, 2, 3)), 3)
+
+        self.assertEqual(getNumElements(null_shape_json), 0)
+        self.assertEqual(getNumElements(null_shape_obj), 0)
+        self.assertEqual(getNumElements(scalar_shape_json), 1)
+        self.assertEqual(getNumElements(scalar_shape_obj), 1)
+        self.assertEqual(getNumElements(vstr_scalar_shape_obj), 1)
+        self.assertEqual(getNumElements(simple_shape_json), 35)
+        self.assertEqual(getNumElements(simple_shape_obj), 35)
+        self.assertEqual(getNumElements(vstr_simple_shape_obj), 35)
+        self.assertEqual(getNumElements(()), 1)
+        self.assertEqual(getNumElements([1, 2, 3]), 6)
+
+        self.assertEqual(isNullSpace(null_shape_json), True)
+        self.assertEqual(isNullSpace(null_shape_obj), True)
+        self.assertEqual(isNullSpace(scalar_shape_json), False)
+        self.assertEqual(isNullSpace(scalar_shape_obj), False)
+        self.assertEqual(isNullSpace(vstr_scalar_shape_obj), False)
+        self.assertEqual(isNullSpace(simple_shape_json), False)
+        self.assertEqual(isNullSpace(simple_shape_obj), False)
+        self.assertEqual(isNullSpace(vstr_simple_shape_obj), False)
+
+        self.assertEqual(isScalar(null_shape_json), False)
+        self.assertEqual(isScalar(null_shape_obj), False)
+        self.assertEqual(isScalar(scalar_shape_json), True)
+        self.assertEqual(isScalar(scalar_shape_obj), True)
+        self.assertEqual(isScalar(vstr_scalar_shape_obj), True)
+        self.assertEqual(isScalar(simple_shape_json), False)
+        self.assertEqual(isScalar(simple_shape_obj), False)
+        self.assertEqual(isScalar(vstr_simple_shape_obj), False)
+
+        self.assertEqual(getDataSize(null_shape_json, 4), 0)
+        self.assertEqual(getDataSize(null_shape_obj, 4), 0)
+        self.assertEqual(getDataSize(scalar_shape_json, 4), 4)
+        self.assertEqual(getDataSize(scalar_shape_obj, 4), 4)
+        self.assertEqual(getDataSize(vstr_scalar_shape_obj, 4), 4)
+        self.assertEqual(getDataSize(simple_shape_json, 4), 140)
+        self.assertEqual(getDataSize(simple_shape_obj, 4), 140)
+        self.assertEqual(getDataSize(vstr_simple_shape_obj, 4), 140)
+        self.assertEqual(getDataSize((), 4), 4)
+        self.assertEqual(getDataSize([1, 2, 3], 4), 24)
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/testall.py b/testall.py
index 04aa4798..34b1efd7 100755
--- a/testall.py
+++ b/testall.py
@@ -19,6 +19,7 @@
     "array_util_test",
     "objid_test",
     "hdf5dtype_test",
+    "shape_util_test",
     "dset_util_test",
     "hdf5db_test",
     "h5json_reader_test",

From dcaf2fb3d3b26e23975d494ec2db9d27da57dc9d Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 17 Oct 2025 11:18:18 +0100
Subject: [PATCH 087/129] consolidate duplicate dsetutil funcs

---
 src/h5json/dset_util.py     | 17 -----------------
 test/unit/dset_util_test.py | 38 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 4327a044..eb627443 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -189,23 +189,6 @@ def getDsetMaxDims(dset_json):
 
 
 def getChunkDims(dset_json):
-    """ get chunk shape for given dset_json """
-
-    layout = getDatasetLayout(dset_json)
-    if layout and "dims" in layout:
-        return layout["dims"]
-    else:
-        # H5D_COMPACT and H5D_CONTIGUOUS will not have a dims key
-        # Check the layout dict in dset_json to see if it's
-        # defined there
-        if "layout" in dset_json:
-            layout = dset_json["layout"]
-            if "dims" in layout:
-                return layout["dims"]
-    return None
-
-
-def getChunkLayout(dset_json):
     """Get chunk layout.  Return None for non-chunked layout"""
 
     layout_class = getDatasetLayoutClass(dset_json)
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index c8b949ec..3dafeedf 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -12,7 +12,8 @@
 import unittest
 import logging
 
-from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, getContiguousLayout, expandChunk
+from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk
+from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getDatasetLayout, getChunkDims
 
 
 class DsetUtilTest(unittest.TestCase):
@@ -22,6 +23,41 @@ def __init__(self, *args, **kwargs):
         self.logger = logging.getLogger()
         self.logger.setLevel(logging.WARNING)
 
+    def testGetLayout(self):
+        contiguous_layout = {'class': 'H5D_CONTIGUOUS'}
+        dset_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
+                     'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
+                     'created': 1760613930.3584619,
+                     'type': {'class': 'H5T_FLOAT', 'base': 'H5T_IEEE_F32LE'},
+                     'shape': {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]},
+                     'lastModified': 1760613930.3584619,
+                     'attributeCount': 0,
+                     'creationProperties': {'fillValue': 3.12, 'layout': contiguous_layout}}
+
+        layout = getDatasetLayout(dset_json)
+        self.assertTrue("class" in layout)
+        layout_class = getDatasetLayoutClass(dset_json)
+        self.assertEqual(layout_class, "H5D_CONTIGUOUS")
+        chunk_dims = getChunkDims(dset_json)
+        self.assertEqual(chunk_dims, None)
+
+        chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [2, ]}
+        dset_chunked_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
+                             'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
+                             'created': 1760613930.3584619,
+                             'type': {'class': 'H5T_FLOAT', 'base': 'H5T_IEEE_F32LE'},
+                             'shape': {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]},
+                             'lastModified': 1760613930.3584619,
+                             'attributeCount': 0,
+                             'creationProperties': {'fillValue': 3.12, 'layout': chunked_layout}}
+
+        layout = getDatasetLayout(dset_chunked_json)
+        self.assertTrue("class" in layout)
+        layout_class = getDatasetLayoutClass(dset_chunked_json)
+        self.assertEqual(layout_class, "H5D_CHUNKED")
+        chunk_dims = getChunkDims(dset_chunked_json)
+        self.assertEqual(chunk_dims, [2, ])
+
     def testGuessChunk(self):
 
         typesize = "H5T_VARIABLE"

From e6357ff4bd18b993a6abe6e100940cc9b35a777f Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 17 Oct 2025 11:40:03 +0100
Subject: [PATCH 088/129] for non chunked datasets return chunk dims as dset
 shape

---
 src/h5json/dset_util.py     | 22 ++++++++++++++--------
 test/unit/dset_util_test.py |  4 ++--
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index eb627443..00220235 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -132,13 +132,13 @@ def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None):
     return layout
 
 
-def getChunkSize(layout, type_size: int = 1):
+def getChunkSize(chunk_dims, type_size: int = 1):
     """Return chunk size given layout.
     i.e. just the product of the values in the list.
     """
 
     chunk_size = type_size
-    for n in layout:
+    for n in chunk_dims:
         if n <= 0:
             raise ValueError("Invalid chunk layout")
         chunk_size *= n
@@ -185,25 +185,31 @@ def getDsetMaxDims(dset_json):
     else:
         msg = f"Unexpected shape class: {shape_class}"
         raise ValueError(msg)
-    return maxdims
+    return tuple(maxdims)
 
 
 def getChunkDims(dset_json):
-    """Get chunk layout.  Return None for non-chunked layout"""
+    """Get chunk layout.  Return shape dims for non-chunked layout"""
 
+    shape_json = dset_json["shape"]
+    if shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return (1, )
+    shape_dims = shape_json["dims"]
     layout_class = getDatasetLayoutClass(dset_json)
     if not layout_class:
-        return None
+        return tuple(shape_dims)
 
     if layout_class not in CHUNK_LAYOUT_CLASSES:
-        return None
+        return tuple(shape_dims)
 
     layout_json = getDatasetLayout(dset_json)
     if "dims" not in layout_json:
         msg = f"Expected dims key in layout: {layout_json}"
         raise KeyError(msg)
-    layout = layout_json["dims"]
-    return layout
+    chunk_dims = tuple(layout_json["dims"])
+    return chunk_dims
 
 
 def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 3dafeedf..498276ce 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -39,7 +39,7 @@ def testGetLayout(self):
         layout_class = getDatasetLayoutClass(dset_json)
         self.assertEqual(layout_class, "H5D_CONTIGUOUS")
         chunk_dims = getChunkDims(dset_json)
-        self.assertEqual(chunk_dims, None)
+        self.assertEqual(chunk_dims, (10, ))
 
         chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [2, ]}
         dset_chunked_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
@@ -56,7 +56,7 @@ def testGetLayout(self):
         layout_class = getDatasetLayoutClass(dset_chunked_json)
         self.assertEqual(layout_class, "H5D_CHUNKED")
         chunk_dims = getChunkDims(dset_chunked_json)
-        self.assertEqual(chunk_dims, [2, ])
+        self.assertEqual(chunk_dims, (2, ))
 
     def testGuessChunk(self):
 

From b8c474f5ad94e767e7a2731cc6b399a1f8641e2f Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 17 Oct 2025 12:14:33 +0100
Subject: [PATCH 089/129] add more tests for dset_util

---
 test/unit/dset_util_test.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 498276ce..8a30527b 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -41,6 +41,23 @@ def testGetLayout(self):
         chunk_dims = getChunkDims(dset_json)
         self.assertEqual(chunk_dims, (10, ))
 
+        compact_layout = {'class': 'H5D_COMPACT'}
+        dset_compact_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
+                             'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
+                             'created': 1760613930.3584619,
+                             'type': {'class': 'H5T_FLOAT', 'base': 'H5T_IEEE_F32LE'},
+                             'shape': {'class': 'H5S_SCALAR'},
+                             'lastModified': 1760613930.3584619,
+                             'attributeCount': 0,
+                             'creationProperties': {'fillValue': 3.12, 'layout': compact_layout}}
+
+        layout = getDatasetLayout(dset_compact_json)
+        self.assertTrue("class" in layout)
+        layout_class = getDatasetLayoutClass(dset_json)
+        self.assertEqual(layout_class, "H5D_CONTIGUOUS")
+        chunk_dims = getChunkDims(dset_compact_json)
+        self.assertEqual(chunk_dims, (1, ))
+
         chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [2, ]}
         dset_chunked_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
                              'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',

From 0be82f28cdc857289399d303385e7b3e5786517b Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 23 Oct 2025 18:00:48 +0100
Subject: [PATCH 090/129] add min/max param for guessChunk

---
 src/h5json/dset_util.py     | 10 ++++++++--
 test/unit/dset_util_test.py |  7 +++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 00220235..6d28ab76 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -362,7 +362,7 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
         raise ValueError(msg)
 
 
-def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class="H5D_CHUNKED"):
+def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
     """Compute an increased chunk shape with a size in bytes greater than chunk_min."""
     if shape_json is None or shape_json["class"] == "H5S_NULL":
         return None
@@ -466,7 +466,7 @@ def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX):
     return tuple(layout)
 
 
-def guessChunk(shape_json, typesize):
+def guessChunk(shape_json, typesize, chunk_min=None, chunk_max=None):
     """Guess an appropriate chunk layout for a dataset, given its shape and
     the size of each element in bytes.  Will allocate chunks only as large
     as MAX_SIZE.  Chunks are generally close to some power-of-2 fraction of
@@ -490,6 +490,12 @@ def guessChunk(shape_json, typesize):
     # For unlimited dimensions we have to guess. use 1024
     shape = tuple((x if x != 0 else 1024) for i, x in enumerate(shape))
 
+    chunk_size = getChunkSize(shape, typesize)
+    if chunk_min and chunk_size < chunk_min:
+        shape = expandChunk(shape, typesize, shape_json, chunk_min=chunk_min)
+    elif chunk_max and chunk_size > chunk_max:
+        shape = shrinkChunk(shape, typesize, chunk_max=chunk_max)
+
     return shape
 
 
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 8a30527b..adeb9f4a 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -107,11 +107,14 @@ def testGuessChunk(self):
         self.assertEqual(layout, (5,))
 
         shape = {"class": "H5S_SIMPLE", "dims": [100, 100, 100]}
-        layout = guessChunk(shape, typesize)
+        chunk_max = 400
+        layout = guessChunk(shape, typesize, chunk_max=chunk_max)
         self.assertTrue(len(layout), 3)
         for i in range(3):
             self.assertTrue(layout[i] >= 1)
-            self.assertTrue(layout[i] <= 100)
+            self.assertTrue(layout[i] < 100)
+        chunk_size = getChunkSize(layout, typesize)
+        self.assertTrue(chunk_size <= chunk_max)
 
         shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, 0]}
         layout = guessChunk(shape, typesize)

From fdb9ffa233f2201ac6f3c901bb122dfdc78f5c88 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 29 Oct 2025 11:56:51 +0000
Subject: [PATCH 091/129] added constant for valid layout classes

---
 src/h5json/dset_util.py     | 12 ++++++++---
 test/unit/dset_util_test.py | 40 +++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 6d28ab76..2fce42ea 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -18,11 +18,14 @@
 CHUNK_MIN = 512 * 1024  # Soft lower limit (512k)
 CHUNK_MAX = 2048 * 1024  # Hard upper limit (2M)
 
-CHUNK_LAYOUT_CLASSES = (
+
+LAYOUT_CLASSES = (
+    "H5D_COMPACT",
+    "H5D_CONTIGUOUS",
+    "H5D_CONTIGUOUS_REF",
     "H5D_CHUNKED",
     "H5D_CHUNKED_REF",
     "H5D_CHUNKED_REF_INDIRECT",
-    "H5D_CONTIGUOUS_REF",
 )
 
 
@@ -201,7 +204,8 @@ def getChunkDims(dset_json):
     if not layout_class:
         return tuple(shape_dims)
 
-    if layout_class not in CHUNK_LAYOUT_CLASSES:
+    if not layout_class.startswith("H5D_CHUNKED"):
+        # for non-chunked layouts, just return the shape as the chunk dim
         return tuple(shape_dims)
 
     layout_json = getDatasetLayout(dset_json)
@@ -495,6 +499,8 @@ def guessChunk(shape_json, typesize, chunk_min=None, chunk_max=None):
         shape = expandChunk(shape, typesize, shape_json, chunk_min=chunk_min)
     elif chunk_max and chunk_size > chunk_max:
         shape = shrinkChunk(shape, typesize, chunk_max=chunk_max)
+    else:
+        pass  # good already
 
     return shape
 
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index adeb9f4a..f7e4aa91 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -123,6 +123,18 @@ def testGuessChunk(self):
             self.assertTrue(layout[i] >= 1)
             self.assertTrue(layout[i] <= 1024)
 
+        dims = [50000, 80000]
+        shape = {'class': 'H5S_SIMPLE', 'dims': dims}
+        chunk_min = 1048576
+        chunk_max = 4194304
+        layout = guessChunk(shape, typesize, chunk_min=chunk_min, chunk_max=chunk_max)
+        self.assertTrue(len(layout), 2)
+        self.assertTrue(layout[0] < dims[0])
+        self.assertTrue(layout[1] < dims[1])
+        chunk_size = layout[0] * layout[1] * typesize
+        self.assertTrue(chunk_size >= chunk_min)
+        self.assertTrue(chunk_size <= chunk_max)
+
         shape = {"class": "H5S_SCALAR"}
         layout = guessChunk(shape, typesize)
         self.assertEqual(layout, (1,))
@@ -175,6 +187,18 @@ def testShrinkChunk(self):
         self.assertTrue(num_bytes > CHUNK_MIN)
         self.assertTrue(num_bytes < CHUNK_MAX)
 
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [50000, 80000],
+        }
+        layout = [782, 125]
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
     def testExpandChunk(self):
         CHUNK_MIN = 5000
         CHUNK_MAX = 50000
@@ -242,6 +266,22 @@ def testExpandChunk(self):
             "maxdims": [1000, 0, 1000],
         }
         layout = (10, 10, 10)
+        typesize = 4
+        num_bytes = getChunkSize(layout, typesize)
+        self.assertTrue(num_bytes < CHUNK_MIN)
+        expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)
+        num_bytes = getChunkSize(expanded, typesize)
+        self.assertTrue(num_bytes > CHUNK_MIN)
+        self.assertTrue(num_bytes < CHUNK_MAX)
+
+        CHUNK_MIN = 1024 * 1024
+        CHUNK_MAX = 4 * CHUNK_MIN
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [50000, 80000],
+        }
+        layout = [100, 100]
+        typesize = 4
         num_bytes = getChunkSize(layout, typesize)
         self.assertTrue(num_bytes < CHUNK_MIN)
         expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN)

From e84a072eb70463c83dc11369e59ab99365304ce2 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Dec 2025 12:15:24 +0800
Subject: [PATCH 092/129] update for create time

---
 src/h5json/h5pystore/h5py_reader.py | 11 ++++++-----
 src/h5json/h5pystore/h5py_writer.py |  1 +
 src/h5json/hdf5db.py                | 25 +++++++++++++------------
 test/unit/h5py_reader_test.py       |  7 ++++---
 test/unit/h5py_writer_test.py       |  2 +-
 5 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index b4b4c184..6725c783 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -153,7 +153,7 @@ def __init__(
         else:
             self.log = logging.getLogger()
         if not h5py.is_hdf5(filepath):
-            self.log.warn(f"File: {filepath} is not an HDF5 file")
+            self.log.warning(f"File: {filepath} is not an HDF5 file")
             raise IOError("not an HDF5 file")
         super().__init__(filepath, app_logger=app_logger)
         self._f = None
@@ -265,8 +265,8 @@ def getAttribute(self, obj_id, name, include_data=True):
             item["value"] = value
         else:
             pass  # no data
-
-        item['created'] = time.time()  # TBD: get attribute creation time from h5py?
+        stats = self.getStats()
+        item['created'] = stats["lastModified"]  # use file modification time as attr creation time
         return item
 
     def getAttributes(self, obj_id, include_data=True):
@@ -312,7 +312,8 @@ def _getLink(self, parent, link_name):
             else:
                 item["id"] = self._addr_map[addr]
 
-        item['created'] = time.time()  # TBD: get the link creation time from h5py?
+        stats = self.getStats()
+        item['created'] = stats["lastModified"]  # use file modification time as attr creation time
 
         return item
 
@@ -567,7 +568,7 @@ def getStats(self):
         """
         stat_info = os_stat(self.filepath)
         stats = {}
-        stats['created'] = stat_info.st_ctime
+        stats['created'] = stat_info.st_birthtime
         stats["lastModified"] = stat_info.st_mtime
         stats['owner'] = stat_info.st_uid  # TBD: convert to username?
         return stats
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index dc62ed72..5e1e20d7 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -401,6 +401,7 @@ def updateAttributes(self, obj_id, obj):
                     del obj.attrs[name]
                 else:
                     pass  # already deleted or never added
+                continue
             if "created" in attr_json and attr_json["created"] < self._flush_time:
                 # attribute should be saved already
                 continue
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 18e4f3e0..be84be92 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -9,7 +9,7 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import time
+
 import numpy as np
 import logging
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
@@ -18,6 +18,7 @@
 from .filters import getFiltersJson
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId, getHashTagForId
 from . import selections
+from .time_util import getNow
 from .apiversion import _apiver
 from .h5reader import H5Reader, H5NullReader
 from .h5writer import H5Writer, H5NullWriter
@@ -138,7 +139,7 @@ def make_dirty(self, obj_id):
             # object deleted, just return
             return
         obj_json = self.db[obj_id]
-        obj_json["lastModified"] = time.time()
+        obj_json["lastModified"] = getNow()
         if not self.is_new(obj_id):
             # object hasn't been initially written yet, add to dirt_object set
             self._dirty_objects.add(obj_id)
@@ -520,7 +521,7 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         type_json = getTypeItem(dtype)
         # finally put it all together...
         attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
-        attr_json["created"] = time.time()
+        attr_json["created"] = getNow()
 
         # slot into the obj_json["attrs"]
         attrs_json[name] = attr_json
@@ -535,7 +536,7 @@ def deleteAttribute(self, obj_id, name):
         if name not in attrs_json:
             raise KeyError(f"attribute [{name}] not found in {obj_id}")
         attr_json = attrs_json[name]
-        attr_json["DELETED"] = time.time()  # mark key for deletion
+        attr_json["DELETED"] = getNow()  # mark key for deletion
 
         self.make_dirty(obj_id)
 
@@ -726,26 +727,26 @@ def _addLink(self, grp_id, name, link_json):
     def createHardLink(self, grp_id, name, tgt_id):
         """ Create a new hardlink """
         link_json = {"class": "H5L_TYPE_HARD", "id": tgt_id}
-        link_json["created"] = time.time()
+        link_json["created"] = getNow()
         self._addLink(grp_id, name, link_json)
 
     def createSoftLink(self, grp_id, name, h5path):
         """ Create a soft link """
         link_json = {"class": "H5L_TYPE_SOFT", "h5path": h5path}
-        link_json["created"] = time.time()
+        link_json["created"] = getNow()
         self._addLink(grp_id, name, link_json)
 
     def createCustomLink(self, grp_id, name, link_json):
         """ create a custom link """
         if link_json.get("class") != "H5L_TYPE_USER_DEFINED":
             link_json["class"] = "H5L_TYPE_USER_DEFINED"
-        link_json["created"] = time.time()
+        link_json["created"] = getNow()
         self._addLink(grp_id, name, link_json)
 
     def createExternalLink(self, grp_id, name, h5path, filepath):
         """ Create a external link link """
         link_json = {"class": "H5L_TYPE_EXTERNAL", "h5path": h5path, "file": filepath}
-        link_json["created"] = time.time()
+        link_json["created"] = getNow()
         self._addLink(grp_id, name, link_json)
 
     def deleteLink(self, grp_id, name):
@@ -757,7 +758,7 @@ def deleteLink(self, grp_id, name):
         if name not in links:
             raise KeyError(f"Link [{name}] not found in {grp_id}")
         link_json = links[name]
-        link_json["DELETED"] = time.time()  # mark for deletion
+        link_json["DELETED"] = getNow()  # mark for deletion
         self.make_dirty(grp_id)
         grp_json = self.getObjectById(grp_id)
         links = grp_json["links"]
@@ -772,7 +773,7 @@ def createGroup(self, cpl=None):
             group_json["creationProperties"] = cpl
         else:
             group_json["creationProperties"] = {}
-        group_json["created"] = time.time()
+        group_json["created"] = getNow()
         self.db[grp_id] = group_json
         self._new_objects.add(grp_id)
         return grp_id
@@ -797,7 +798,7 @@ def createCommittedType(self, datatype, cpl=None):
         type_json = getTypeItem(dt)  # get canonical json description of datatype
 
         ctype_json = {"type": type_json, "attributes": {}, "creationProperties": cpl}
-        ctype_json["created"] = time.time()
+        ctype_json["created"] = getNow()
         self.db[ctype_id] = ctype_json
         self._new_objects.add(ctype_id)
         return ctype_id
@@ -846,7 +847,7 @@ def createDataset(
             dset_json["creationProperties"] = cpl
         else:
             dset_json["creationProperties"] = {}
-        dset_json["created"] = time.time()
+        dset_json["created"] = getNow()
 
         dset_id = createObjId("datasets", root_id=self.root_id)
         self.db[dset_id] = dset_json
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index e4cc9c7d..74108313 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -12,12 +12,12 @@
 import unittest
 
 import logging
-import time
 import numpy as np
 
 from h5json import Hdf5db
 from h5json.h5pystore.h5py_reader import H5pyReader
 from h5json import selections
+from h5json.time_util import getNow
 
 
 class H5pyReaderTest(unittest.TestCase):
@@ -55,8 +55,9 @@ def testSimple(self):
         self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
         self.assertTrue("created" in g1_link)
         g1_created = g1_link["created"]
-        now = time.time()
-        self.assertTrue(g1_created < now)
+        now = getNow()
+        self.assertTrue(g1_created < int(now))
+
         g1_id = g1_link["id"]
         self.assertTrue(g1_id)
         self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index aa481dfd..5426310d 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -38,7 +38,7 @@ def __init__(self, *args, **kwargs):
         self.log.setLevel(logging.DEBUG)
         # create logger
 
-        handler = logging.FileHandler("./h5pywriterbtest.log")
+        handler = logging.FileHandler("./h5pywritertest.log")
         # add handler to logger
         self.log.addHandler(handler)
 

From 1f53fe012806c093bcd1471fa04f744f0d25dfe1 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Dec 2025 12:22:06 +0800
Subject: [PATCH 093/129] add time_util.py

---
 src/h5json/time_util.py | 86 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/h5json/time_util.py

diff --git a/src/h5json/time_util.py b/src/h5json/time_util.py
new file mode 100644
index 00000000..7cfcad69
--- /dev/null
+++ b/src/h5json/time_util.py
@@ -0,0 +1,86 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+from datetime import datetime
+import time
+import os
+import pytz
+
+def unixTimeToUTC(timestamp):
+    """Convert unix timestamp (seconds since Jan 1, 1970, to ISO-8601
+    compatible UTC time string.
+
+    """
+    utc = pytz.utc
+    dtTime = datetime.fromtimestamp(timestamp, utc)
+    iso_str = dtTime.isoformat()
+    # isoformat returns a string like this:
+    # '2014-10-30T04:25:21+00:00'
+    # strip off the '+00:00' and replace
+    # with 'Z' (both are ISO-8601 compatible)
+    npos = iso_str.rfind("+")
+    iso_z = iso_str[:npos] + "Z"
+    return iso_z
+
+
+def elapsedTime(timestamp):
+    """Get Elapsed time from given timestamp"""
+    delta = int(time.time()) - timestamp
+    if delta < 0:
+        return "Invalid timestamp!"
+    day_length = 24 * 60 * 60
+    days = 0
+    hour_length = 60 * 60
+    hours = 0
+    minute_length = 60
+    minutes = 0
+    ret_str = ""
+
+    if delta > day_length:
+        days = delta // day_length
+        delta = delta % day_length
+        ret_str += f"{days} days "
+    if delta > hour_length or days > 0:
+        hours = delta // hour_length
+        delta = delta % hour_length
+        ret_str += f"{hours} hours "
+    if delta > minute_length or days > 0 or hours > 0:
+        minutes = delta // minute_length
+        delta = delta % minute_length
+        ret_str += f"{minutes} minutes "
+    ret_str += f"{delta} seconds"
+    return ret_str
+
+
+def getNow(app=None):
+    """
+    Get current time in unix timestamp
+
+    Returns a precise timestamp even on platforms where
+    time.time() has low resolution (e.g. Windows)
+    """
+    system = os.name
+    current_time = 0
+
+    if system == "nt":
+        # Windows
+        if app is None or "start_time_relative" not in app or "start_time" not in app:
+            current_time = time.time()  # just use lower precision time.time()
+        else:
+            current_time = (time.perf_counter() - app["start_time_relative"]) + app["start_time"]
+    elif system == "posix":
+        # Unix
+        current_time = time.time()
+    else:
+        raise ValueError(f"Unsupported OS: {system}")
+
+    return current_time

From 6a6f38511e8b2f026013406f9d5fc0f01d6b4999 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Dec 2025 12:25:32 +0800
Subject: [PATCH 094/129] fix flake8 errors

---
 src/h5json/time_util.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/h5json/time_util.py b/src/h5json/time_util.py
index 7cfcad69..24f6a835 100644
--- a/src/h5json/time_util.py
+++ b/src/h5json/time_util.py
@@ -15,6 +15,7 @@
 import os
 import pytz
 
+
 def unixTimeToUTC(timestamp):
     """Convert unix timestamp (seconds since Jan 1, 1970, to ISO-8601
     compatible UTC time string.

From 632260b103372f588ee3ab4ed02fac2e6d0dd880 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 9 Dec 2025 12:34:19 +0800
Subject: [PATCH 095/129] revert getStats change

---
 src/h5json/h5pystore/h5py_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index 6725c783..fddfedb4 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -568,7 +568,7 @@ def getStats(self):
         """
         stat_info = os_stat(self.filepath)
         stats = {}
-        stats['created'] = stat_info.st_birthtime
+        stats['created'] = stat_info.st_ctime
         stats["lastModified"] = stat_info.st_mtime
         stats['owner'] = stat_info.st_uid  # TBD: convert to username?
         return stats

From d95766949bf9a1eceb9b907f6ded91963486c79e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Dec 2025 17:22:39 +0800
Subject: [PATCH 096/129] check for chunked for resiable dsets

---
 src/h5json/dset_util.py     | 14 +++++++-
 test/unit/dset_util_test.py | 68 ++++++++++++++++++-------------------
 2 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 2fce42ea..23e54aba 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -270,7 +270,6 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
         raise ValueError(msg)
 
     layout_class = layout["class"]
-
     if layout_class == "H5D_CONTIGUOUS_REF":
         # reference to a dataset in a traditional HDF5 files with
         # contiguous storage
@@ -299,6 +298,11 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
             msg = "'dims' key can not be provided for "
             msg += "H5D_CONTIGUOUS_REF layout"
             raise ValueError(msg)
+        if "maxdims" in shape_json:
+            # maxdims not allowed for H5D_CONTIGUOUS_REF
+            msg = "'maxdims' key can not be provided for "
+            msg += "H5D_CONTIGUOUS_REF layout"
+            raise ValueError(msg)
     elif layout_class == "H5D_CHUNKED_REF":
         # reference to a dataset in a traditional HDF5 files with
         # chunked storage
@@ -356,11 +360,19 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
             msg = "dims key found in layout for creation property list "
             msg += "for H5D_CONTIGUOUS storage class"
             raise ValueError(msg)
+        if "maxdims" in shape_json:
+            msg = "maxdims found in shape for creation property list "
+            msg += "for H5D_CONTIGUOUS storage class"
+            raise ValueError(msg)
     elif layout_class == "H5D_COMPACT":
         if "dims" in layout:
             msg = "dims key found in layout for creation property list "
             msg += "for H5D_COMPACT storage class"
             raise ValueError(msg)
+        if "maxdims" in shape_json:
+            msg = "maxdims found in shape for creation property list "
+            msg += "for H5D_COMPACT storage class"
+            raise ValueError(msg)
     else:
         msg = f"Unexpected layout: {layout_class}"
         raise ValueError(msg)
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index f7e4aa91..52fb49f9 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -13,7 +13,8 @@
 import logging
 
 from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk
-from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getDatasetLayout, getChunkDims
+from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getChunkDims
+from h5json.dset_util import validateChunkLayout, getDatasetLayout
 
 
 class DsetUtilTest(unittest.TestCase):
@@ -25,54 +26,53 @@ def __init__(self, *args, **kwargs):
 
     def testGetLayout(self):
         contiguous_layout = {'class': 'H5D_CONTIGUOUS'}
+        fixed_1d_shape_json = {'class': 'H5S_SIMPLE', 'dims': [10]}
+        resizable_shape_json = {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]}
+        base_type = 'H5T_IEEE_F32LE'
+        item_size = 4  # bytes
+        type_json = {'class': 'H5T_FLOAT', 'base': base_type}
+        chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [2, ]}
+        cpl = {'fillValue': 3.12, 'layout': contiguous_layout}
+
         dset_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
                      'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
                      'created': 1760613930.3584619,
-                     'type': {'class': 'H5T_FLOAT', 'base': 'H5T_IEEE_F32LE'},
-                     'shape': {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]},
+                     'type': type_json,
+                     'shape': resizable_shape_json,
                      'lastModified': 1760613930.3584619,
-                     'attributeCount': 0,
-                     'creationProperties': {'fillValue': 3.12, 'layout': contiguous_layout}}
+                     'creationProperties': cpl}
 
         layout = getDatasetLayout(dset_json)
         self.assertTrue("class" in layout)
         layout_class = getDatasetLayoutClass(dset_json)
         self.assertEqual(layout_class, "H5D_CONTIGUOUS")
-        chunk_dims = getChunkDims(dset_json)
-        self.assertEqual(chunk_dims, (10, ))
-
-        compact_layout = {'class': 'H5D_COMPACT'}
-        dset_compact_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
-                             'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
-                             'created': 1760613930.3584619,
-                             'type': {'class': 'H5T_FLOAT', 'base': 'H5T_IEEE_F32LE'},
-                             'shape': {'class': 'H5S_SCALAR'},
-                             'lastModified': 1760613930.3584619,
-                             'attributeCount': 0,
-                             'creationProperties': {'fillValue': 3.12, 'layout': compact_layout}}
-
-        layout = getDatasetLayout(dset_compact_json)
+
+        # contigous layout with resizable shape should raise exception
+        try:
+            validateChunkLayout(dset_json["shape"], item_size, layout)
+            self.assertTrue(False)  # should not reach here
+        except ValueError:
+            pass  # should raise exception
+
+        dset_json["shape"] = fixed_1d_shape_json
+        layout = getDatasetLayout(dset_json)
         self.assertTrue("class" in layout)
         layout_class = getDatasetLayoutClass(dset_json)
         self.assertEqual(layout_class, "H5D_CONTIGUOUS")
-        chunk_dims = getChunkDims(dset_compact_json)
-        self.assertEqual(chunk_dims, (1, ))
 
-        chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [2, ]}
-        dset_chunked_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
-                             'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
-                             'created': 1760613930.3584619,
-                             'type': {'class': 'H5T_FLOAT', 'base': 'H5T_IEEE_F32LE'},
-                             'shape': {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]},
-                             'lastModified': 1760613930.3584619,
-                             'attributeCount': 0,
-                             'creationProperties': {'fillValue': 3.12, 'layout': chunked_layout}}
-
-        layout = getDatasetLayout(dset_chunked_json)
+        dset_json["shape"] = resizable_shape_json
+        cpl["layout"] = chunked_layout
+        layout = getDatasetLayout(dset_json)
         self.assertTrue("class" in layout)
-        layout_class = getDatasetLayoutClass(dset_chunked_json)
+        layout_class = getDatasetLayoutClass(dset_json)
         self.assertEqual(layout_class, "H5D_CHUNKED")
-        chunk_dims = getChunkDims(dset_chunked_json)
+
+        try:
+            validateChunkLayout(dset_json["shape"], item_size, layout)
+        except ValueError:
+            self.assertTrue(False)  # should raise exception
+
+        chunk_dims = getChunkDims(dset_json)
         self.assertEqual(chunk_dims, (2, ))
 
     def testGuessChunk(self):

From 978a54849234bae57d37bdf14738a22e62ccb7ef Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Dec 2025 18:48:22 +0800
Subject: [PATCH 097/129] added validateDatasetCreationProps

---
 src/h5json/dset_util.py     | 58 +++++++++++++++++++++++++++++++++++--
 test/unit/dset_util_test.py |  5 ++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 23e54aba..57d983b5 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -11,8 +11,10 @@
 ##############################################################################
 
 import math
-from .hdf5dtype import getItemSize
+from .hdf5dtype import getItemSize, createDataType
 from .shape_util import getDataSize
+from .array_util import getNumpyValue
+from .filters import getFiltersJson
 from .objid import isValidUuid
 
 CHUNK_MIN = 512 * 1024  # Soft lower limit (512k)
@@ -216,7 +218,7 @@ def getChunkDims(dset_json):
     return chunk_dims
 
 
-def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
+def validateChunkLayout(shape_json, type_json, layout):
     """
     Use chunk layout given in the creationPropertiesList (if defined and
     layout is valid).
@@ -227,6 +229,7 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
     space_dims = None
     chunk_dims = None
     max_dims = None
+    item_size = getItemSize(type_json)
 
     if "dims" in shape_json:
         space_dims = shape_json["dims"]
@@ -378,6 +381,57 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None):
         raise ValueError(msg)
 
 
+def validateDatasetCreationProps(creation_props, type_json=None, shape=None):
+    """ validate creation props """
+
+    if not type_json or not shape:
+        msg = "validateDatasetCreationProps - shape and type must be set"
+        raise ValueError(msg)
+
+    if "fillValue" in creation_props:
+        # validate fill value compatible with type
+        dt = createDataType(type_json)
+        fill_value = creation_props["fillValue"]
+        if "fillValue_encoding" in creation_props:
+            fill_value_encoding = creation_props["fillValue_encoding"]
+            if fill_value_encoding not in ("None", "base64"):
+                msg = f"unexpected value for fill_value_encoding: {fill_value_encoding}"
+                raise ValueError(msg)
+            else:
+                # should see a string in this case
+                if not isinstance(fill_value, str):
+                    msg = f"unexpected fill value: {fill_value} "
+                    msg += f"for encoding: {fill_value_encoding}"
+                    raise ValueError(msg)
+        else:
+            fill_value_encoding = None
+
+            try:
+                getNumpyValue(fill_value, dt=dt, encoding=fill_value_encoding)
+            except ValueError:
+                msg = f"invalid fill value: {fill_value}"
+                raise ValueError(msg)
+
+    layout_class = None
+    if "layout" in creation_props:
+        layout_json = creation_props["layout"]
+        validateChunkLayout(shape, type_json, layout_json)
+        layout_class = layout_json["class"]
+
+    if "filters" in creation_props:
+        try:
+            filters_out = getFiltersJson(creation_props)
+        except (KeyError, ValueError):
+            # raise bad request exception if not valid
+            msg = "invalid filter provided"
+            raise ValueError(msg)
+        if filters_out:
+            # check that a chunked layout is used
+            if layout_class is None or layout_class.startswith("H5D_CHUNKED") is False:
+                msg = "filters can only be used with chunked layout"
+                raise ValueError(msg)
+
+
 def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
     """Compute an increased chunk shape with a size in bytes greater than chunk_min."""
     if shape_json is None or shape_json["class"] == "H5S_NULL":
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 52fb49f9..3b2e35cd 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -29,7 +29,6 @@ def testGetLayout(self):
         fixed_1d_shape_json = {'class': 'H5S_SIMPLE', 'dims': [10]}
         resizable_shape_json = {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]}
         base_type = 'H5T_IEEE_F32LE'
-        item_size = 4  # bytes
         type_json = {'class': 'H5T_FLOAT', 'base': base_type}
         chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [2, ]}
         cpl = {'fillValue': 3.12, 'layout': contiguous_layout}
@@ -49,7 +48,7 @@ def testGetLayout(self):
 
         # contigous layout with resizable shape should raise exception
         try:
-            validateChunkLayout(dset_json["shape"], item_size, layout)
+            validateChunkLayout(dset_json["shape"], type_json, layout)
             self.assertTrue(False)  # should not reach here
         except ValueError:
             pass  # should raise exception
@@ -68,7 +67,7 @@ def testGetLayout(self):
         self.assertEqual(layout_class, "H5D_CHUNKED")
 
         try:
-            validateChunkLayout(dset_json["shape"], item_size, layout)
+            validateChunkLayout(dset_json["shape"], type_json, layout)
         except ValueError:
             self.assertTrue(False)  # should raise exception
 

From 6cb136e61a2de1298dfbf56a1ce515b167107103 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 11 Dec 2025 18:55:46 +0800
Subject: [PATCH 098/129] updated dset_util_test

---
 test/unit/dset_util_test.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 3b2e35cd..85c260f2 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -14,7 +14,7 @@
 
 from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk
 from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getChunkDims
-from h5json.dset_util import validateChunkLayout, getDatasetLayout
+from h5json.dset_util import validateChunkLayout, validateDatasetCreationProps, getDatasetLayout
 
 
 class DsetUtilTest(unittest.TestCase):
@@ -69,11 +69,16 @@ def testGetLayout(self):
         try:
             validateChunkLayout(dset_json["shape"], type_json, layout)
         except ValueError:
-            self.assertTrue(False)  # should raise exception
+            self.assertTrue(False)  # shouldn't raise exception
 
         chunk_dims = getChunkDims(dset_json)
         self.assertEqual(chunk_dims, (2, ))
 
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+        except ValueError:
+            self.assertTrue(False)  # shouldn't raise exception
+
     def testGuessChunk(self):
 
         typesize = "H5T_VARIABLE"

From 678025bfb2f80499b52abb726a456a056e41f2ef Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 15 Dec 2025 15:21:13 +0800
Subject: [PATCH 099/129] added filter validation

---
 src/h5json/dset_util.py     |   2 +-
 src/h5json/filters.py       | 107 ++++++++++++++++++++++++++++++------
 test/unit/dset_util_test.py |  55 ++++++++++++++++++
 3 files changed, 145 insertions(+), 19 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 57d983b5..872e3160 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -421,7 +421,7 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None):
     if "filters" in creation_props:
         try:
             filters_out = getFiltersJson(creation_props)
-        except (KeyError, ValueError):
+        except (KeyError, TypeError, ValueError):
             # raise bad request exception if not valid
             msg = "invalid filter provided"
             raise ValueError(msg)
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index 9164f1e8..d4b256d7 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -71,6 +71,20 @@
 )
 
 
+def getAllFilterNames():
+    """ Return list of all recognized filter names """
+
+    names = set()
+    for item in FILTER_DEFS:
+        filter_id = item[1]
+        filter_name = item[2]
+        if filter_id > 0 and filter_name:
+            names.add(filter_name)
+    names = list(names)
+    names.sort()
+    return tuple(names)
+
+
 def getFilterItem(key):
     """
     Return filter code, id, and name, based on an id, a name or a code.
@@ -102,6 +116,9 @@ def getFiltersJson(create_props, supported_filters=None):
         msg = "Expected filters in creation_props to be a list"
         raise TypeError(msg)
 
+    if not supported_filters:
+        supported_filters = getAllFilterNames()
+
     f_out = []
     for filter in f_in:
         if isinstance(filter, int) or isinstance(filter, str):
@@ -115,11 +132,12 @@ def getFiltersJson(create_props, supported_filters=None):
                 raise ValueError(msg)
             f_out.append(item)
         elif isinstance(filter, dict):
-            if "class" not in filter:
-                msg = "expected 'class' key for filter property"
-                raise KeyError(msg)
-            if filter["class"] != "H5Z_FILTER_USER":
-                item = getFilterItem(filter["class"])
+            if filter.get("class") == "H5Z_FILTER_USER":
+                # user filter - must have either id or name
+                if "id" not in filter and "name" not in filter:
+                    msg = "user filter must have either 'id' or 'name' key"
+                    raise KeyError(msg)
+                item = filter
             elif "id" in filter:
                 item = getFilterItem(filter["id"])
             elif "name" in filter:
@@ -127,21 +145,74 @@ def getFiltersJson(create_props, supported_filters=None):
             else:
                 item = None
             if not item:
-                msg = f"filter {filter['class']} not recognized"
-                raise ValueError(msg)
-            if "id" not in filter:
-                filter["id"] = item["id"]
-            elif item["id"] != filter["id"]:
-                msg = f"Expected {filter['class']} to have id: "
-                msg += f"{item['id']} but got {filter['id']}"
+                msg = f"filter {filter} not recognized"
                 raise ValueError(msg)
-            if "name" not in filter:
-                filter["name"] = item["name"]
-            if filter["name"] not in supported_filters:
-                msg = f"filter {filter} is not supported"
-                raise KeyError(msg)
 
-            f_out.append(filter)
+            # copy any filter specified options
+            filter_class = item["class"]
+            if filter_class == "H5Z_FILTER_DEFLATE":
+                if "level" in filter:
+                    level_val = filter["level"]
+                    if not isinstance(level_val, int):
+                        msg = "Expected integer level for deflate filter"
+                        raise TypeError(msg)
+                    if level_val < 0 or level_val > 9:
+                        msg = "Deflate filter level must be between 0 and 9"
+                        raise ValueError(msg)
+                    item["level"] = level_val
+            elif filter_class == "H5Z_FILTER_SHUFFLE":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_FLETCHER32":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_SZIP":
+                for key in ("bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"):
+                    if key in filter:
+                        val = filter[key]
+                        if key == "coding":
+                            if val not in HDF_FILTER_OPTION_ENUMS["coding"].values():
+                                msg = f"Invalid coding option for szip filter: {val}"
+                                raise ValueError(msg)
+                            else:
+                                # other options need to be positivie integers
+                                if not isinstance(val, int) or val <= 0:
+                                    msg = f"Expected positive integer for szip filter option {key}"
+                                    raise ValueError(msg)
+                        item[key] = val
+            elif filter_class == "H5Z_FILTER_NBIT":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_SCALEOFFSET":
+                if "scaleType" in filter:
+                    val = filter["scaleType"]
+                    if val not in HDF_FILTER_OPTION_ENUMS["scaleType"].values():
+                        msg = f"Invalid scaleType option for scaleoffset filter: {val}"
+                        raise ValueError(msg)
+                    else:
+                        item["scaleType"] = val
+                if "scaleOffset" in filter:
+                    val = filter["scaleOffset"]
+                    if not isinstance(val, int) or val < 0:
+                        msg = "Expected non-negative integer for scaleOffset option"
+                        raise ValueError(msg)
+                    else:
+                        item["scaleOffset"] = val
+            elif filter_class == "H5Z_FILTER_LZF":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_BLOSC":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_SNAPPY":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_LZ4":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_LZ4HC":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_BITSHUFFLE":
+                pass  # no options
+            elif filter_class == "H5Z_FILTER_ZSTD":
+                pass  # no options
+            else:
+                msg = f"filter class {filter_class} is not supported"
+                raise KeyError(msg)
+            f_out.append(item)
         else:
             msg = f"Unexpected type for filter: {filter}"
             raise ValueError(msg)
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 85c260f2..82466013 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -79,6 +79,61 @@ def testGetLayout(self):
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
 
+    def testFilterValidation(self):
+
+        shape_json = {'class': 'H5S_SIMPLE', 'dims': [500]}
+        base_type = 'H5T_IEEE_F32LE'
+        type_json = {'class': 'H5T_FLOAT', 'base': base_type}
+        contiguous_layout = {'class': 'H5D_CONTIGUOUS'}
+        chunked_layout = {'class': 'H5D_CHUNKED', 'dims': [100, ]}
+        deflate_filter = {'class': 'H5Z_FILTER_DEFLATE', 'id': 1, 'name': 'deflate'}
+        filters = [deflate_filter, ]
+        cpl = {'fillValue': 3.12, 'layout': contiguous_layout, "filters": filters}
+
+        dset_json = {'id': 'd-f4a9f95e-c8962a53-f6c8-f18440-78d051',
+                     'root': 'g-f4a9f95e-c8962a53-7c21-71d640-1ea2db',
+                     'created': 1760613930.3584619,
+                     'type': type_json,
+                     'shape': shape_json,
+                     'lastModified': 1760613930.3584619,
+                     'creationProperties': cpl}
+
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+            self.assertTrue(False)  # should not reach here
+        except ValueError:
+            pass  # filters are invalid with contiguous layout
+        cpl["layout"] = chunked_layout
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+        except ValueError:
+            self.assertTrue(False)  # shouldn't raise exception
+        # add an invlaid level option for deflate
+        deflate_filter["level"] = 20
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+            self.assertTrue(False)  # should not reach here
+        except ValueError:
+            pass  # invalid deflate level
+        deflate_filter["level"] = 5
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+        except ValueError:
+            self.assertTrue(False)  # shouldn't raise exception
+        # try with just a filter name
+        cpl["filters"] = ["gzip", ]
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+        except ValueError:
+            self.assertTrue(False)  # shouldn't raise exception
+        # try with an invalid filter name
+        cpl["filters"] = ["invalid_filter_name", ]
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+            self.assertTrue(False)  # should not reach here
+        except ValueError:
+            pass  # invalid filter name
+
     def testGuessChunk(self):
 
         typesize = "H5T_VARIABLE"

From b68e9679f3ba6b5a2b977b0d22a06016da19a6ef Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 17 Dec 2025 17:50:04 +0800
Subject: [PATCH 100/129] fix for getFilters

---
 src/h5json/filters.py         |  3 +++
 src/h5json/hdf5db.py          |  2 +-
 test/unit/dset_util_test.py   |  9 +++++++
 test/unit/h5py_writer_test.py | 45 +++++++++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index d4b256d7..724ac929 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -148,6 +148,9 @@ def getFiltersJson(create_props, supported_filters=None):
                 msg = f"filter {filter} not recognized"
                 raise ValueError(msg)
 
+            # will replace options list with specified options
+            del item["options"]
+
             # copy any filter specified options
             filter_class = item["class"]
             if filter_class == "H5Z_FILTER_DEFLATE":
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index be84be92..08af1c16 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -838,7 +838,7 @@ def createDataset(
         if cpl:
             if "filters" in cpl:
                 if self.writer:
-                    supported_filters = self.writer.getSupportedFilters()
+                    supported_filters = self.writer.getFilters()
                 else:
                     supported_filters = ()
                 # validate and normalize supplied filter property list
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 82466013..c029fd01 100755
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -134,6 +134,15 @@ def testFilterValidation(self):
         except ValueError:
             pass  # invalid filter name
 
+        deflate_filter = {'class': 'H5Z_FILTER_DEFLATE', 'id': 1, 'level': 9, 'name': 'deflate'}
+        fletcher_filter = {'class': 'H5Z_FILTER_FLETCHER32', 'id': 3, 'name': 'fletcher32'}
+        filters = [fletcher_filter, deflate_filter]
+        cpl["filters"] = filters
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+        except ValueError:
+            self.assertTrue(False)  # shouldn't raise exception
+
     def testGuessChunk(self):
 
         typesize = "H5T_VARIABLE"
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 5426310d..5b2ff629 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -635,6 +635,51 @@ def testReaderWithUpdate(self):
                     else:
                         self.assertEqual(data[i, j], 0)
 
+    def testCompression(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCompression.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        self.assertEqual(db.getObjectIdByPath("/"), root_id)
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+
+        layout = {"class": "H5D_CHUNKED", "dims": (10, 1)}
+        gzip_filter = {
+            "class": "H5Z_FILTER_DEFLATE",
+            "id": 1,
+            "level": 9,
+            "name": "deflate",
+        }
+        cpl = {"layout": layout, "filters": [gzip_filter, ]}
+        dset_id = db.createDataset(shape=(10, 10), dtype=np.int32, cpl=cpl)
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_id, sel_all, arr)
+        db.createHardLink(g1_id, "dset1.1.1", dset_id)
+        db.close()
+
+        # open file with h5py and verify changes
+        with h5py.File(filepath) as f:
+
+            self.assertTrue("g1" in f)
+
+            g1 = f["g1"]
+            self.assertEqual(len(g1), 1)
+            self.assertTrue("dset1.1.1" in g1)
+            dset = g1["dset1.1.1"]
+            self.assertEqual(dset.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    self.assertEqual(dset[i, j], i * j)
+
 
 if __name__ == "__main__":
     # setup test files

From 7ad35b7bcb8629099bf0a06460eb08e6555ad852 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 22 Dec 2025 15:44:44 +0800
Subject: [PATCH 101/129] updates for dataset reads/writes

---
 pyproject.toml                      |   6 +-
 src/h5json/dset_util.py             |   4 +
 src/h5json/h5pystore/h5py_reader.py |   1 -
 src/h5json/h5reader.py              |   6 +-
 src/h5json/h5writer.py              |   3 +-
 src/h5json/hdf5db.py                | 182 +++++++++++++++++-----------
 src/h5json/selections.py            |  23 ++++
 test/unit/h5py_writer_test.py       |   1 +
 test/unit/hdf5db_test.py            |  72 +++++++++++
 9 files changed, 220 insertions(+), 78 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 11302438..a299a9e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,8 @@ classifiers = [
 authors = [{ "name" = "The HDF Group", "email" = "help@hdfgroup.org" }]
 keywords = ["json", "hdf5", "multidimensional array", "data", "datacube"]
 requires-python = ">=3.9"
+version = "1.0.0"
+
 dependencies = [
     "h5py >= 3.10",
     "numpy >= 2.0; python_version>='3.9'",
@@ -24,7 +26,7 @@ dependencies = [
     "tomli; python_version<'3.11'",
 ]
 
-dynamic = ["version"]
+#dynamic = ["version"]
 
 [project.urls]
 Homepage = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
@@ -44,7 +46,7 @@ dev = ["check-manifest"]
 test = ["coverage"]
 
 [build-system]
-requires = ["setuptools", "setuptools_scm", "wheel"]
+requires = ["setuptools >= 61"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools]
diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 872e3160..b9b57563 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -40,6 +40,10 @@ def getDatasetLayout(dset_json):
         if "layout" in cp:
             layout = cp["layout"]
 
+    if layout is None and "layout" in dset_json:
+        # previous HSDS versions stored layout here
+        layout = dset_json["layout"]
+
     return layout
 
 
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index fddfedb4..e0d5d825 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -13,7 +13,6 @@
 import numpy as np
 import logging
 from os import stat as os_stat
-import time
 
 from ..objid import createObjId, getCollectionForId
 from ..hdf5dtype import getTypeItem, isOpaqueDtype
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
index a4127097..08df2adb 100644
--- a/src/h5json/h5reader.py
+++ b/src/h5json/h5reader.py
@@ -14,7 +14,6 @@
 
 import logging
 import time
-import numpy as np
 
 from .objid import createObjId
 
@@ -158,10 +157,9 @@ def getDatasetValues(self, obj_id, sel=None, dtype=None):
         number of elements as the rank of the dataset.
         """
 
-        # just return a zero array
-        arr = np.zeros(sel.shape, dtype=dtype)
+        # just return None
 
-        return arr
+        return None
 
     def open(self):
         """ Open data source for reading """
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
index 422a0450..fc368bfe 100644
--- a/src/h5json/h5writer.py
+++ b/src/h5json/h5writer.py
@@ -78,7 +78,8 @@ def open(self):
     @abstractmethod
     def flush(self):
         """ Write dirty items """
-        pass
+        # return False since we can't actually persist anything
+        return False
 
     @abstractmethod
     def close(self):
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 08af1c16..9468c9fe 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -15,6 +15,7 @@
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
 from .array_util import jsonToArray, bytesArrayToList
 from .dset_util import resize_dataset
+from .shape_util import getShapeClass, getShapeDims
 from .filters import getFiltersJson
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId, getHashTagForId
 from . import selections
@@ -24,6 +25,14 @@
 from .h5writer import H5Writer, H5NullWriter
 
 
+def _getDatasetUpdates(dset_json):
+    """ return a list of value updates for the datset.
+        initalize one if not already present. """
+    if "updates" not in dset_json:
+        dset_json["updates"] = []
+    return dset_json["updates"]
+
+
 class Hdf5db:
     """
     This class is used to manage id lookup tables for primary HDF objects (Groups, Datasets,
@@ -109,10 +118,12 @@ def root_id(self):
 
     def is_new(self, obj_id):
         """ return true if this is a new object (has not been persisted) """
+        obj_id = getHashTagForId(obj_id)
         return obj_id in self._new_objects
 
     def is_dirty(self, obj_id):
         """ return true if this object has been modified """
+        obj_id = getHashTagForId(obj_id)
         if self.is_new(obj_id):
             return True
         return obj_id in self._dirty_objects
@@ -131,7 +142,7 @@ def deleted_objects(self):
 
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
-
+        obj_id = getHashTagForId(obj_id)
         if obj_id not in self.db:
             self.log.error("make dirty called on deleted object")
             raise KeyError(f"obj_id: {obj_id} not found")
@@ -236,8 +247,8 @@ def close(self):
         """ close reader and writer handles """
         self.log.info("Hdf5db __close")
 
-        self.flush()
-        if self.writer:
+        if self.writer and not isinstance(self.writer, H5NullWriter):
+            self.flush()
             self.writer.close()
         if self.reader:
             self.reader.close()
@@ -280,13 +291,13 @@ def _checkWriter(self):
     def getObjectById(self, obj_id, refresh=False):
         """ return object with given id """
         self._checkReader()
-        tag = getHashTagForId(obj_id)
-        if tag not in self.db or refresh:
+        obj_id = getHashTagForId(obj_id)
+        if obj_id not in self.db or refresh:
             # load the obj from the reader
             self.log.debug(f"getObjectById - fetching {obj_id} from reader")
             obj_json = self.reader.getObjectById(obj_id)
-            self.db[tag] = obj_json
-        obj_json = self.db[tag]
+            self.db[obj_id] = obj_json
+        obj_json = self.db[obj_id]
 
         return obj_json
 
@@ -299,6 +310,9 @@ def getObjectIdByPath(self, h5path, parent_id=None):
 
         if parent_id is None:
             parent_id = self.root_id
+        else:
+            parent_id = getHashTagForId(parent_id)
+
         self.log.debug(f"getObjectIdDByPath(h5path: {h5path} parent_id: {parent_id}")
 
         obj_json = self.getObjectById(parent_id)
@@ -359,7 +373,7 @@ def getObjectByPath(self, path):
         return obj_json
 
     def getDtype(self, obj_json):
-        """ Return numpy data type for given object id
+        """ Return numpy data type for given dataset, datatype, or attribute
         """
 
         if "type" not in obj_json:
@@ -546,30 +560,26 @@ def getDatasetValues(self, dset_id, sel):
         If a slices list or tuple is provided, it should have the same
         number of elements as the rank of the dataset.
         """
+
+        def init_arr(dtype, cpl):
+            """ create an ndarray with the give shape, dtype and fill_value
+                (if the latter is found in the creation properties list) """
+            arr_shape = sel.count if isinstance(sel.count, tuple) else (sel.count, )
+            arr = np.zeros(arr_shape, dtype=dtype)
+            if "fillValue" in cpl:
+                fillValue = cpl["fillValue"]
+                # TBD: fix for compound types
+                arr[...] = fillValue
+            return arr
+
+        dset_id = getHashTagForId(dset_id)
         self.log.info(f"getDatasetValues dset_id: {dset_id}, sel: {sel}")
 
-        self._checkReader()
         dset_json = self.getObjectById(dset_id)
         shape_json = dset_json["shape"]
         if not isinstance(sel, selections.Selection):
             raise TypeError("Expected Selection class")
 
-        if shape_json["class"] == "H5S_NULL":
-            return None
-
-        if shape_json["class"] == "H5S_SCALAR":
-            if sel.select_type != selections.H5S_SELECT_ALL:
-                # TBD: support other selection types
-                raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
-            if sel.shape != ():
-                raise ValueError("Selection shape does not match dataset shape")
-            rank = 0
-        else:
-            dims = tuple(shape_json["dims"])
-            if sel.shape != dims:
-                raise ValueError("Selection shape does not match dataset shape")
-            rank = len(dims)
-
         dtype = self.getDtype(dset_json)
 
         if "creationProperties" in dset_json:
@@ -577,50 +587,72 @@ def getDatasetValues(self, dset_id, sel):
         else:
             cpl = {}
 
-        # determine if we need to make a read request or not
-        if dset_id in self._new_objects:
+        updates = _getDatasetUpdates(dset_json)
+
+        shape_class = getShapeClass(shape_json)
+
+        if shape_class == "H5S_NULL":
+            # return None for selections on null space
+            return None
+
+        if sel.shape != getShapeDims(shape_json):
+            raise ValueError("Selection shape does not match dataset shape")
+
+        if shape_class == "H5S_SCALAR":
+            if sel.select_type != selections.H5S_SELECT_ALL:
+                # TBD: support other selection types
+                raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
+            if sel.shape != ():
+                raise ValueError("Selection shape does not match dataset shape")
+            if updates:
+                # for scalars the update has to be the requested value
+                (update_sel, arr) = updates[-1]
+            elif dset_id in self._new_objects:
+                arr = init_arr(dtype, cpl)
+            else:
+                # fetch from the server
+                arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
+                if arr is None:
+                    raise KeyError(f"Data for dataset {dset_id} not returned")
+            # done with NULL and SCALAR cases
+            return arr
+
+        # simple daaset
+        arr = None
+        fetch = True
+
+        # determine if we need to get data from the reader
+        if isinstance(self._reader, H5NullReader) or dset_id in self._new_objects:
             fetch = False
         else:
-            fetch = True
-            # check against pending updates
-            if "updates" in dset_json:
-                updates = dset_json["updates"]
-                for (update_sel, update_val) in updates:
-                    if selections.contained(sel, update_sel):
-                        fetch = False
-                        break
-
-        # send a reader request unless an update already covers the sel area
-        if fetch:
-            arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
-        else:
-            if "fillValue" in cpl:
-                fillValue = cpl["fillValue"]
-                # TBD: fix for compound types
-                arr = np.zeros(sel.mshape, dtype=dtype)
-                arr[...] = fillValue
-            else:
-                arr = np.zeros(sel.mshape, dtype=dtype)
-
-        if "updates" in dset_json:
-            # apply any non-flushed changes that intersect the current selection
-            updates = dset_json["updates"]
             for (update_sel, update_val) in updates:
                 sel_inter = selections.intersect(sel, update_sel)
                 if sel_inter.nselect == 0:
                     continue
-                # update portion of arr, that intersects update_val
-                slices = []
-                for dim in range(rank):
-                    start = sel_inter.start[dim] - sel.start[dim]
-                    stop = start + sel_inter.count[dim]
-                    slices.append(slice(start, stop, 1))
-                slices = tuple(slices)
-                # TBD: needs updating to work in the general case!
-                if slices == ():
-                    arr[slices] = update_val[slices]
-                else:
-                    arr[slices] = update_val
+                if selections.contained(sel, update_sel):
+                    # desired selection is wholly contained in this update
+                    # TBD: determine if multiple updates would contain all the
+                    # required elements
+                    fetch = False
+                    break
+        if fetch:
+            # get last saved version of the data from the reader
+            arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
+        else:
+            # initialize an array with fill value if given
+            arr = init_arr(dtype, cpl)
+
+        # apply any updates that impact this selection
+        for (update_sel, update_val) in updates:
+            # get the part of the update that is in common with the requested selection
+            x_sel = selections.intersect(sel, update_sel)
+            if x_sel.nselect == 0:
+                # this update doesn't effect the selection, so ignore
+                continue
+            # apply the update to the array to be returned
+            src_sel = selections.translate(update_sel, x_sel)
+            tgt_sel = selections.translate(sel, x_sel)
+            arr[tgt_sel.slices] = update_val[src_sel.slices]
 
         return arr
 
@@ -641,22 +673,32 @@ def setDatasetValues(self, dset_id, sel, arr):
         src_dt = arr.dtype
         if src_dt != tgt_dt:
             raise TypeError("arr.dtype doesn't match dataset dtype")
-
-        if shape_json["class"] == "H5S_NULL":
+        shape_class = getShapeClass(shape_json)
+        if shape_class == "H5S_NULL":
             raise ValueError("writing to null space dataset not supported")
-        if shape_json["class"] == "H5S_SCALAR":
+        if shape_class == "H5S_SCALAR":
             if sel.shape != ():
                 raise ValueError("Selection shape does not match dataset shape")
             if len(arr.shape) > 0:
                 raise TypeError("Expected scalar ndarray for scalar dataset")
         else:
-            dims = tuple(shape_json["dims"])
+            dims = getShapeDims(shape_json)
             if sel.shape != dims:
                 raise ValueError("Selection shape does not match dataset shape")
-        if "updates" not in dset_json or sel.select_type == selections.H5S_SELECT_ALL:
+        updates = _getDatasetUpdates(dset_json)
+        if sel.select_type == selections.H5S_SELECT_ALL:
             # for select all, throw out any existing updates since this will overwrite them
-            dset_json["updates"] = []
-        updates = dset_json["updates"]
+            updates.clear()
+        arr = arr.copy()  # make a copy in case the client updates it later
+        rank = len(sel.shape)
+        if len(arr.shape) < rank:
+            # reshape to keep compatiblity with dataset rank
+            if sel.select_type == selections.H5S_SELECT_ALL:
+                # this should not result in a dimension reduction
+                raise ValueError("unexpected selection shape")
+            if sel.select_type != selections.H5S_SELECT_HYPERSLABS:
+                raise ValueError("tbd")
+            arr = arr.reshape(sel.mshape)
         updates.append((sel, arr.copy()))
         self.make_dirty(dset_id)
 
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index ec4ac649..93dd8bcb 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -158,6 +158,8 @@ def contained(s1, s2):
 
     is_contained = True
     rank = len(s1.shape)
+    if len(s2.shape) != rank:
+        raise ValueError("contained can be used in selections of different ranks")
     for dim in range(rank):
         if s1.step[dim] > 1 or s2.step[dim] > 1:
             # TBD: do the right thing for stepped selections
@@ -173,6 +175,27 @@ def contained(s1, s2):
     return is_contained
 
 
+def translate(s1, s2):
+    """ Given two selections, s1 and s2, return a new selection
+    definied by s2 relative to s1's stat and count.
+    s2 must be contained in s1 """
+
+    _check_bool_args(s1, s2)
+    sel_inter = intersect(s1, s2)
+    if sel_inter.nselect == 0:
+        raise ValueError("translate - selections not overlapping")
+
+    rank = len(s1.shape)
+
+    slices = []
+    for dim in range(rank):
+        start = s2.start[dim] - s1.start[dim]
+        count = s2.count[dim]
+        slices.append(slice(start, start + count, 1))
+    slices = tuple(slices)
+    return select(s1.shape, slices)
+
+
 class Selection(object):
 
     """
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 5b2ff629..f0091a39 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -58,6 +58,7 @@ def testOpen(self):
         self.assertEqual(db.getObjectIdByPath("/"), root_id)
         db.close()
         self.assertTrue(db.closed)
+        self.assertTrue(db.writer.isClosed())
         obj_id = db.open()
         self.assertEqual(obj_id, root_id)
         db.close()
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 63030ef2..11bdd30b 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -432,20 +432,85 @@ def testSimpleDataset(self):
         db.createAttribute(dset_id, "a1", "Hello, world")
         sel_all = selections.select(shape, ...)
         arr = db.getDatasetValues(dset_id, sel_all)
+
         self.assertEqual(arr.dtype, dtype)
         self.assertEqual(arr.shape, shape)
         self.assertEqual(arr.min(), 0)
         self.assertEqual(arr.max(), 0)
         row = np.zeros((ncols,), dtype=dtype)
+
+        # set values row by row
         for i in range(nrows):
             row[:] = list(range(i * 10, (i + 1) * 10))
             row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
             db.setDatasetValues(dset_id, row_sel, row)
+
+        # read entire dataset
         arr = db.getDatasetValues(dset_id, sel_all)
         for i in range(nrows):
             row = np.array(list(range(i * 10, (i + 1) * 10)), dtype=dtype)
             np.testing.assert_array_equal(arr[i, :], row)
 
+        # read row by row
+        for i in range(nrows):
+            sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
+            row = db.getDatasetValues(dset_id, sel)
+            self.assertTrue(isinstance(row, np.ndarray))
+            self.assertEqual(row.shape, (1, ncols))
+            for j in range(ncols):
+                self.assertEqual(row[0, j], i * 10 + j)
+
+        # read col by col
+        for j in range(ncols):
+            sel = selections.select(shape, (slice(0, ncols), slice(j, j + 1)))
+            col = db.getDatasetValues(dset_id, sel)
+            self.assertTrue(isinstance(col, np.ndarray))
+            self.assertEqual(col.shape, (nrows, 1))
+            for i in range(nrows):
+                self.assertEqual(col[i, 0], i * 10 + j)
+
+        # read element by element
+        for i in range(nrows):
+            for j in range(ncols):
+                sel = selections.select(shape, (slice(i, i + 1), slice(j, j + 1)))
+                val = db.getDatasetValues(dset_id, sel)
+                self.assertTrue(isinstance(val, np.ndarray))
+                self.assertEqual(val.shape, (1, 1))
+                self.assertEqual(val[0, 0], i * 10 + j)
+
+        db.close()
+
+    def testStringDataset(self):
+        nrows = 6
+        ncols = 3
+        shape = (nrows, ncols)
+        dtype = np.dtype("S1")
+        data = [[b'a', b'b', b'c'],
+                [b'd', b'e', b'f'],
+                [b'g', b'h', b'i'],
+                [b'j', b'k', b'l'],
+                [b'm', b'n', b'o'],
+                [b'x', b'y', b'z']]
+        init_arr = np.array(data, dtype=dtype)
+
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        sel_all = selections.select(shape, ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, shape)
+
+        db.setDatasetValues(dset_id, sel_all, init_arr)
+
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertTrue(np.array_equal(arr, init_arr))
+        sel_one = selections.select(shape, (slice(5, 6), slice(2, 3)))
+        arr = db.getDatasetValues(dset_id, sel_one)
+        self.assertEqual(arr.shape, (1, 1))
+        self.assertEqual(arr[0, 0], b'z')
+
         db.close()
 
     def testBoolDataset(self):
@@ -473,6 +538,13 @@ def testBoolDataset(self):
         self.assertEqual(arr.shape, (3,))
         self.assertEqual(list(arr[...]), [False, True, False])
 
+        # read back three elements
+        sel_three = selections.select(shape, slice(1, 4))
+        arr = db.getDatasetValues(dset_id, sel_three)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, (3,))
+        self.assertEqual(list(arr[...]), [True, False, False])
+
         db.close()
 
     def testScalarDataset(self):

From 8bb734e6b347426e0e9eb3ce0861f2b7ea870c85 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 24 Dec 2025 20:47:58 +0800
Subject: [PATCH 102/129] update filter methods

---
 src/h5json/dset_util.py      | 345 ++++++++++++++++----------------
 src/h5json/filters.py        | 377 +++++++++++++++++------------------
 src/h5json/hdf5db.py         |   5 +-
 src/h5json/shape_util.py     | 104 +++++++++-
 test/unit/dset_util_test.py  |   4 +-
 test/unit/filter_test.py     |  98 +++++++++
 test/unit/hdf5db_test.py     |   0
 test/unit/hdf5dtype_test.py  |   0
 test/unit/objid_test.py      |   0
 test/unit/shape_util_test.py |  41 +++-
 10 files changed, 593 insertions(+), 381 deletions(-)
 mode change 100755 => 100644 test/unit/dset_util_test.py
 create mode 100644 test/unit/filter_test.py
 mode change 100755 => 100644 test/unit/hdf5db_test.py
 mode change 100755 => 100644 test/unit/hdf5dtype_test.py
 mode change 100755 => 100644 test/unit/objid_test.py
 mode change 100755 => 100644 test/unit/shape_util_test.py

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index b9b57563..ffcf0147 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -12,13 +12,14 @@
 
 import math
 from .hdf5dtype import getItemSize, createDataType
-from .shape_util import getDataSize
+from .shape_util import getDataSize, getShapeClass, getNumElements, getShapeDims
+from .shape_util import isExtensible, getMaxDims, getRank
 from .array_util import getNumpyValue
-from .filters import getFiltersJson
+from .filters import validateFilters
 from .objid import isValidUuid
 
 CHUNK_MIN = 512 * 1024  # Soft lower limit (512k)
-CHUNK_MAX = 2048 * 1024  # Hard upper limit (2M)
+CHUNK_MAX = 8096 * 1024  # Hard upper limit (2M)
 
 
 LAYOUT_CLASSES = (
@@ -57,6 +58,33 @@ def getDatasetLayoutClass(dset_json):
     return layout_class
 
 
+def estimateDatasetSize(shape_json, item_size, chunk_min=CHUNK_MIN):
+    """ Get the dataset size in bytes.  Make a reasonable guess
+     for extensible datasets """
+
+    shape_class = getShapeClass(shape_json)
+    if shape_class == "H5S_NULL":
+        return 0
+    if shape_class == "H5S_SCALAR":
+        return item_size
+    if "maxdims" not in shape_json:
+        # can just multiple item_size by the number of elements
+        return item_size * getNumElements(shape_json)
+    max_dims = getMaxDims(shape_json)
+    rank = getRank(shape_json)
+    nsize = item_size
+    for dim in range(rank):
+        extent = max_dims[dim]
+        if extent not in (0, "H5S_UNLIMITED"):
+            nsize *= extent
+    # if the current size is less than min_chunk size,
+    # return something just larger than min_chunk_size
+    if chunk_min and nsize < chunk_min:
+        nsize = chunk_min
+        nsize = -(-nsize // item_size) * item_size  # round up to be divisible by item_size
+    return nsize
+
+
 def resize_dataset(dset_json, shape):
     """ Update shape dims to the given shape provided new shape is valid for maxdims """
     shape_json = dset_json["shape"]
@@ -88,7 +116,7 @@ def resize_dataset(dset_json, shape):
 
 def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None):
     """
-    create a chunk layout for datasets use contiguous storage.
+    create a chunk layout for datasets using contiguous storage.
     """
     if not isinstance(item_size, int):
         msg = "ContiguousLayout can only be used with fixed-length types"
@@ -154,49 +182,6 @@ def getChunkSize(chunk_dims, type_size: int = 1):
     return chunk_size
 
 
-def isExtensible(dims, maxdims):
-    """
-    Determine if the dataset can be extended
-    """
-    if maxdims is None or len(dims) == 0:
-        return False
-    rank = len(dims)
-    if len(maxdims) != rank:
-        raise ValueError("rank of maxdims does not match dataset")
-    for n in range(rank):
-        if maxdims[n] in (0, "H5S_UNLIMITED") or maxdims[n] > dims[n]:
-            return True
-    return False
-
-
-def getDsetMaxDims(dset_json):
-    """
-    Get maxdims from a given shape.  Return [1,] for Scalar datasets
-
-    Use with H5S_NULL datasets will throw a ValueError
-    """
-    if "shape" not in dset_json:
-        msg = "No shape found in dset_json"
-        raise KeyError(msg)
-    shape_json = dset_json["shape"]
-    shape_class = shape_json["class"]
-    maxdims = None
-    if shape_class == "H5S_NULL":
-        msg = "Expected shape class other than H5S_NULL"
-        raise ValueError(msg)
-    elif shape_class == "H5S_SCALAR":
-        maxdims = [1,]
-    elif shape_class == "H5S_SIMPLE":
-        if "maxdims" in shape_json:
-            maxdims = shape_json["maxdims"]
-        else:
-            maxdims = shape_json["dims"]
-    else:
-        msg = f"Unexpected shape class: {shape_class}"
-        raise ValueError(msg)
-    return tuple(maxdims)
-
-
 def getChunkDims(dset_json):
     """Get chunk layout.  Return shape dims for non-chunked layout"""
 
@@ -423,17 +408,17 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None):
         layout_class = layout_json["class"]
 
     if "filters" in creation_props:
+        filters = creation_props["filters"]
         try:
-            filters_out = getFiltersJson(creation_props)
-        except (KeyError, TypeError, ValueError):
+            validateFilters(filters)
+        except (KeyError, TypeError, ValueError) as e:
             # raise bad request exception if not valid
-            msg = "invalid filter provided"
+            msg = f"invalid filter provided: {str(e)}"
+            raise ValueError(msg)
+        # check that a chunked layout is used
+        if layout_class and layout_class.startswith("H5D_CHUNKED") is False:
+            msg = "filters can only be used with chunked layout"
             raise ValueError(msg)
-        if filters_out:
-            # check that a chunked layout is used
-            if layout_class is None or layout_class.startswith("H5D_CHUNKED") is False:
-                msg = "filters can only be used with chunked layout"
-                raise ValueError(msg)
 
 
 def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
@@ -540,7 +525,7 @@ def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX):
     return tuple(layout)
 
 
-def guessChunk(shape_json, typesize, chunk_min=None, chunk_max=None):
+def guessChunk(shape, typesize, chunk_min=None, chunk_max=None):
     """Guess an appropriate chunk layout for a dataset, given its shape and
     the size of each element in bytes.  Will allocate chunks only as large
     as MAX_SIZE.  Chunks are generally close to some power-of-2 fraction of
@@ -548,11 +533,17 @@ def guessChunk(shape_json, typesize, chunk_min=None, chunk_max=None):
 
     Undocumented and subject to change without warning.
     """
-    if shape_json is None or shape_json["class"] == "H5S_NULL":
+    if shape is None or isinstance(shape, dict) and shape.get("class") == "H5S_NULL":
         return None
-    if shape_json["class"] == "H5S_SCALAR":
+    if isinstance(shape, dict) and shape.get("class") == "H5S_SCALAR":
         return (1,)  # just enough to store one item
 
+    # if we are passed shape as a tuple, create an shape json using H5S_SIMPLE
+    if isinstance(shape, (list, tuple)):
+        shape_json = {"class": "H5S_SIMPLE", "dims": shape}
+    else:
+        shape_json = shape
+
     if "maxdims" in shape_json:
         shape = shape_json["maxdims"]
     else:
@@ -575,131 +566,133 @@ def guessChunk(shape_json, typesize, chunk_min=None, chunk_max=None):
     return shape
 
 
-def getLayoutJson(creation_props,
-                  shape=None,
-                  type_json=None,
-                  chunk_min=CHUNK_MIN,
-                  chunk_max=CHUNK_MAX,
-                  max_chunks_per_folder=0):
-    """ Get the layout json given by creation_props.
-        Raise value error if invalid """
+def generateLayout(
+        shape_json,
+        item_size=0,
+        has_filter=False,
+        chunks=None,
+        chunk_min=CHUNK_MIN,
+        chunk_max=CHUNK_MAX,
+        max_chunks_per_folder=0
+):
 
-    item_size = getItemSize(type_json)
+    """ Create a dataset layout based on type and shape properties  """
+
+    if item_size < 0:
+        raise ValueError("item_size is invalid")
+
+    shape_class = getShapeClass(shape_json)
+    if shape_class == "H5S_NULL":
+        if chunks or has_filter:
+            raise ValueError("Null space datasets do not support chunking")
+        return {}
+
+    if shape_class == "H5S_SCALAR":
+        if chunks or has_filter:
+            raise ValueError("Scalar datasets do not support chunking")
+        return {"class": "H5D_CONIGUOUS"}
 
     if chunk_min > chunk_max:
         msg = "chunk_max must be larger than chunk_min"
         raise ValueError(msg)
 
-    layout = None
-    if "layout" in creation_props:
-        layout_props = creation_props["layout"]
-    else:
-        layout_props = None
-
-    if layout_props:
-        if "class" not in layout_props:
-            msg = "expected class key in layout props"
-            raise KeyError(msg)
-        layout_class = layout_props["class"]
-        if layout_class == "H5D_CONTIGUOUS":
-            # treat contiguous as chunked
-            layout_class = "H5D_CHUNKED"
-        else:
-            layout_class = layout_props["class"]
-    elif shape["class"] != "H5S_NULL":
-        layout_class = "H5D_CHUNKED"
-    else:
-        layout_class = None
-
-    if layout_class == "H5D_COMPACT":
-        layout = {"class": "H5D_COMPACT"}
-    elif layout_class:
-        # initialize to H5D_CHUNKED
-        layout = {"class": "H5D_CHUNKED"}
-    else:
-        # null space - no layout
-        layout = None
+    dset_size = estimateDatasetSize(shape_json, item_size, chunk_min=chunk_min)
+    shape_dims = getShapeDims(shape_json)
+    rank = len(shape_dims)
+    max_dims = getMaxDims(shape_json)
+    extensible = isExtensible(shape_dims, max_dims)
 
-    if layout_props and "dims" in layout_props:
-        chunk_dims = layout_props["dims"]
-    else:
-        chunk_dims = None
+    if dset_size < chunk_min and not extensible and not has_filter and not chunks:
+        # can just return a contiguous layout
+        return {"class": "H5D_CONTIGUOUS"}
 
-    if layout_class == "H5D_CONTIGUOUS_REF":
+    layout = {"class": "H5D_CHUNKED"}  # otherwise use chunked layout
+    chunk_dims = None
+    if chunks:
+        if isinstance(chunks, (tuple, list)):
+            chunk_dims = chunks
+            if len(chunk_dims) != rank:
+                raise ValueError("given chunk dims do not agree with dataset rank")
+    if not chunk_dims:
         kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max}
-        chunk_dims = getContiguousLayout(shape, item_size, **kwargs)
-        layout["dims"] = chunk_dims
-
-    if layout_class == "H5D_CHUNKED" and chunk_dims is None:
-        # do auto-chunking
-        chunk_dims = guessChunk(shape, item_size)
-
-    if layout_class == "H5D_CHUNKED":
-        chunk_size = getChunkSize(chunk_dims, item_size)
-
-        # adjust the chunk shape if chunk size is too small or too big
-        adjusted_chunk_dims = None
-        if chunk_size < chunk_min:
-            kwargs = {"chunk_min": chunk_min, "layout_class": layout_class}
-            adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape, **kwargs)
-        elif chunk_size > chunk_max:
-            kwargs = {"chunk_max": chunk_max}
-            adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **kwargs)
-        if adjusted_chunk_dims:
-            layout["dims"] = adjusted_chunk_dims
-        else:
-            layout["dims"] = chunk_dims  # don't need to adjust chunk size
-
-        # set partition_count if needed:
-        set_partition = False
-        if max_chunks_per_folder > 0:
-            if "dims" in shape and "dims" in layout:
-                set_partition = True
-
-        if set_partition:
-            chunk_dims = layout["dims"]
-            shape_dims = shape["dims"]
-            if "maxdims" in shape:
-                max_dims = shape["maxdims"]
-            else:
-                max_dims = None
-            num_chunks = 1
-            rank = len(shape_dims)
-            unlimited_count = 0
-            if max_dims:
-                for i in range(rank):
-                    if max_dims[i] == 0:
-                        unlimited_count += 1
-            for i in range(rank):
-                max_dim = 1
-                if max_dims:
-                    max_dim = max_dims[i]
-                    if max_dim == 0:
-                        # don't really know what the ultimate extent
-                        # could be, but assume 10^6 for total number of
-                        # elements and square-shaped array...
-                        MAX_ELEMENT_GUESS = 10.0 ** 6
-                        exp = 1 / unlimited_count
-                        max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp))
-                else:
-                    max_dim = shape_dims[i]
-                num_chunks *= math.ceil(max_dim / chunk_dims[i])
-
-            if num_chunks > max_chunks_per_folder:
-                partition_count = math.ceil(num_chunks / max_chunks_per_folder)
-                msg = f"set partition count to: {partition_count}, "
-                msg += f"num_chunks: {num_chunks}"
-                layout["partition_count"] = partition_count
+        chunk_dims = getChunkDims(shape_json, item_size, **kwargs)
+    layout["dims"] = chunk_dims
+
+    # set partition_count if needed:
+    if max_chunks_per_folder > 0:
+        num_chunks = 1
+        rank = len(shape_dims)
+        unlimited_count = 0
+        for dim in range(rank):
+            if max_dims[dim] in (0, "H5S_UNLIMITED"):
+                unlimited_count += 1
+        for dim in range(rank):
+            max_dim = 1
+            max_dim = max_dims[dim]
+            if max_dim in (0, "H5S_UNLIMITED"):
+                # don't really know what the ultimate extent
+                # could be, but assume 10^6 for total number of
+                # elements and square-shaped array...
+                MAX_ELEMENT_GUESS = 10.0 ** 6
+                exp = 1 / unlimited_count
+                max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp))
             else:
-                pass  # partition not needed
-
-    if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"):
-        chunk_size = getChunkSize(chunk_dims, item_size)
-
-        # nothing to do about inefficiently small chunks, but large chunks
-        # can be subdivided
-        if chunk_size < chunk_min:
-            pass  # too small
-        elif chunk_size > chunk_max:
-            pass  # too large
-        layout["dims"] = chunk_dims
+                max_dim = shape_dims[dim]
+            num_chunks *= math.ceil(max_dim / chunk_dims[dim])
+
+        if num_chunks > max_chunks_per_folder:
+            partition_count = math.ceil(num_chunks / max_chunks_per_folder)
+            layout["partition_count"] = partition_count
+        else:
+            pass  # partition not needed
+    return layout
+
+
+def generate_dcpl(
+    shape_json,
+    dtype,
+    chunks=None,
+    filters=[],
+    chunk_min=CHUNK_MIN,
+    chunk_max=CHUNK_MAX,
+    max_chunks_per_folder=None,
+    initializer=None,
+    initializer_opts=None
+):
+    """Generate a dataset creation property list.
+
+    """
+
+    plist = {}
+
+    shape_class = getShapeClass(shape_json)
+
+    if shape_class != "H5S_SIMPLE":
+        if chunks or filters:
+            raise TypeError(f"{shape_class} datasets don't support chunk/filter options")
+
+        return plist  # return empty property list for non-simple datasets
+
+    validateFilters(filters)  # check filter params if any
+
+    # End argument validation
+
+    kwargs = {"item_size": dtype.itemsize, "has_filter": filters}
+    kwargs["chunks"] = chunks
+    kwargs["chunk_min"] = chunk_min
+    kwargs["chunk_max"] = chunk_max
+    kwargs["max_chunks_per_folder"] = max_chunks_per_folder
+    plist["layout"] = generateLayout(shape_json, **kwargs)
+
+    if len(filters) > 0:
+        plist["filters"] = filters
+
+    if initializer:
+        # TBD: this needs to be documented in the json spec
+        # pass in initializer options
+        initializer = [initializer,]
+        if initializer_opts:
+            initializer.extend(initializer_opts)
+        plist["initializer"] = initializer
+
+    return plist
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index 724ac929..3ddfe3f5 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -14,13 +14,17 @@
 
 from .hdf5dtype import isVlen
 
+DEFAULT_GZIP = 4
+DEFAULT_SZIP = 4
+SO_INT_MINBITS_DEFAULT = 0
+
 # List of registered filters.  Not all are supported by every reader and writer.
 #
 #
 # tuple of filter key, filter id, and options,
 FILTER_DEFS = (
     ("H5Z_FILTER_NONE", 0, "none", ()),
-    ("H5Z_FILTER_DEFLATE", 1, "gzip", ("level",)),  # aka as "zlib" for blosc
+    ("H5Z_FILTER_DEFLATE", 1, "gzip", ("level",)),  # aka as "default" or "zlib" for blosc
     ("H5Z_FILTER_SHUFFLE", 2, "shuffle", ()),
     ("H5Z_FILTER_FLETCHER32", 3, "fletcher32", ()),
     ("H5Z_FILTER_SZIP", 4, "szip", ("bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine")),
@@ -85,143 +89,193 @@ def getAllFilterNames():
     return tuple(names)
 
 
-def getFilterItem(key):
+def getFilterItem(name, options={}):
     """
     Return filter code, id, and name, based on an id, a name or a code.
     """
-
-    if key == "deflate":
-        key = "gzip"  # use gzip as equivalent
+    # is key is dict, just verify it's a valid filter and return
+    filter_json = None
+
+    if isinstance(name, dict):
+        filter_json = name
+        base_keys = ("class", "id", "name")
+        for key in base_keys:
+            if key not in filter_json:
+                raise KeyError(f"Expected {key} for filter")
+        # use class key to look up options
+        name = filter_json["class"]
+    elif name in ("deflate", "zlib"):
+        name = "gzip"  # use gzip as equivalent
+
+    option_set = None
     for item in FILTER_DEFS:
         # check for a match by key, id, or alias (the first three elements)
         for i in range(3):
-            if key == item[i]:
-                return {"class": item[0], "id": item[1], "name": item[2], "options": item[3]}
-    return None  # not found
-
+            if name == item[i]:
+                if filter_json is None:
+                    filter_json = {"class": item[0], "id": item[1], "name": item[2]}
+                option_set = set(item[3])
+                break
+
+    if not filter_json and isinstance(name, int) and name > 32000:
+        filter_json = {"class": "H5Z_FILTER_USER", "id": name, "name": f"user filter {name}"}
+
+    if not filter_json:
+        raise KeyError(f"filter {name} is unknown")
+
+    filter_class = filter_json["class"]
+    if filter_class == "H5Z_FILTER_USER":
+        option_set = set()
+        option_set.add("parameters")
+
+    # check that any option supplied is supported by the filter
+    for key in options:
+        if key not in option_set:
+            msg = f"Option {key} is not supported by the {filter_class} filter"
+            raise KeyError(msg)
+
+    # for any supplied options verify they are correct type and range
+    # (raise Type or Value error if not).  If option is not given, use
+    # the default value if not.  Finally add options to the filter_json
+
+    if filter_class == "H5Z_FILTER_DEFLATE":
+        if "level" in options:
+            level_val = options["level"]
+            if not isinstance(level_val, int):
+                msg = "Expected integer level for deflate filter"
+                raise TypeError(msg)
+            if level_val < 0 or level_val > 9:
+                msg = "Deflate filter level must be between 0 and 9"
+                raise ValueError(msg)
+            filter_json["level"] = level_val
+        else:
+            filter_json["level"] = DEFAULT_GZIP
+
+    elif filter_class == "H5Z_FILTER_SHUFFLE":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_FLETCHER32":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_SZIP":
+        for key in option_set:        # option set("bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"):
+            if key in options:
+                val = options[key]
+                if key == "coding":
+                    if val not in HDF_FILTER_OPTION_ENUMS["coding"].values():
+                        msg = f"Invalid coding option for szip filter: {val}"
+                        raise ValueError(msg)
+                else:
+                    # other options need to be positivie integers
+                    if not isinstance(val, int) or val <= 0:
+                        msg = f"Expected positive integer for szip filter option {key}"
+                        raise ValueError(msg)
+                filter_json[key] = val
+            else:
+                pass  # no defaults for szip
+    elif filter_class == "H5Z_FILTER_NBIT":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_SCALEOFFSET":
+        if "scaleType" in options:
+            val = options["scaleType"]
+            if val not in HDF_FILTER_OPTION_ENUMS["scaleType"].values():
+                msg = f"Invalid scaleType option for scaleoffset filter: {val}"
+                raise ValueError(msg)
 
-def getFiltersJson(create_props, supported_filters=None):
-    """ return standardized filter representation from creation properties
-        raise bad request if invalid """
+            filter_json["scaleType"] = val
+        if "scaleOffset" in options:
+            val = options["scaleOffset"]
+            if not isinstance(val, int) or val < 0:
+                msg = "Expected non-negative integer for scaleOffset option"
+                raise ValueError(msg)
+            filter_json["scaleOffset"] = val
+    elif filter_class == "H5Z_FILTER_LZF":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_BLOSC":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_SNAPPY":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_LZ4":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_LZ4HC":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_BITSHUFFLE":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_ZSTD":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_NONE":
+        pass  # no options
+    elif filter_class == "H5Z_FILTER_USER":
+        if "parameters" in options:
+            parameters = options["parameters"]
+            # expecting a positive integer array
+            if not isinstance(parameters, (list, tuple)):
+                raise TypeError(f"filter {filter_class} parameters option should be a list")
+            vals = []
+            for val in parameters:
+                if not isinstance(val, int):
+                    raise TypeError(f"filter {filter_class} parameters expected integer value")
+                if val <= 0:
+                    raise TypeError(f"filter {filter_class} parameters option should be a positive int")
+                vals.append(val)
+            filter_json["parameters"] = val
+    else:
+        msg = f"filter class {filter_class} is not supported"
+        raise KeyError(msg)
+
+    return filter_json
+
+
+def validateFilter(filter_json, supported_filters=None):
+    """ Check the given the given filter for create format,
+        required options set.  Raise TypeError, KeyError or ValueError if not.
+        If supported_filters is supplied, raise KeyError if a non-supported
+        filter is supplied. """
+
+    if not isinstance(filter_json, dict):
+        raise TypeError(f"Expected dict for filter but got {type(filter_json)}")
+    base_keys = ("class", "id", "name")
+    for key in base_keys:
+        if key not in filter_json:
+            raise KeyError(f"Expected {key} for filter")
+    filter_class = filter_json["class"]
+    filter_id = filter_json["id"]
+    # check that the filter_class agrees with the id in FILTER_DEFS
+    options = None
+    for filter_def in FILTER_DEFS:
+        if filter_def[0] == filter_class:
+            if filter_id != filter_def[1]:
+                msg = f"Incorrect filter_id: {filter_id} for filter: {filter_class}"
+                raise ValueError(msg)
+            # collect any filter options to check later
+            options = {}
+            for key in filter_json:
+                if key in base_keys:
+                    continue
+                options[key] = filter_json[key]
+            break
 
-    # refer to https://hdf5-json.readthedocs.io/en/latest/bnf/\
-    # filters.html#grammar-token-filter_list
+    if options is None and filter_class == "H5Z_FILTER_USER":
+        # custom filter, id should be > 32000
+        if filter_id <= 32000:
+            raise ValueError(f"Unexpected filter id: {filter_id} for user filter")
+        options = {}
+        for key in filter_json:
+            if key in base_keys:
+                continue
+            options[key] = filter_json[key]
 
-    if "filters" not in create_props:
-        return {}  # null set
+    if options is None:
+        raise KeyError(f"Unknown filter: {filter_class}")
 
-    f_in = create_props["filters"]
+    # will raise error if any option is invalid
+    getFilterItem(filter_json, options)
 
-    if not isinstance(f_in, list):
-        msg = "Expected filters in creation_props to be a list"
-        raise TypeError(msg)
 
-    if not supported_filters:
-        supported_filters = getAllFilterNames()
+def validateFilters(filters, supported_filters=None):
+    """ validate each filter in the filter list """
 
-    f_out = []
-    for filter in f_in:
-        if isinstance(filter, int) or isinstance(filter, str):
-            item = getFilterItem(filter)
-            if not item:
-                msg = f"filter {filter} not recognized"
-                raise ValueError(msg)
-
-            if item["name"] not in supported_filters:
-                msg = f"filter {filter} is not supported"
-                raise ValueError(msg)
-            f_out.append(item)
-        elif isinstance(filter, dict):
-            if filter.get("class") == "H5Z_FILTER_USER":
-                # user filter - must have either id or name
-                if "id" not in filter and "name" not in filter:
-                    msg = "user filter must have either 'id' or 'name' key"
-                    raise KeyError(msg)
-                item = filter
-            elif "id" in filter:
-                item = getFilterItem(filter["id"])
-            elif "name" in filter:
-                item = getFilterItem(filter["name"])
-            else:
-                item = None
-            if not item:
-                msg = f"filter {filter} not recognized"
-                raise ValueError(msg)
-
-            # will replace options list with specified options
-            del item["options"]
-
-            # copy any filter specified options
-            filter_class = item["class"]
-            if filter_class == "H5Z_FILTER_DEFLATE":
-                if "level" in filter:
-                    level_val = filter["level"]
-                    if not isinstance(level_val, int):
-                        msg = "Expected integer level for deflate filter"
-                        raise TypeError(msg)
-                    if level_val < 0 or level_val > 9:
-                        msg = "Deflate filter level must be between 0 and 9"
-                        raise ValueError(msg)
-                    item["level"] = level_val
-            elif filter_class == "H5Z_FILTER_SHUFFLE":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_FLETCHER32":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_SZIP":
-                for key in ("bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"):
-                    if key in filter:
-                        val = filter[key]
-                        if key == "coding":
-                            if val not in HDF_FILTER_OPTION_ENUMS["coding"].values():
-                                msg = f"Invalid coding option for szip filter: {val}"
-                                raise ValueError(msg)
-                            else:
-                                # other options need to be positivie integers
-                                if not isinstance(val, int) or val <= 0:
-                                    msg = f"Expected positive integer for szip filter option {key}"
-                                    raise ValueError(msg)
-                        item[key] = val
-            elif filter_class == "H5Z_FILTER_NBIT":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_SCALEOFFSET":
-                if "scaleType" in filter:
-                    val = filter["scaleType"]
-                    if val not in HDF_FILTER_OPTION_ENUMS["scaleType"].values():
-                        msg = f"Invalid scaleType option for scaleoffset filter: {val}"
-                        raise ValueError(msg)
-                    else:
-                        item["scaleType"] = val
-                if "scaleOffset" in filter:
-                    val = filter["scaleOffset"]
-                    if not isinstance(val, int) or val < 0:
-                        msg = "Expected non-negative integer for scaleOffset option"
-                        raise ValueError(msg)
-                    else:
-                        item["scaleOffset"] = val
-            elif filter_class == "H5Z_FILTER_LZF":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_BLOSC":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_SNAPPY":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_LZ4":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_LZ4HC":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_BITSHUFFLE":
-                pass  # no options
-            elif filter_class == "H5Z_FILTER_ZSTD":
-                pass  # no options
-            else:
-                msg = f"filter class {filter_class} is not supported"
-                raise KeyError(msg)
-            f_out.append(item)
-        else:
-            msg = f"Unexpected type for filter: {filter}"
-            raise ValueError(msg)
-
-    # return standardized filter representation
-    return f_out
+    # TBD: check given order of filters is supported
+    for filter_json in filters:
+        validateFilter(filter_json, supported_filters=supported_filters)
 
 
 def getFilters(dset_json):
@@ -235,72 +289,11 @@ def getFilters(dset_json):
     return filters
 
 
-def getCompressionFilter(filters):
-    """Return compression filter from filters, or None"""
-    for filter in filters:
-        if "class" not in filter:
-            # expected class key - malformed filter def
-            continue
-        filter_class = filter["class"]
-        if filter_class in COMPRESSION_FILTER_IDS:
-            return filter
-        if all(
-            (
-                filter_class == "H5Z_FILTER_USER",
-                "name" in filter,
-                filter["name"] in COMPRESSION_FILTER_NAMES,
-            )
-        ):
-            return filter
-    return None
-
-
-def getShuffleFilter(filters):
-    """Return shuffle filter, or None"""
-    FILTER_CLASSES = ("H5Z_FILTER_SHUFFLE", "H5Z_FILTER_BITSHUFFLE")
-    for filter in filters:
-        if "class" not in filter:
-            # invalid filter def?
-            continue
-        filter_class = filter["class"]
-        if filter_class in FILTER_CLASSES:
-            return filter
-
-    return None
-
-
-def getFilterOps(filters, dtype=None):
-    """Get list of filter operations to be used for this dataset"""
-
-    compressionFilter = getCompressionFilter(filters)
-
-    filter_ops = {}
-
-    shuffleFilter = getShuffleFilter(filters)
-
-    if shuffleFilter and not isVlen(dtype):
-        shuffle_name = shuffleFilter["name"]
-        if shuffle_name == "shuffle":
-            filter_ops["shuffle"] = 1  # use regular shuffle
-        elif shuffle_name == "bitshuffle":
-            filter_ops["shuffle"] = 2  # use bitshuffle
-        else:
-            filter_ops["shuffle"] = 0  # no shuffle
-    else:
-        filter_ops["shuffle"] = 0  # no shuffle
+def isCompressionFilter(filter):
+    filter_json = getFilterItem(filter)
+    return filter_json["class"] in COMPRESSION_FILTER_IDS
 
-    """ return list of filter operations for this dataset """
-    if compressionFilter:
-        if compressionFilter["class"] == "H5Z_FILTER_DEFLATE":
-            filter_ops["compressor"] = "zlib"  # blosc compressor
-        else:
-            if "name" in compressionFilter:
-                filter_ops["compressor"] = compressionFilter["name"]
-            else:
-                filter_ops["compressor"] = "lz4"  # default to lz4
-        if "level" not in compressionFilter:
-            filter_ops["level"] = 5  # medium level
-        else:
-            filter_ops["level"] = int(compressionFilter["level"])
 
-    return filter_ops
+def getCompressionFilter(filters):
+    """Return compression filter ids from filters, or None"""
+    return COMPRESSION_FILTER_IDS
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 9468c9fe..7982a926 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -16,7 +16,7 @@
 from .array_util import jsonToArray, bytesArrayToList
 from .dset_util import resize_dataset
 from .shape_util import getShapeClass, getShapeDims
-from .filters import getFiltersJson
+from .filters import validateFilters
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId, getHashTagForId
 from . import selections
 from .time_util import getNow
@@ -884,8 +884,7 @@ def createDataset(
                 else:
                     supported_filters = ()
                 # validate and normalize supplied filter property list
-                filters_json = getFiltersJson(cpl, supported_filters=supported_filters)
-                cpl["filters"] = filters_json
+                validateFilters(cpl["filters"], supported_filters=supported_filters)
             dset_json["creationProperties"] = cpl
         else:
             dset_json["creationProperties"] = {}
diff --git a/src/h5json/shape_util.py b/src/h5json/shape_util.py
index a3531cde..cbc6a8fe 100644
--- a/src/h5json/shape_util.py
+++ b/src/h5json/shape_util.py
@@ -13,20 +13,20 @@
 import numpy as np
 
 
-def getShapeClass(shape):
+def getShapeClass(obj_json):
     """ Return shape class of the given data shape """
 
-    if not isinstance(shape, dict):
+    if not isinstance(obj_json, dict):
         raise TypeError("expected dict object")
 
-    if shape.get("class") in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
+    if obj_json.get("class") in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
         # this is a shape_json obj
-        shape_json = shape
-    elif "shape" in shape:
+        shape_json = obj_json
+    elif "shape" in obj_json:
         # dataset or attribute json
-        shape_json = shape["shape"]
+        shape_json = obj_json["shape"]
     else:
-        raise ValueError(f"Unknown shape: {shape}")
+        raise ValueError(f"Unknown shape: {obj_json}")
 
     if "class" not in shape_json:
         raise KeyError("expected 'class' key for data shape")\
@@ -34,6 +34,33 @@ def getShapeClass(shape):
     return shape_json["class"]
 
 
+def getShapeJson(dims, maxdims=None):
+    """ create a new shape_json based on dims and
+        optionally maxdims (the later only applies to
+        datasets) """
+    if isinstance(dims, int):
+        dims = (dims, )
+    if isinstance(maxdims, int):
+        maxdims = (maxdims, )
+    if dims is None:
+        shape_class = "H5S_NULL"
+    elif len(dims) == 0:
+        shape_class = "H5S_SCALAR"
+    else:
+        shape_class = "H5S_SIMPLE"
+    if maxdims is not None:
+        if shape_class != "H5S_SIMPLE":
+            raise ValueError(f"maxdims can not be used with shape class: {shape_class}")
+        if len(maxdims) != len(dims):
+            raise ValueError("maxdims must match dataspace rank")
+    shape_json = {"class": shape_class}
+    if shape_class == "H5S_SIMPLE":
+        shape_json["dims"] = dims
+    if maxdims is not None:
+        shape_json["maxdims"] = maxdims
+    return shape_json
+
+
 def getShapeDims(shape):
     """
     Get dims from a given shape json.  Return [1,] for Scalar datasets,
@@ -139,3 +166,66 @@ def getDataSize(shape, type_size: int = 1):
         return 0
     else:
         return type_size * int(np.prod(dims))
+
+
+def isExtensible(obj_json):
+    """
+    Determine if the dataset can be extended
+    """
+
+    if "shape" in obj_json:
+        # assume dataset or attribute json
+        shape_json = obj_json["shape"]
+    else:
+        shape_json = obj_json
+    shape_class = getShapeClass(shape_json)
+    if shape_class != "H5S_SIMPLE":
+        return False
+
+    if "maxdims" not in shape_json:
+        return False
+
+    dims = shape_json["dims"]
+    maxdims = shape_json["maxdims"]
+    rank = len(dims)
+    if len(maxdims) != rank:
+        raise ValueError("rank of maxdims does not match dataset")
+    for n in range(rank):
+        if maxdims[n] in (0, "H5S_UNLIMITED") or maxdims[n] > dims[n]:
+            return True
+    return False
+
+
+def getMaxDims(obj_json):
+    """
+    Get maxdims from a given shape.  Return [1,] for Scalar datasets
+
+    Use with H5S_NULL datasets will throw a ValueError
+    """
+
+    if not isinstance(obj_json, dict):
+        raise TypeError("expected a dict argument")
+
+    if "shape" in obj_json:
+        shape_json = obj_json["shape"]
+    else:
+        shape_json = obj_json
+
+    if "class" not in shape_json:
+        # should have at least this
+        raise KeyError(f"unexpected shape json: {shape_json}")
+    shape_class = shape_json["class"]
+    maxdims = None
+    if shape_class == "H5S_NULL":
+        return None
+    elif shape_class == "H5S_SCALAR":
+        maxdims = ()
+    elif shape_class == "H5S_SIMPLE":
+        if "maxdims" in shape_json:
+            maxdims = shape_json["maxdims"]
+        else:
+            maxdims = shape_json["dims"]
+    else:
+        msg = f"Unexpected shape class: {shape_class}"
+        raise ValueError(msg)
+    return tuple(maxdims)
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
old mode 100755
new mode 100644
index c029fd01..b24594d2
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -12,6 +12,7 @@
 import unittest
 import logging
 
+from h5json.filters import getFilterItem
 from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk
 from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getChunkDims
 from h5json.dset_util import validateChunkLayout, validateDatasetCreationProps, getDatasetLayout
@@ -121,7 +122,8 @@ def testFilterValidation(self):
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
         # try with just a filter name
-        cpl["filters"] = ["gzip", ]
+        gzip_filter = getFilterItem("gzip")
+        cpl["filters"] = [gzip_filter, ]
         try:
             validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
         except ValueError:
diff --git a/test/unit/filter_test.py b/test/unit/filter_test.py
new file mode 100644
index 00000000..0b37c54f
--- /dev/null
+++ b/test/unit/filter_test.py
@@ -0,0 +1,98 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+
+from h5json.filters import FILTER_DEFS
+from h5json.filters import getFilterItem, validateFilter, isCompressionFilter
+
+
+class FiltersTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(FiltersTest, self).__init__(*args, **kwargs)
+        # main
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testStandardFilters(self):
+
+        # check standard filters with no options
+
+        self.assertEqual(len(FILTER_DEFS), 14)
+        for item in FILTER_DEFS:
+            filter_class = item[0]
+            filter_id = item[1]
+            filter_name = item[2]
+            for value in (filter_class, filter_id, filter_name):
+                filter_json = getFilterItem(value)
+                validateFilter(filter_json)
+
+        # check alternate names work
+        for name in ("deflate", "gzip"):
+            filter_json = getFilterItem(name)
+            validateFilter(filter_json)
+            self.assertTrue(isCompressionFilter(filter_json))
+
+        # check random name raises exception
+        try:
+            getFilterItem("goofy")
+            self.assertTrue(False)
+        except KeyError:
+            pass  # expected
+
+        # check invalid filter id fails
+        try:
+            getFilterItem(1234)
+            self.assertTrue(False)
+        except KeyError:
+            pass  # expected
+
+    def testCustomFilters(self):
+
+        # check custom filter usage
+        custom_filter = {"class": "H5Z_FILTER_USER", "name": "myspecialfilter"}
+        # id should be over 32000
+        custom_filter["id"] = 32000
+        try:
+            validateFilter(custom_filter)
+            self.assertTrue(False)  # shouldn't get here
+        except ValueError:
+            pass  # expected
+
+        custom_filter["id"] = 32099
+        validateFilter(custom_filter)
+
+        custom_filter["unknown_option"] = 42
+        try:
+            validateFilter(custom_filter)
+            self.assertTrue(False)  # shouldn't get here
+        except KeyError:
+            pass  # expected
+
+        del custom_filter["unknown_option"]
+        good_params = (1, 2, 3)
+        bad_params = (2, -1)  # needs to be positive
+        custom_filter["parameters"] = good_params
+        validateFilter(custom_filter)
+
+        custom_filter["parameters"] = bad_params
+        try:
+            validateFilter(custom_filter)
+            self.assertTrue(False)  # shouldn't get here
+        except TypeError:
+            pass  # expected
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
old mode 100755
new mode 100644
diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
old mode 100755
new mode 100644
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
old mode 100755
new mode 100644
diff --git a/test/unit/shape_util_test.py b/test/unit/shape_util_test.py
old mode 100755
new mode 100644
index 23c41edf..98812692
--- a/test/unit/shape_util_test.py
+++ b/test/unit/shape_util_test.py
@@ -13,7 +13,7 @@
 import logging
 
 from h5json.shape_util import getShapeClass, getShapeDims, getNumElements, getRank
-from h5json.shape_util import isNullSpace, isScalar, getDataSize
+from h5json.shape_util import isNullSpace, isScalar, getDataSize, isExtensible, getMaxDims
 
 
 class ShapeUtilTest(unittest.TestCase):
@@ -44,6 +44,7 @@ def testSimple(self):
         simple_shape_json = {"class": "H5S_SIMPLE", "dims": [5, 7]}
         simple_shape_obj = {"type": type_json, "shape": simple_shape_json}
         vstr_simple_shape_obj = {"type": vstr_json, "shape": simple_shape_json}
+        resizable_shape_obj = {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]}
 
         self.assertEqual(getShapeClass(null_shape_json), "H5S_NULL")
         self.assertEqual(getShapeClass(null_shape_obj), "H5S_NULL")
@@ -53,6 +54,7 @@ def testSimple(self):
         self.assertEqual(getShapeClass(simple_shape_json), "H5S_SIMPLE")
         self.assertEqual(getShapeClass(simple_shape_obj), "H5S_SIMPLE")
         self.assertEqual(getShapeClass(vstr_simple_shape_obj), "H5S_SIMPLE")
+        self.assertEqual(getShapeClass(resizable_shape_obj), "H5S_SIMPLE")
 
         self.assertEqual(getShapeDims(null_shape_json), None)
         self.assertEqual(getShapeDims(null_shape_obj), None)
@@ -63,6 +65,17 @@ def testSimple(self):
         self.assertEqual(getShapeDims(simple_shape_obj), (5, 7))
         self.assertEqual(getShapeDims(vstr_simple_shape_obj), (5, 7))
         self.assertEqual(getShapeDims(12), (12,))
+        self.assertEqual(getShapeDims(resizable_shape_obj), (10,))
+
+        self.assertEqual(getMaxDims(null_shape_json), None)
+        self.assertEqual(getMaxDims(null_shape_obj), None)
+        self.assertEqual(getMaxDims(scalar_shape_json), ())
+        self.assertEqual(getMaxDims(scalar_shape_obj), ())
+        self.assertEqual(getMaxDims(vstr_scalar_shape_obj), ())
+        self.assertEqual(getMaxDims(simple_shape_json), (5, 7))
+        self.assertEqual(getMaxDims(simple_shape_obj), (5, 7))
+        self.assertEqual(getMaxDims(vstr_simple_shape_obj), (5, 7))
+        self.assertEqual(getMaxDims(resizable_shape_obj), (20,))
 
         self.assertEqual(getRank(null_shape_json), 0)
         self.assertEqual(getRank(null_shape_obj), 0)
@@ -72,7 +85,7 @@ def testSimple(self):
         self.assertEqual(getRank(simple_shape_json), 2)
         self.assertEqual(getRank(simple_shape_obj), 2)
         self.assertEqual(getRank(vstr_simple_shape_obj), 2)
-        self.assertEqual(getRank((1, 2, 3)), 3)
+        self.assertEqual(getRank(resizable_shape_obj), 1)
 
         self.assertEqual(getNumElements(null_shape_json), 0)
         self.assertEqual(getNumElements(null_shape_obj), 0)
@@ -82,6 +95,7 @@ def testSimple(self):
         self.assertEqual(getNumElements(simple_shape_json), 35)
         self.assertEqual(getNumElements(simple_shape_obj), 35)
         self.assertEqual(getNumElements(vstr_simple_shape_obj), 35)
+        self.assertEqual(getNumElements(resizable_shape_obj), 10)
         self.assertEqual(getNumElements(()), 1)
         self.assertEqual(getNumElements([1, 2, 3]), 6)
 
@@ -93,6 +107,7 @@ def testSimple(self):
         self.assertEqual(isNullSpace(simple_shape_json), False)
         self.assertEqual(isNullSpace(simple_shape_obj), False)
         self.assertEqual(isNullSpace(vstr_simple_shape_obj), False)
+        self.assertEqual(isNullSpace(resizable_shape_obj), False)
 
         self.assertEqual(isScalar(null_shape_json), False)
         self.assertEqual(isScalar(null_shape_obj), False)
@@ -102,6 +117,7 @@ def testSimple(self):
         self.assertEqual(isScalar(simple_shape_json), False)
         self.assertEqual(isScalar(simple_shape_obj), False)
         self.assertEqual(isScalar(vstr_simple_shape_obj), False)
+        self.assertEqual(isScalar(resizable_shape_obj), False)
 
         self.assertEqual(getDataSize(null_shape_json, 4), 0)
         self.assertEqual(getDataSize(null_shape_obj, 4), 0)
@@ -111,9 +127,30 @@ def testSimple(self):
         self.assertEqual(getDataSize(simple_shape_json, 4), 140)
         self.assertEqual(getDataSize(simple_shape_obj, 4), 140)
         self.assertEqual(getDataSize(vstr_simple_shape_obj, 4), 140)
+        self.assertEqual(getDataSize(resizable_shape_obj, 4), 40)
         self.assertEqual(getDataSize((), 4), 4)
         self.assertEqual(getDataSize([1, 2, 3], 4), 24)
 
+        self.assertEqual(isScalar(null_shape_json), False)
+        self.assertEqual(isScalar(null_shape_obj), False)
+        self.assertEqual(isScalar(scalar_shape_json), True)
+        self.assertEqual(isScalar(scalar_shape_obj), True)
+        self.assertEqual(isScalar(vstr_scalar_shape_obj), True)
+        self.assertEqual(isScalar(simple_shape_json), False)
+        self.assertEqual(isScalar(simple_shape_obj), False)
+        self.assertEqual(isScalar(vstr_simple_shape_obj), False)
+        self.assertEqual(isScalar(resizable_shape_obj), False)
+
+        self.assertEqual(isExtensible(null_shape_json), False)
+        self.assertEqual(isExtensible(null_shape_obj), False)
+        self.assertEqual(isExtensible(scalar_shape_json), False)
+        self.assertEqual(isExtensible(scalar_shape_obj), False)
+        self.assertEqual(isExtensible(vstr_scalar_shape_obj), False)
+        self.assertEqual(isExtensible(simple_shape_json), False)
+        self.assertEqual(isExtensible(simple_shape_obj), False)
+        self.assertEqual(isExtensible(vstr_simple_shape_obj), False)
+        self.assertEqual(isExtensible(resizable_shape_obj), True)
+
 
 if __name__ == "__main__":
     # setup test files

From e4aafaf7bbdedd89c43da5deb2510cbcb1f1b97d Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 26 Dec 2025 13:57:41 +0800
Subject: [PATCH 103/129] fix for lz4 filter opts

---
 src/h5json/dset_util.py     | 17 ++++++++-------
 src/h5json/filters.py       | 28 ++++++++++++++++++++-----
 test/unit/dset_util_test.py | 42 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index ffcf0147..1946bd35 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -569,7 +569,6 @@ def guessChunk(shape, typesize, chunk_min=None, chunk_max=None):
 def generateLayout(
         shape_json,
         item_size=0,
-        has_filter=False,
         chunks=None,
         chunk_min=CHUNK_MIN,
         chunk_max=CHUNK_MAX,
@@ -583,14 +582,14 @@ def generateLayout(
 
     shape_class = getShapeClass(shape_json)
     if shape_class == "H5S_NULL":
-        if chunks or has_filter:
+        if chunks:
             raise ValueError("Null space datasets do not support chunking")
         return {}
 
     if shape_class == "H5S_SCALAR":
-        if chunks or has_filter:
+        if chunks:
             raise ValueError("Scalar datasets do not support chunking")
-        return {"class": "H5D_CONIGUOUS"}
+        return {"class": "H5D_CONTIGUOUS"}
 
     if chunk_min > chunk_max:
         msg = "chunk_max must be larger than chunk_min"
@@ -600,9 +599,9 @@ def generateLayout(
     shape_dims = getShapeDims(shape_json)
     rank = len(shape_dims)
     max_dims = getMaxDims(shape_json)
-    extensible = isExtensible(shape_dims, max_dims)
+    extensible = isExtensible(shape_json)
 
-    if dset_size < chunk_min and not extensible and not has_filter and not chunks:
+    if dset_size < chunk_min and not extensible and not chunks:
         # can just return a contiguous layout
         return {"class": "H5D_CONTIGUOUS"}
 
@@ -613,10 +612,12 @@ def generateLayout(
             chunk_dims = chunks
             if len(chunk_dims) != rank:
                 raise ValueError("given chunk dims do not agree with dataset rank")
+        else:
+            pass  # otherwise we'll guess a chunk shape below
     if not chunk_dims:
         kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max}
-        chunk_dims = getChunkDims(shape_json, item_size, **kwargs)
-    layout["dims"] = chunk_dims
+        chunk_dims = guessChunk(shape_json, item_size, **kwargs)
+    layout["dims"] = list(chunk_dims)
 
     # set partition_count if needed:
     if max_chunks_per_folder > 0:
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index 3ddfe3f5..c8435873 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -33,7 +33,7 @@
     ("H5Z_FILTER_LZF", 32000, "lzf", ()),
     ("H5Z_FILTER_BLOSC", 32001, "blosclz", ()),
     ("H5Z_FILTER_SNAPPY", 32003, "snappy", ()),
-    ("H5Z_FILTER_LZ4", 32004, "lz4", ()),
+    ("H5Z_FILTER_LZ4", 32004, "lz4", ("level",)),
     ("H5Z_FILTER_LZ4HC", 32005, "lz4hc", ()),
     ("H5Z_FILTER_BITSHUFFLE", 32008, "bitshuffle", ()),
     ("H5Z_FILTER_ZSTD", 32015, "zstd", ()),
@@ -194,7 +194,11 @@ def getFilterItem(name, options={}):
     elif filter_class == "H5Z_FILTER_SNAPPY":
         pass  # no options
     elif filter_class == "H5Z_FILTER_LZ4":
-        pass  # no options
+        if "level" in options:
+            level_val = options["level"]
+            if level_val < 0 or level_val > 9:
+                msg = "Deflate filter level must be between 0 and 9"
+                raise ValueError(msg)
     elif filter_class == "H5Z_FILTER_LZ4HC":
         pass  # no options
     elif filter_class == "H5Z_FILTER_BITSHUFFLE":
@@ -224,7 +228,7 @@ def getFilterItem(name, options={}):
     return filter_json
 
 
-def validateFilter(filter_json, supported_filters=None):
+def validateFilter(filter_json):
     """ Check the given the given filter for create format,
         required options set.  Raise TypeError, KeyError or ValueError if not.
         If supported_filters is supplied, raise KeyError if a non-supported
@@ -275,7 +279,10 @@ def validateFilters(filters, supported_filters=None):
 
     # TBD: check given order of filters is supported
     for filter_json in filters:
-        validateFilter(filter_json, supported_filters=supported_filters)
+        validateFilter(filter_json)
+        filter_class = filter_json["class"]
+        if supported_filters and filter_class not in supported_filters:
+            raise ValueError(f"filter: {filter_class} not supported")
 
 
 def getFilters(dset_json):
@@ -296,4 +303,15 @@ def isCompressionFilter(filter):
 
 def getCompressionFilter(filters):
     """Return compression filter ids from filters, or None"""
-    return COMPRESSION_FILTER_IDS
+    for filter in filters:
+        if filter["class"] in COMPRESSION_FILTER_IDS:
+            return filter
+    return None
+
+
+def getShuffleFilter(filters):
+    """Return shuffle filter if present  or None"""
+    for filter in filters:
+        if filter["class"] == "H5Z_FILTER_SHUFFLE":
+            return filter
+    return None
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index b24594d2..1331cd97 100644
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -13,7 +13,7 @@
 import logging
 
 from h5json.filters import getFilterItem
-from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk
+from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk, generateLayout
 from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getChunkDims
 from h5json.dset_util import validateChunkLayout, validateDatasetCreationProps, getDatasetLayout
 
@@ -359,6 +359,46 @@ def testExpandChunk(self):
         self.assertTrue(num_bytes > CHUNK_MIN)
         self.assertTrue(num_bytes < CHUNK_MAX)
 
+    def testGenerateLayout(self):
+        typesize = 4
+        chunk_min = 4000
+        chunk_max = 8000
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [40, 20],
+        }
+        kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max}
+        layout = generateLayout(shape, typesize, **kwargs)
+        self.assertTrue("class" in layout)
+        self.assertEqual(layout["class"], "H5D_CONTIGUOUS")
+        self.assertFalse("dims" in layout)
+
+        layout = generateLayout(shape, typesize, chunks=True, **kwargs)
+        self.assertTrue("class" in layout)
+        self.assertEqual(layout["class"], "H5D_CHUNKED")
+        self.assertTrue("dims" in layout)
+        self.assertEqual(layout["dims"], [40, 20])
+
+        layout = generateLayout(shape, typesize, chunks=(20, 10), **kwargs)
+        self.assertTrue("class" in layout)
+        self.assertEqual(layout["class"], "H5D_CHUNKED")
+        self.assertTrue("dims" in layout)
+        self.assertEqual(layout["dims"], [20, 10])
+
+        shape = {
+            "class": "H5S_SIMPLE",
+            "dims": [0, 20],
+            "maxdims": [0, 20]
+        }
+        layout = generateLayout(shape, typesize, **kwargs)
+        self.assertTrue("class" in layout)
+        self.assertEqual(layout["class"], "H5D_CHUNKED")
+        self.assertTrue("dims" in layout)
+        dims = layout["dims"]
+        self.assertEqual(len(dims), 2)
+        self.assertTrue(dims[0] > 0)
+        self.assertTrue(dims[1] > 0)
+
     def testGetContiguousLayout(self):
         typesize = 4
         chunk_min = 400

From 43133daf7ce4b49dcef7b17a5d1f421952a4d01e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 26 Dec 2025 14:23:46 +0800
Subject: [PATCH 104/129] added filter test to testall

---
 testall.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/testall.py b/testall.py
index 34b1efd7..e4237ce6 100755
--- a/testall.py
+++ b/testall.py
@@ -17,10 +17,11 @@
 
 unit_tests = [
     "array_util_test",
+    "dset_util_test",
+    "filter_test",
     "objid_test",
     "hdf5dtype_test",
     "shape_util_test",
-    "dset_util_test",
     "hdf5db_test",
     "h5json_reader_test",
     "h5json_writer_test",

From df8ce239a1a686aa1bae8efbfb37091a9c5753ba Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 30 Dec 2025 19:13:26 +0800
Subject: [PATCH 105/129] updates for resizable datasets

---
 src/h5json/array_util.py            |  29 +++-
 src/h5json/dset_util.py             |  82 +++++++----
 src/h5json/filters.py               |   3 +-
 src/h5json/h5pystore/h5py_writer.py | 213 +++++++++++++++-------------
 src/h5json/hdf5db.py                | 145 ++++++++++++++-----
 src/h5json/shape_util.py            |  22 +++
 src/h5json/track_util.py            |  26 ++++
 test/unit/dset_util_test.py         |  30 ++--
 test/unit/h5py_writer_test.py       |  46 ++++++
 test/unit/hdf5db_test.py            |   4 +-
 test/unit/shape_util_test.py        |  12 ++
 11 files changed, 438 insertions(+), 174 deletions(-)
 create mode 100644 src/h5json/track_util.py

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index cb39cd55..e57a3892 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -15,7 +15,7 @@
 import binascii
 import numpy as np
 
-from .hdf5dtype import isVlen
+from .hdf5dtype import isVlen, is_float16_dtype, guess_dtype
 
 MAX_VLEN_ELEMENT = 1_000_000  # restrict largest vlen element to one million
 
@@ -474,6 +474,33 @@ def arrayToBytes(arr, encoding=None):
     return data
 
 
+def array_for_new_object(data, specified_dtype=None):
+    """Prepare an array from data used to create a new dataset or attribute"""
+
+    # We mostly let HDF5 convert data as necessary when it's written.
+    # But if we are going to a float16 datatype, pre-convert in python
+    # to workaround a bug in the conversion.
+    # https://github.com/h5py/h5py/issues/819
+    if is_float16_dtype(specified_dtype):
+        as_dtype = specified_dtype
+    elif not isinstance(data, np.ndarray) and (specified_dtype is not None):
+        # If we need to convert e.g. a list to an array, don't leave numpy
+        # to guess a dtype we already know.
+        as_dtype = specified_dtype
+    else:
+        as_dtype = guess_dtype(data)
+
+    data = np.asarray(data, order="C", dtype=as_dtype)
+
+    # In most cases, this does nothing. But if data was already an array,
+    # and as_dtype is a tagged h5py dtype (e.g. for an object array of strings),
+    # asarray() doesn't replace its dtype object. This gives it the tagged dtype:
+    if as_dtype is not None:
+        data = data.view(dtype=as_dtype)
+
+    return data
+
+
 def bytesToArray(data, dt, shape, encoding=None):
     """
     Create numpy array based on byte representation
diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 1946bd35..50340438 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -19,7 +19,7 @@
 from .objid import isValidUuid
 
 CHUNK_MIN = 512 * 1024  # Soft lower limit (512k)
-CHUNK_MAX = 8096 * 1024  # Hard upper limit (2M)
+CHUNK_MAX = 8096 * 1024  # Hard upper limit (8M)
 
 
 LAYOUT_CLASSES = (
@@ -87,30 +87,36 @@ def estimateDatasetSize(shape_json, item_size, chunk_min=CHUNK_MIN):
 
 def resize_dataset(dset_json, shape):
     """ Update shape dims to the given shape provided new shape is valid for maxdims """
-    shape_json = dset_json["shape"]
-    shape_class = shape_json["class"]
+
+    layout_class = getDatasetLayoutClass(dset_json)
+    if layout_class != "H5D_CHUNKED":
+        raise TypeError("Only chunked datasets can be resized")
+    shape_class = getShapeClass(dset_json)
     if shape_class != "H5S_SIMPLE":
         raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
-    if len(shape_json["dims"]) != len(shape):
+    dims = getShapeDims(dset_json)
+    if len(dims) != len(shape):
         raise ValueError("Resize shape parameter doesn't match dataset's rank")
-    if "maxdims" not in shape_json:
+    if not isExtensible(dset_json):
         raise ValueError("Dataset is not resizable")
-    dims = shape_json["dims"]
-    maxdims = shape_json["maxdims"]
+    maxdims = getMaxDims(dset_json)
 
-    if shape_json["dims"] == list(shape):
+    if dims == tuple(shape):
         # no change, just return
-        return
-    for i in range(len(dims)):
+        return None
+    rank = getRank(dset_json)
+    for i in range(rank):
         extent = shape[i]
         if extent < 0:
             raise ValueError("dimensions can't be negative")
-        if maxdims[i] == "H5S_UNLIMITED":
+        if maxdims[i] in (0, "H5S_UNLIMITED"):
             # any positive extent is ok
             continue
         if extent > maxdims[i]:
             raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")
 
+    # update the object json with the new dimensions
+    shape_json = dset_json["shape"]
     shape_json["dims"] = list(shape)
 
 
@@ -185,12 +191,12 @@ def getChunkSize(chunk_dims, type_size: int = 1):
 def getChunkDims(dset_json):
     """Get chunk layout.  Return shape dims for non-chunked layout"""
 
-    shape_json = dset_json["shape"]
-    if shape_json["class"] == "H5S_NULL":
+    shape_class = getShapeClass(dset_json)
+    if shape_class == "H5S_NULL":
         return None
-    if shape_json["class"] == "H5S_SCALAR":
+    if shape_class == "H5S_SCALAR":
         return (1, )
-    shape_dims = shape_json["dims"]
+    shape_dims = getShapeDims(dset_json)
     layout_class = getDatasetLayoutClass(dset_json)
     if not layout_class:
         return tuple(shape_dims)
@@ -207,7 +213,7 @@ def getChunkDims(dset_json):
     return chunk_dims
 
 
-def validateChunkLayout(shape_json, type_json, layout):
+def validateLayout(shape_json, type_json, layout):
     """
     Use chunk layout given in the creationPropertiesList (if defined and
     layout is valid).
@@ -218,6 +224,7 @@ def validateChunkLayout(shape_json, type_json, layout):
     space_dims = None
     chunk_dims = None
     max_dims = None
+
     item_size = getItemSize(type_json)
 
     if "dims" in shape_json:
@@ -250,7 +257,7 @@ def validateChunkLayout(shape_json, type_json, layout):
                 if chunk_extent > dim_extent:
                     msg = "Invalid layout value"
                     raise ValueError(reason=msg)
-            elif max_dims[i] != 0:
+            elif max_dims[i] not in (0, "H5S_UNLIMITED"):
                 if chunk_extent > max_dims[i]:
                     msg = "Invalid layout value for extensible dimension"
                     raise ValueError(msg)
@@ -404,7 +411,7 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None):
     layout_class = None
     if "layout" in creation_props:
         layout_json = creation_props["layout"]
-        validateChunkLayout(shape, type_json, layout_json)
+        validateLayout(shape, type_json, layout_json)
         layout_class = layout_json["class"]
 
     if "filters" in creation_props:
@@ -436,7 +443,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
     if "maxdims" in shape_json:
         maxdims = shape_json["maxdims"]
         for n in range(rank):
-            if maxdims[n] == 0 or maxdims[n] > dims[n]:
+            if maxdims[n] in (0, "H5S_UNLIMITED") or maxdims[n] > dims[n]:
                 extendable_dims += 1
 
     dset_size = getDataSize(shape_json, typesize)
@@ -454,7 +461,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
             dim = rank - n - 1  # start from last dim
 
             if extendable_dims > 0:
-                if maxdims[dim] == 0:
+                if maxdims[dim] in (0, "H5S_UNLIMITED"):
                     # infinitely extendable dimensions
                     layout[dim] *= 2
                     chunk_size = getChunkSize(layout, typesize)
@@ -553,7 +560,7 @@ def guessChunk(shape, typesize, chunk_min=None, chunk_max=None):
         typesize = 128  # just take a guess at the item size
 
     # For unlimited dimensions we have to guess. use 1024
-    shape = tuple((x if x != 0 else 1024) for i, x in enumerate(shape))
+    shape = tuple((x if x not in (0, "H5S_UNLIMITED") else 1024) for i, x in enumerate(shape))
 
     chunk_size = getChunkSize(shape, typesize)
     if chunk_min and chunk_size < chunk_min:
@@ -568,7 +575,7 @@ def guessChunk(shape, typesize, chunk_min=None, chunk_max=None):
 
 def generateLayout(
         shape_json,
-        item_size=0,
+        type_json,
         chunks=None,
         chunk_min=CHUNK_MIN,
         chunk_max=CHUNK_MAX,
@@ -577,6 +584,9 @@ def generateLayout(
 
     """ Create a dataset layout based on type and shape properties  """
 
+    item_size = getItemSize(type_json)
+    if item_size == "H5T_VARIABLE":
+        item_size = 128  # take a guess
     if item_size < 0:
         raise ValueError("item_size is invalid")
 
@@ -612,6 +622,13 @@ def generateLayout(
             chunk_dims = chunks
             if len(chunk_dims) != rank:
                 raise ValueError("given chunk dims do not agree with dataset rank")
+            for dim in range(rank):
+                if max_dims[dim] in (0, "H5S_UNLIMITED"):
+                    pass  # unlimited, so any chunk extent is ok
+                elif chunk_dims[dim] > max_dims[dim]:
+                    msg = "Chunk shape must not be greater than data shape in any dimension. "
+                    msg += f"{chunk_dims} is not compatible with {max_dims}"
+                    raise ValueError()
         else:
             pass  # otherwise we'll guess a chunk shape below
     if not chunk_dims:
@@ -646,12 +663,14 @@ def generateLayout(
             layout["partition_count"] = partition_count
         else:
             pass  # partition not needed
+
+    validateLayout(shape_json, type_json, layout)
     return layout
 
 
 def generate_dcpl(
     shape_json,
-    dtype,
+    type_json,
     chunks=None,
     filters=[],
     chunk_min=CHUNK_MIN,
@@ -678,12 +697,12 @@ def generate_dcpl(
 
     # End argument validation
 
-    kwargs = {"item_size": dtype.itemsize, "has_filter": filters}
+    kwargs = {"has_filter": filters}
     kwargs["chunks"] = chunks
     kwargs["chunk_min"] = chunk_min
     kwargs["chunk_max"] = chunk_max
     kwargs["max_chunks_per_folder"] = max_chunks_per_folder
-    plist["layout"] = generateLayout(shape_json, **kwargs)
+    plist["layout"] = generateLayout(shape_json, type_json, **kwargs)
 
     if len(filters) > 0:
         plist["filters"] = filters
@@ -697,3 +716,16 @@ def generate_dcpl(
         plist["initializer"] = initializer
 
     return plist
+
+
+def getFillValue(obj_json):
+    """ Return the fill value or None if not set """
+
+    if "creationProperties" in obj_json:
+        cpl = obj_json["creationProperties"]
+    else:
+        cpl = obj_json  # assume we've been based a cpl
+    if "filLValue" in cpl:
+        return cpl["fillValue"]
+    else:
+        return None
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
index c8435873..3642fe07 100644
--- a/src/h5json/filters.py
+++ b/src/h5json/filters.py
@@ -16,6 +16,7 @@
 
 DEFAULT_GZIP = 4
 DEFAULT_SZIP = 4
+DEFAULT_LZ4 = 1
 SO_INT_MINBITS_DEFAULT = 0
 
 # List of registered filters.  Not all are supported by every reader and writer.
@@ -97,7 +98,7 @@ def getFilterItem(name, options={}):
     filter_json = None
 
     if isinstance(name, dict):
-        filter_json = name
+        filter_json = name.copy()
         base_keys = ("class", "id", "name")
         for key in base_keys:
             if key not in filter_json:
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 5e1e20d7..b801af83 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -17,7 +17,11 @@
 from ..objid import getCollectionForId, isValidUuid, createObjId
 from ..hdf5dtype import createDataType
 from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
+from ..shape_util import getShapeDims, getShapeClass, isExtensible, getMaxDims
 from ..array_util import jsonToArray
+from ..track_util import getTrackTimes
+from ..dset_util import getDatasetLayout, getFillValue
+from ..filters import isCompressionFilter, getFilters, getFilterItem
 from .. import selections
 from .. import filters
 from ..h5writer import H5Writer
@@ -153,106 +157,104 @@ def _createDataset(self, parent, dset_json, name=None):
         dtype = self.db.getDtype(dset_json)
 
         kwargs = {"dtype": dtype}
-        shape_json = dset_json["shape"]
-        shape_class = shape_json["class"]
+        shape_class = getShapeClass(dset_json)
         if shape_class == "H5S_NULL":
             # skip the shape keyword to create a null space dataset
             pass
         elif shape_class == "H5S_SCALAR":
             kwargs["shape"] = ()
         else:
-            kwargs["shape"] = shape_json["dims"]
-        if "dcpl" in dset_json and shape_class != "H5S_NULL":
-            creation_props = dset_json["dcpl"]
-            if "fillValue" in creation_props:
-                fillvalue = creation_props["fillValue"]
-                if fillvalue and len(dtype) > 1 and type(fillvalue) in (list, tuple):
-                    # for compound types, need to convert from list to dataset compatible element
-
-                    if len(dtype) != len(fillvalue):
-                        msg = "fillvalue has incorrect number of elements"
-                        self.log.warning(msg)
-                        raise ValueError(msg)
-
-                    fillvalue = jsonToArray((), dtype, fillvalue)
-
-                kwargs["fillvalue"] = fillvalue
-
-            if "trackTimes" in creation_props:
-                kwargs["track_times"] = creation_props["trackTimes"]
-            if "layout" in creation_props:
-                layout = creation_props["layout"]
-                if "dims" in layout:
-                    kwargs["chunks"] = tuple(layout["dims"])
-            if "filters" in creation_props:
-                filter_props = creation_props["filters"]
-                for filter_prop in filter_props:
-                    if "id" not in filter_prop:
-                        self.log.warning("filter id not provided")
-                        continue
-                    filter_id = filter_prop["id"]
-                    if filter_id not in filters._HDF_FILTERS:
-                        self.log.warning(f"unknown filter id: {filter_id} ignoring")
-                        continue
-
-                    hdf_filter = filters._HDF_FILTERS[filter_id]
-
-                    self.log.info(f"got filter: {filter_id}")
-                    if "alias" not in hdf_filter:
-                        self.log.warning(f"unsupported filter id: {filter_id} ignoring")
-                        continue
-
-                    filter_alias = hdf_filter["alias"]
-                    if not h5py.h5z.filter_avail(filter_id):
-                        msg = "compression filter not available, filter: {filter_alias}, ignoring"
-                        self.log.warning(msg)
-                        continue
-                    if filter_alias in filters._H5PY_COMPRESSION_FILTERS:
-                        if kwargs.get("compression"):
-                            msg = f"compression filter already set for {filter_alias}, ignoring"
-                            self.log.info(msg)
-                            continue
-
-                        kwargs["compression"] = filter_alias
-                        self.log.info("setting compression filter to: {filter_alias}")
-                        if filter_alias == "gzip":
-                            # check for an optional compression value
-                            if "level" in filter_prop:
-                                kwargs["compression_opts"] = filter_prop["level"]
-                        elif filter_alias == "szip":
-                            bitsPerPixel = None
-                            coding = "nn"
+            shape = getShapeDims(dset_json)
+            kwargs["shape"] = shape
+            if isExtensible(dset_json):
+                maxshape = list(getMaxDims(dset_json))
+                # replace any 0, or H5S_UNLIMITED with None
+                for dim in range(len(maxshape)):
+                    if maxshape[dim] in (0, "H5S_UNLIMITED"):
+                        maxshape[dim] = None
+                kwargs["maxshape"] = tuple(maxshape)
 
-                            if "bitsPerPixel" in filter_prop:
-                                bitsPerPixel = filter_prop["bitsPerPixel"]
-                            if "coding" in filter_prop:
-                                if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
-                                    coding = "ec"
-                                elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
-                                    coding = "nn"
-                                else:
-                                    self.log.warning("invalid szip option: 'coding'")
-                            # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
-                            # so these options will be ignored
-                            if "pixelsPerBlock" in filter_props:
-                                self.log.info("ignoring szip option: 'pixelsPerBlock'")
-                            if "pixelsPerScanline" in filter_props:
-                                self.log.info("ignoring szip option: 'pixelsPerScanline'")
-                            if bitsPerPixel:
-                                kwargs["compression_opts"] = (coding, bitsPerPixel)
-                    else:
-                        if filter_alias == "shuffle":
-                            kwargs["shuffle"] = True
-                        elif filter_alias == "fletcher32":
-                            kwargs["fletcher32"] = True
-                        elif filter_alias == "scaleoffset":
-                            if "scaleOffset" not in filter_prop:
-                                msg = "No scale_offset provided for scale offset filter, ignoring"
-                                self.log(msg)
-                                continue
-                            kwargs["scaleoffset"] = filter_prop["scaleOffset"]
+        fillvalue = getFillValue(dset_json)
+
+        if fillvalue and len(dtype) > 1 and type(fillvalue) in (list, tuple):
+            # for compound types, need to convert from list to dataset compatible element
+
+            if len(dtype) != len(fillvalue):
+                msg = "fillvalue has incorrect number of elements"
+                raise ValueError(msg)
+
+            fillvalue = jsonToArray((), dtype, fillvalue)
+
+        kwargs["fillvalue"] = fillvalue
+
+        track_times = getTrackTimes(dset_json)
+        if track_times is not None:
+            kwargs["track_times"] = track_times
+
+        layout = getDatasetLayout(dset_json)
+        if layout and "dims" in layout:
+            kwargs["chunks"] = tuple(layout["dims"])
+
+        filter_props = getFilters(dset_json)
+
+        for filter_prop in filter_props:
+            try:
+                getFilterItem(filter_prop)
+            except (KeyError, ValueError, TypeError):
+                self.log.warning(f"unknown filter: {filter_prop} ignoring")
+                continue
+            filter_class = filter_prop["class"]
+            filter_id = filter_prop["id"]
+            filter_name = filter_prop["name"]
+
+            if not h5py.h5z.filter_avail(filter_id):
+                msg = f"filter not available, filter: {filter_class}, ignoring"
+                self.log.warning(msg)
+                continue
+
+            if isCompressionFilter(filter_class):
+                if kwargs.get("compression"):
+                    msg = f"compression filter already set for {filter_class}, ignoring"
+                    self.log.info(msg)
+                    continue
+
+                kwargs["compression"] = filter_name
+                self.log.info(f"setting compression filter to: {filter_class}")
+                if filter_class == "H5Z_FILTER_DEFLATE":
+                    kwargs["compression"] = "gzip"  # h5py doesn't recognize 'deflate' name
+                    # check for an optional compression value
+                    if "level" in filter_prop:
+                        kwargs["compression_opts"] = filter_prop["level"]
+                elif filter_class == "H5Z_FILTER_SZIP":
+                    bitsPerPixel = None
+                    coding = "nn"
+
+                    if "bitsPerPixel" in filter_prop:
+                        bitsPerPixel = filter_prop["bitsPerPixel"]
+                    if "coding" in filter_prop:
+                        if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
+                            coding = "ec"
+                        elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
+                            coding = "nn"
                         else:
-                            self.log.info(f"Unexpected filter name: {filter_alias}, ignoring")
+                            self.log.warning("invalid szip option: 'coding'")
+                        # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
+                        # so these options will be ignored
+                    if "pixelsPerBlock" in filter_props:
+                        self.log.info("ignoring szip option: 'pixelsPerBlock'")
+                    if "pixelsPerScanline" in filter_props:
+                        self.log.info("ignoring szip option: 'pixelsPerScanline'")
+                    if bitsPerPixel:
+                        kwargs["compression_opts"] = (coding, bitsPerPixel)
+                elif filter_class == "H5Z_FILTER_SHUFFLE":
+                    kwargs["shuffle"] = True
+                elif filter_class == "H5Z_FILTER_FLETCHER32":
+                    kwargs["fletcher32"] = True
+                elif filter_class == "H5Z_FILTER_SCALEOFFSET":
+                    if "scaleOffset" in filter_prop:
+                        kwargs["scaleoffset"] = filter_prop["scaleOffset"]
+                else:
+                    self.log.warning(f"Ignoring filter: {filter_class}")
 
         dset = parent.create_dataset(name, **kwargs)
         return dset
@@ -332,12 +334,18 @@ def _createObjects(self, parent, links_json, visited=set()):
             else:
                 self.log.warning(f"unexpected link class: {link_class}")
 
+    def resizeDataset(self, dset_id, dset):
+        """ Update the datasets shape """
+
+        dset_json = self.db.getObjectById(dset_id)
+        new_dims = getShapeDims(dset_json)
+        dset.resize(new_dims)
+
     def updateDatasetValues(self, dset_id, dset):
         """ write any pending dataset values """
-        dset_json = self.db.getObjectById(dset_id)
-        if "updates" not in dset_json:
-            return
-        updates = dset_json["updates"]
+
+        updates = self.db._getDatasetUpdates(dset_id)
+
         for (sel, val) in updates:
             slices = []
             for dim in range(len(sel.shape)):
@@ -436,11 +444,14 @@ def flush(self):
                 obj = self._f[h5path]
                 self.updateAttributes(obj_id, obj)
                 collection = getCollectionForId(obj_id)
-                if collection == "datasets" and not self.no_data:
-                    if self._init:
-                        self.initializeDatasetValues(obj_id, obj)
-                    else:
-                        self.updateDatasetValues(obj_id, obj)
+                if collection == "datasets":
+                    if self.db.is_resized(obj_id):
+                        self.resizeDataset(obj_id, obj)
+                    if not self.no_data:
+                        if self._init:
+                            self.initializeDatasetValues(obj_id, obj)
+                        else:
+                            self.updateDatasetValues(obj_id, obj)
         # mark time write is complete
         # updates before this time will not need to be written
         # TBD: possible race condition with multithreading
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 7982a926..eadf0dd0 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -13,9 +13,10 @@
 import numpy as np
 import logging
 from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
+from .hdf5dtype import numpy_integer_types, numpy_float_types
 from .array_util import jsonToArray, bytesArrayToList
-from .dset_util import resize_dataset
-from .shape_util import getShapeClass, getShapeDims
+from .dset_util import resize_dataset, getDatasetLayoutClass
+from .shape_util import getShapeClass, getShapeDims, getShapeJson
 from .filters import validateFilters
 from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId, getHashTagForId
 from . import selections
@@ -25,12 +26,38 @@
 from .h5writer import H5Writer, H5NullWriter
 
 
-def _getDatasetUpdates(dset_json):
-    """ return a list of value updates for the datset.
-        initalize one if not already present. """
-    if "updates" not in dset_json:
-        dset_json["updates"] = []
-    return dset_json["updates"]
+def _decode(item, encoding="ascii"):
+    """
+    decode any byte items to python 3 strings
+    """
+    ret_val = None
+    if type(item) is bytes:
+        ret_val = item.decode(encoding)
+    elif type(item) is list:
+        ret_val = []
+        for x in item:
+            ret_val.append(_decode(x, encoding))
+    elif type(item) is tuple:
+        ret_val = []
+        for x in item:
+            ret_val.append(_decode(x, encoding))
+        ret_val = tuple(ret_val)
+    elif type(item) is dict:
+        ret_val = {}
+        for k in dict:
+            ret_val[k] = _decode(item[k], encoding)
+    elif type(item) is np.ndarray:
+        x = item.tolist()
+        ret_val = []
+        for x in item:
+            ret_val.append(_decode(x, encoding))
+    elif type(item) in numpy_integer_types:
+        ret_val = int(item)
+    elif type(item) in numpy_float_types:
+        ret_val = float(item)
+    else:
+        ret_val = item
+    return ret_val
 
 
 class Hdf5db:
@@ -59,9 +86,11 @@ def __init__(
 
         self._db = {}
 
-        self._new_objects = set()  # set of for newly created objects
-        self._dirty_objects = set()  # set of modified objects
-        self._deleted_objects = set()  # set of deleted objects
+        self._new_objects = set()       # set of for newly created objects
+        self._dirty_objects = set()     # set of modified objects
+        self._deleted_objects = set()   # set of deleted objects
+        self._resized_datasets = set()  # set of dataset ids that have been resized
+        self._dataset_updates = {}         # list of dataset values updates keyed by dset_id
 
         self._root_id = None
 
@@ -126,8 +155,19 @@ def is_dirty(self, obj_id):
         obj_id = getHashTagForId(obj_id)
         if self.is_new(obj_id):
             return True
+        if obj_id in self._resized_datasets:
+            return True
         return obj_id in self._dirty_objects
 
+    def is_resized(self, dset_id):
+        """ return true if this dataset has been resized """
+        dset_id = getHashTagForId(dset_id)
+
+        if dset_id in self._resized_datasets:
+            return True
+        else:
+            return False
+
     @property
     def new_objects(self):
         return self._new_objects
@@ -140,6 +180,18 @@ def dirty_objects(self):
     def deleted_objects(self):
         return self._deleted_objects
 
+    @property
+    def resized_datsets(self):
+        return self._resized_datasets
+
+    def _getDatasetUpdates(self, dset_id):
+        """ Get list of update tuples """
+        if getCollectionForId(dset_id) != "datasets":
+            raise TypeError("expected dataset id")
+        if dset_id not in self._dataset_updates:
+            self._dataset_updates[dset_id] = []
+        return self._dataset_updates[dset_id]
+
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
         obj_id = getHashTagForId(obj_id)
@@ -152,7 +204,7 @@ def make_dirty(self, obj_id):
         obj_json = self.db[obj_id]
         obj_json["lastModified"] = getNow()
         if not self.is_new(obj_id):
-            # object hasn't been initially written yet, add to dirt_object set
+            # object hasn't been initially written yet, add to dirty_object set
             self._dirty_objects.add(obj_id)
 
     def flush(self):
@@ -165,9 +217,11 @@ def flush(self):
             return False
 
         # reset new, dirty and deleted sets
-        self._new_objects = set()
-        self._dirty_objects = set()
-        self._deleted_objects = set()
+        self._new_objects.clear()
+        self._dirty_objects.clear()
+        self._deleted_objects.clear()
+        self._resized_datasets.clear()
+        self._dataset_updates.clear()
         return True
 
     def readAll(self):
@@ -587,7 +641,7 @@ def init_arr(dtype, cpl):
         else:
             cpl = {}
 
-        updates = _getDatasetUpdates(dset_json)
+        updates = self._getDatasetUpdates(dset_id)
 
         shape_class = getShapeClass(shape_json)
 
@@ -685,7 +739,7 @@ def setDatasetValues(self, dset_id, sel, arr):
             dims = getShapeDims(shape_json)
             if sel.shape != dims:
                 raise ValueError("Selection shape does not match dataset shape")
-        updates = _getDatasetUpdates(dset_json)
+        updates = self._getDatasetUpdates(dset_id)
         if sel.select_type == selections.H5S_SELECT_ALL:
             # for select all, throw out any existing updates since this will overwrite them
             updates.clear()
@@ -709,8 +763,22 @@ def resizeDataset(self, dset_id, shape):
         self.log.info(f"resizeDataset {dset_id}, {shape}")
 
         dset_json = self.getObjectById(dset_id)  # will throw exception if not found
-        if resize_dataset(dset_json, shape):
-            self._make_dirty(dset_id)
+        old_dims = getShapeDims(dset_json)
+        resize_dataset(dset_json, shape)
+
+        if dset_id not in self.new_objects:
+            self._resized_datasets.add(dset_id)
+
+        # if the shape has shrunk in any dimension, do a flush now
+        new_dims = getShapeDims(dset_json)
+        do_flush = False
+        for i in range(len(new_dims)):
+            if new_dims[i] < old_dims[i]:
+                do_flush = True
+                break
+
+        if do_flush:
+            self.flush()
 
     def deleteObject(self, obj_id):
         """ Delete the given object """
@@ -727,6 +795,9 @@ def deleteObject(self, obj_id):
         if obj_id in self._dirty_objects:
             self._dirty_objects.remove(obj_id)
 
+        if obj_id in self._resized_datasets:
+            self._resized_datasets.remove(obj_id)
+
         self._deleted_objects.add(obj_id)
 
     def getLinks(self, grp_id):
@@ -859,22 +930,7 @@ def createDataset(
         if self.closed:
             raise ValueError("db is closed")
         type_json = getTypeItem(dtype)
-        if shape is None:
-            raise ValueError("shape not set")
-        elif shape == "H5S_NULL":
-            shape_json = {"class": "H5S_NULL"}
-        elif shape == ():
-            shape_json = {"class": "H5S_SCALAR"}
-        else:
-            shape_json = {"class": "H5S_SIMPLE"}
-            shape_json["dims"] = list(shape)
-
-        if maxdims:
-            if shape_json["class"] != "H5S_SIMPLE":
-                raise ValueError("only simple shapes can be resizable")
-            if len(shape) != len(maxdims):
-                raise ValueError("maxdims length not equal to shape rank")
-            shape_json["maxdims"] = ["H5S_UNLIMITED" if x is None else x for x in maxdims]
+        shape_json = getShapeJson(shape, maxdims=maxdims)
 
         dset_json = {"shape": shape_json, "type": type_json, "attributes": {}}
         if cpl:
@@ -885,9 +941,28 @@ def createDataset(
                     supported_filters = ()
                 # validate and normalize supplied filter property list
                 validateFilters(cpl["filters"], supported_filters=supported_filters)
+            if cpl.get("fillValue"):
+                fillvalue = cpl["fillValue"]
+                # is it compatible with the array type?
+                if hasattr(fillvalue, "tolist"):
+                    # convert numpy object to list
+                    fillvalue = fillvalue.tolist()
+                fillvalue = _decode(fillvalue)
+                if not isinstance(fillvalue, str) and hasattr(fillvalue, "__iter__"):
+                    # fill value is a list, or similar: check that dtype is compound
+                    if len(fillvalue) != len(dtype):
+                        raise ValueError("Invalid fill value for non-compound type dataset")
+                    fillvalue = list(fillvalue)
+                    cpl["fillValue"] = fillvalue
+                else:
+                    if type_json["class"] == "H5T_COMPOUND":
+                        raise ValueError("Invalid fill value for compound type dataset")
             dset_json["creationProperties"] = cpl
         else:
             dset_json["creationProperties"] = {}
+
+        if maxdims and getDatasetLayoutClass(dset_json) != "H5D_CHUNKED":
+            raise ValueError("Only datasets with 'H5D_CHUNKED' layout can be resizable")
         dset_json["created"] = getNow()
 
         dset_id = createObjId("datasets", root_id=self.root_id)
diff --git a/src/h5json/shape_util.py b/src/h5json/shape_util.py
index cbc6a8fe..753603cd 100644
--- a/src/h5json/shape_util.py
+++ b/src/h5json/shape_util.py
@@ -48,11 +48,33 @@ def getShapeJson(dims, maxdims=None):
         shape_class = "H5S_SCALAR"
     else:
         shape_class = "H5S_SIMPLE"
+    if dims:
+        for extent in dims:
+            if not isinstance(extent, int):
+                raise TypeError("expected an integer value for dimensions")
+            if extent < 0:
+                raise ValueError("negative extent values are not supported")
+
     if maxdims is not None:
         if shape_class != "H5S_SIMPLE":
             raise ValueError(f"maxdims can not be used with shape class: {shape_class}")
         if len(maxdims) != len(dims):
             raise ValueError("maxdims must match dataspace rank")
+        # convert any 0 or None vlues to "H5S_UNLIMITED"
+        maxdims = list(tuple(maxdims))
+        for i in range(len(maxdims)):
+            extent = maxdims[i]
+            if extent is None or extent == 0:
+                maxdims[i] = "H5S_UNLIMITED"
+            elif isinstance(extent, str):
+                if extent != "H5S_UNLIMITED":
+                    raise ValueError(f"invalid maxdims extent: {extent}")
+            elif isinstance(extent, int):
+                if extent < 0:
+                    raise ValueError("negative extent values are not supported")
+            else:
+                raise TypeError("expected an integer value for maxdims")
+
     shape_json = {"class": shape_class}
     if shape_class == "H5S_SIMPLE":
         shape_json["dims"] = dims
diff --git a/src/h5json/track_util.py b/src/h5json/track_util.py
new file mode 100644
index 00000000..b59e2a08
--- /dev/null
+++ b/src/h5json/track_util.py
@@ -0,0 +1,26 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+def getTrackTimes(obj_json):
+    """ Return a boolean if trackTimes is set in the objects' creation Property list.
+        Otherwise return None. """
+
+    if "creationProperties" in obj_json:
+        cpl = obj_json["creationProperties"]
+    else:
+        cpl = obj_json  # assume this is the cpl
+    if "trackTimes" in cpl:
+        track_times = bool(cpl["trackTimes"])
+    else:
+        track_times = None
+
+    return track_times
diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 1331cd97..364d8929 100644
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -15,7 +15,7 @@
 from h5json.filters import getFilterItem
 from h5json.dset_util import guessChunk, shrinkChunk, getChunkSize, expandChunk, generateLayout
 from h5json.dset_util import getDatasetLayoutClass, getContiguousLayout, getChunkDims
-from h5json.dset_util import validateChunkLayout, validateDatasetCreationProps, getDatasetLayout
+from h5json.dset_util import validateLayout, validateDatasetCreationProps, getDatasetLayout
 
 
 class DsetUtilTest(unittest.TestCase):
@@ -49,7 +49,7 @@ def testGetLayout(self):
 
         # contigous layout with resizable shape should raise exception
         try:
-            validateChunkLayout(dset_json["shape"], type_json, layout)
+            validateLayout(dset_json["shape"], type_json, layout)
             self.assertTrue(False)  # should not reach here
         except ValueError:
             pass  # should raise exception
@@ -68,7 +68,7 @@ def testGetLayout(self):
         self.assertEqual(layout_class, "H5D_CHUNKED")
 
         try:
-            validateChunkLayout(dset_json["shape"], type_json, layout)
+            validateLayout(dset_json["shape"], type_json, layout)
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
 
@@ -186,7 +186,7 @@ def testGuessChunk(self):
         chunk_size = getChunkSize(layout, typesize)
         self.assertTrue(chunk_size <= chunk_max)
 
-        shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, 0]}
+        shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, "H5S_UNLIMITED"]}
         layout = guessChunk(shape, typesize)
         self.assertTrue(len(layout), 2)
         for i in range(2):
@@ -333,7 +333,7 @@ def testExpandChunk(self):
         shape = {
             "class": "H5S_SIMPLE",
             "dims": [1000, 10, 1000],
-            "maxdims": [1000, 0, 1000],
+            "maxdims": [1000, "H5S_UNLIMITED", 1000],
         }
         layout = (10, 10, 10)
         typesize = 4
@@ -360,37 +360,47 @@ def testExpandChunk(self):
         self.assertTrue(num_bytes < CHUNK_MAX)
 
     def testGenerateLayout(self):
-        typesize = 4
         chunk_min = 4000
         chunk_max = 8000
         shape = {
             "class": "H5S_SIMPLE",
             "dims": [40, 20],
         }
+        base_type = 'H5T_IEEE_F32LE'
+        type_json = {'class': 'H5T_FLOAT', 'base': base_type}
+
         kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max}
-        layout = generateLayout(shape, typesize, **kwargs)
+        layout = generateLayout(shape, type_json, **kwargs)
         self.assertTrue("class" in layout)
         self.assertEqual(layout["class"], "H5D_CONTIGUOUS")
         self.assertFalse("dims" in layout)
 
-        layout = generateLayout(shape, typesize, chunks=True, **kwargs)
+        layout = generateLayout(shape, type_json, chunks=True, **kwargs)
         self.assertTrue("class" in layout)
         self.assertEqual(layout["class"], "H5D_CHUNKED")
         self.assertTrue("dims" in layout)
         self.assertEqual(layout["dims"], [40, 20])
 
-        layout = generateLayout(shape, typesize, chunks=(20, 10), **kwargs)
+        layout = generateLayout(shape, type_json, chunks=(20, 10), **kwargs)
         self.assertTrue("class" in layout)
         self.assertEqual(layout["class"], "H5D_CHUNKED")
         self.assertTrue("dims" in layout)
         self.assertEqual(layout["dims"], [20, 10])
 
+        try:
+            # proposed chunk shape can't be larger than shape in
+            # any dimension
+            generateLayout(shape, type_json, chunks=(50, 10), **kwargs)
+            self.assertTrue(False)  # shouldn't get here
+        except ValueError:
+            pass  # expected
+
         shape = {
             "class": "H5S_SIMPLE",
             "dims": [0, 20],
             "maxdims": [0, 20]
         }
-        layout = generateLayout(shape, typesize, **kwargs)
+        layout = generateLayout(shape, type_json, **kwargs)
         self.assertTrue("class" in layout)
         self.assertEqual(layout["class"], "H5D_CHUNKED")
         self.assertTrue("dims" in layout)
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index f0091a39..6e208ada 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -199,6 +199,52 @@ def testSimple(self):
                         expected = i * j
                     self.assertEqual(dset[i, j], expected)
 
+    def testResizableDataset(self):
+        filepath = "test/unit/out/h5py_writer_test_testResizableDataset.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+
+        nrows = 8
+        ncols = 10
+        shape = (nrows, ncols)
+        dtype = np.int32
+        maxdims = (None, ncols * 2)
+        layout = {"class": "H5D_CHUNKED", "dims": (nrows, ncols)}
+        cpl = {"layout": layout}
+
+        root_id = db.open()
+        dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype, cpl=cpl)
+        db.createHardLink(root_id, "dset", dset_id)
+        db.createAttribute(dset_id, "a1", "Hello, world")
+
+        # resize limited dimension
+        db.resizeDataset(dset_id, (nrows, ncols * 2))
+
+        # try to go beyond max extent
+        try:
+            db.resizeDataset(dset_id, (nrows, ncols * 3))
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+        db.close()
+
+        with h5py.File(filepath) as f:
+            dset = f["dset"]
+            self.assertEqual(dset.shape, (nrows, ncols * 2))
+
+        db.open()
+        # resize unlimited dimension
+        db.resizeDataset(dset_id, (nrows * 10, ncols))
+
+        db.close()
+
+        with h5py.File(filepath) as f:
+            dset = f["dset"]
+            self.assertEqual(dset.shape, (nrows * 10, ncols))
+
     def testNullSpaceAttribute(self):
 
         filepath = "test/unit/out/h5py_writer_test_testNullSpaceAttribute.h5"
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 11bdd30b..f7d27f76 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -575,11 +575,13 @@ def testResizableDataset(self):
         shape = (nrows, ncols)
         dtype = np.int32
         maxdims = (None, ncols * 2)
+        layout = {"class": "H5D_CHUNKED", "dims": shape}
+        cpl = {"layout": layout}
 
         db = Hdf5db(app_logger=self.log)
 
         root_id = db.open()
-        dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
+        dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype, cpl=cpl)
         db.createHardLink(root_id, "dset", dset_id)
         db.createAttribute(dset_id, "a1", "Hello, world")
 
diff --git a/test/unit/shape_util_test.py b/test/unit/shape_util_test.py
index 98812692..61c78e51 100644
--- a/test/unit/shape_util_test.py
+++ b/test/unit/shape_util_test.py
@@ -45,6 +45,7 @@ def testSimple(self):
         simple_shape_obj = {"type": type_json, "shape": simple_shape_json}
         vstr_simple_shape_obj = {"type": vstr_json, "shape": simple_shape_json}
         resizable_shape_obj = {'class': 'H5S_SIMPLE', 'dims': [10], 'maxdims': [20]}
+        unlimited_shape_obj = {'class': 'H5S_SIMPLE', 'dims': [0, 20], 'maxdims': ["H5S_UNLIMITED", 40]}
 
         self.assertEqual(getShapeClass(null_shape_json), "H5S_NULL")
         self.assertEqual(getShapeClass(null_shape_obj), "H5S_NULL")
@@ -55,6 +56,7 @@ def testSimple(self):
         self.assertEqual(getShapeClass(simple_shape_obj), "H5S_SIMPLE")
         self.assertEqual(getShapeClass(vstr_simple_shape_obj), "H5S_SIMPLE")
         self.assertEqual(getShapeClass(resizable_shape_obj), "H5S_SIMPLE")
+        self.assertEqual(getShapeClass(unlimited_shape_obj), "H5S_SIMPLE")
 
         self.assertEqual(getShapeDims(null_shape_json), None)
         self.assertEqual(getShapeDims(null_shape_obj), None)
@@ -66,6 +68,7 @@ def testSimple(self):
         self.assertEqual(getShapeDims(vstr_simple_shape_obj), (5, 7))
         self.assertEqual(getShapeDims(12), (12,))
         self.assertEqual(getShapeDims(resizable_shape_obj), (10,))
+        self.assertEqual(getShapeDims(unlimited_shape_obj), (0, 20))
 
         self.assertEqual(getMaxDims(null_shape_json), None)
         self.assertEqual(getMaxDims(null_shape_obj), None)
@@ -76,6 +79,7 @@ def testSimple(self):
         self.assertEqual(getMaxDims(simple_shape_obj), (5, 7))
         self.assertEqual(getMaxDims(vstr_simple_shape_obj), (5, 7))
         self.assertEqual(getMaxDims(resizable_shape_obj), (20,))
+        self.assertEqual(getMaxDims(unlimited_shape_obj), ("H5S_UNLIMITED", 40))
 
         self.assertEqual(getRank(null_shape_json), 0)
         self.assertEqual(getRank(null_shape_obj), 0)
@@ -86,6 +90,7 @@ def testSimple(self):
         self.assertEqual(getRank(simple_shape_obj), 2)
         self.assertEqual(getRank(vstr_simple_shape_obj), 2)
         self.assertEqual(getRank(resizable_shape_obj), 1)
+        self.assertEqual(getRank(unlimited_shape_obj), 2)
 
         self.assertEqual(getNumElements(null_shape_json), 0)
         self.assertEqual(getNumElements(null_shape_obj), 0)
@@ -96,6 +101,7 @@ def testSimple(self):
         self.assertEqual(getNumElements(simple_shape_obj), 35)
         self.assertEqual(getNumElements(vstr_simple_shape_obj), 35)
         self.assertEqual(getNumElements(resizable_shape_obj), 10)
+        self.assertEqual(getNumElements(unlimited_shape_obj), 0)
         self.assertEqual(getNumElements(()), 1)
         self.assertEqual(getNumElements([1, 2, 3]), 6)
 
@@ -108,6 +114,7 @@ def testSimple(self):
         self.assertEqual(isNullSpace(simple_shape_obj), False)
         self.assertEqual(isNullSpace(vstr_simple_shape_obj), False)
         self.assertEqual(isNullSpace(resizable_shape_obj), False)
+        self.assertEqual(isNullSpace(unlimited_shape_obj), False)
 
         self.assertEqual(isScalar(null_shape_json), False)
         self.assertEqual(isScalar(null_shape_obj), False)
@@ -118,6 +125,7 @@ def testSimple(self):
         self.assertEqual(isScalar(simple_shape_obj), False)
         self.assertEqual(isScalar(vstr_simple_shape_obj), False)
         self.assertEqual(isScalar(resizable_shape_obj), False)
+        self.assertEqual(isScalar(unlimited_shape_obj), False)
 
         self.assertEqual(getDataSize(null_shape_json, 4), 0)
         self.assertEqual(getDataSize(null_shape_obj, 4), 0)
@@ -128,6 +136,8 @@ def testSimple(self):
         self.assertEqual(getDataSize(simple_shape_obj, 4), 140)
         self.assertEqual(getDataSize(vstr_simple_shape_obj, 4), 140)
         self.assertEqual(getDataSize(resizable_shape_obj, 4), 40)
+        self.assertEqual(getDataSize(unlimited_shape_obj, 4), 0)
+
         self.assertEqual(getDataSize((), 4), 4)
         self.assertEqual(getDataSize([1, 2, 3], 4), 24)
 
@@ -140,6 +150,7 @@ def testSimple(self):
         self.assertEqual(isScalar(simple_shape_obj), False)
         self.assertEqual(isScalar(vstr_simple_shape_obj), False)
         self.assertEqual(isScalar(resizable_shape_obj), False)
+        self.assertEqual(isScalar(unlimited_shape_obj), False)
 
         self.assertEqual(isExtensible(null_shape_json), False)
         self.assertEqual(isExtensible(null_shape_obj), False)
@@ -150,6 +161,7 @@ def testSimple(self):
         self.assertEqual(isExtensible(simple_shape_obj), False)
         self.assertEqual(isExtensible(vstr_simple_shape_obj), False)
         self.assertEqual(isExtensible(resizable_shape_obj), True)
+        self.assertEqual(isExtensible(unlimited_shape_obj), True)
 
 
 if __name__ == "__main__":

From 6f94e0736a26f8819cb0fc651c9c52a48119581e Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 2 Jan 2026 10:09:04 +0800
Subject: [PATCH 106/129] adjust dataset updates for resize

---
 src/h5json/hdf5db.py         | 20 ++++++++++--
 src/h5json/selections.py     |  1 -
 src/h5json/shape_util.py     |  6 ++--
 test/unit/shape_util_test.py | 60 +++++++++++++++++++++++++++++++++++-
 4 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index eadf0dd0..a901d0ba 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -181,7 +181,7 @@ def deleted_objects(self):
         return self._deleted_objects
 
     @property
-    def resized_datsets(self):
+    def resized_datasets(self):
         return self._resized_datasets
 
     def _getDatasetUpdates(self, dset_id):
@@ -769,8 +769,24 @@ def resizeDataset(self, dset_id, shape):
         if dset_id not in self.new_objects:
             self._resized_datasets.add(dset_id)
 
-        # if the shape has shrunk in any dimension, do a flush now
         new_dims = getShapeDims(dset_json)
+        rank = len(new_dims)
+
+        # adjust any selections in the update list
+        updates = self._getDatasetUpdates(dset_id)
+        for i in range(len(updates)):
+            (sel_update, arr) = updates[i]
+            if sel_update.select_type == selections.H5S_SELECT_HYPERSLABS:
+                slices = list(sel_update.slices)
+                for dim in range(rank):
+                    s = slices[dim]
+                    if s.stop > new_dims[dim]:
+                        # selection outside new bounds of dataset
+                        slices[dim] = slice(s.start, new_dims[dim], s.step)
+                sel_update = selections.select(new_dims, tuple(slices))
+                updates[i] = (sel_update, arr)
+
+        # if the shape has shrunk in any dimension, do a flush now
         do_flush = False
         for i in range(len(new_dims)):
             if new_dims[i] < old_dims[i]:
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 93dd8bcb..75b06913 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -422,7 +422,6 @@ def __getitem__(self, args):
         start, count, step, scalar = _handle_simple(self._shape, args)
         self._sel = (start, count, step, scalar)
 
-        # self._id.select_hyperslab(start, count, step)
         self._select_type = H5S_SELECT_HYPERSLABS
 
         self._mshape = tuple(x for x, y in zip(count, scalar) if not y)
diff --git a/src/h5json/shape_util.py b/src/h5json/shape_util.py
index 753603cd..5f96c392 100644
--- a/src/h5json/shape_util.py
+++ b/src/h5json/shape_util.py
@@ -40,8 +40,8 @@ def getShapeJson(dims, maxdims=None):
         datasets) """
     if isinstance(dims, int):
         dims = (dims, )
-    if isinstance(maxdims, int):
-        maxdims = (maxdims, )
+    elif dims == "H5S_NULL":
+        dims = None
     if dims is None:
         shape_class = "H5S_NULL"
     elif len(dims) == 0:
@@ -58,6 +58,8 @@ def getShapeJson(dims, maxdims=None):
     if maxdims is not None:
         if shape_class != "H5S_SIMPLE":
             raise ValueError(f"maxdims can not be used with shape class: {shape_class}")
+        if isinstance(maxdims, int):
+            maxdims = (maxdims, )
         if len(maxdims) != len(dims):
             raise ValueError("maxdims must match dataspace rank")
         # convert any 0 or None vlues to "H5S_UNLIMITED"
diff --git a/test/unit/shape_util_test.py b/test/unit/shape_util_test.py
index 61c78e51..4266e2a9 100644
--- a/test/unit/shape_util_test.py
+++ b/test/unit/shape_util_test.py
@@ -12,7 +12,7 @@
 import unittest
 import logging
 
-from h5json.shape_util import getShapeClass, getShapeDims, getNumElements, getRank
+from h5json.shape_util import getShapeClass, getShapeDims, getNumElements, getRank, getShapeJson
 from h5json.shape_util import isNullSpace, isScalar, getDataSize, isExtensible, getMaxDims
 
 
@@ -23,6 +23,64 @@ def __init__(self, *args, **kwargs):
         self.logger = logging.getLogger()
         self.logger.setLevel(logging.WARNING)
 
+    def testGetShape(self):
+
+        null_shape = getShapeJson("H5S_NULL")
+        self.assertTrue("class" in null_shape)
+        self.assertEqual(null_shape["class"], "H5S_NULL")
+        self.assertFalse("dims" in null_shape)
+        self.assertFalse("maxdims" in null_shape)
+
+        null_shape = getShapeJson(None)
+        self.assertTrue("class" in null_shape)
+        self.assertEqual(null_shape["class"], "H5S_NULL")
+        self.assertFalse("dims" in null_shape)
+        self.assertFalse("maxdims" in null_shape)
+
+        scalar_shape = getShapeJson(())
+        self.assertTrue("class" in scalar_shape)
+        self.assertEqual(scalar_shape["class"], "H5S_SCALAR")
+        self.assertTrue("dims" not in scalar_shape)
+        self.assertFalse("maxdims" in scalar_shape)
+
+        simple_shape = getShapeJson(42)
+        self.assertTrue("class" in simple_shape)
+        self.assertEqual(simple_shape["class"], "H5S_SIMPLE")
+        self.assertTrue("dims" in simple_shape)
+        self.assertEqual(simple_shape["dims"], (42, ))
+        self.assertFalse("maxdims" in simple_shape)
+
+        simple_shape = getShapeJson((42, ))
+        self.assertTrue("class" in simple_shape)
+        self.assertEqual(simple_shape["class"], "H5S_SIMPLE")
+        self.assertTrue("dims" in simple_shape)
+        self.assertEqual(simple_shape["dims"], (42, ))
+        self.assertFalse("maxdims" in simple_shape)
+
+        extendable_shape = getShapeJson((4, 5), maxdims=("H5S_UNLIMITED", 10))
+        self.assertTrue("class" in extendable_shape)
+        self.assertEqual(extendable_shape["class"], "H5S_SIMPLE")
+        self.assertTrue("dims" in extendable_shape)
+        self.assertEqual(extendable_shape["dims"], (4, 5))
+        self.assertTrue("maxdims" in extendable_shape)
+        self.assertTrue(extendable_shape["maxdims"], ("H5S_UNLIMITED", 10))
+
+        extendable_shape = getShapeJson((4, 5), maxdims=(None, 10))
+        self.assertTrue("class" in extendable_shape)
+        self.assertEqual(extendable_shape["class"], "H5S_SIMPLE")
+        self.assertTrue("dims" in extendable_shape)
+        self.assertEqual(extendable_shape["dims"], (4, 5))
+        self.assertTrue("maxdims" in extendable_shape)
+        self.assertTrue(extendable_shape["maxdims"], ("H5S_UNLIMITED", 10))
+
+        extendable_shape = getShapeJson((4, 5), maxdims=(0, 10))
+        self.assertTrue("class" in extendable_shape)
+        self.assertEqual(extendable_shape["class"], "H5S_SIMPLE")
+        self.assertTrue("dims" in extendable_shape)
+        self.assertEqual(extendable_shape["dims"], (4, 5))
+        self.assertTrue("maxdims" in extendable_shape)
+        self.assertTrue(extendable_shape["maxdims"], ("H5S_UNLIMITED", 10))
+
     def testSimple(self):
 
         type_json = {

From b4485eb44675b32c38e565776b6d731f004f8a84 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 2 Jan 2026 11:20:43 +0800
Subject: [PATCH 107/129] test broadcasting

---
 src/h5json/hdf5db.py          |  2 +-
 test/unit/h5py_writer_test.py | 30 ++++++++++++++++++++++++------
 test/unit/hdf5db_test.py      | 17 +++++++++++++++++
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index a901d0ba..b4758ff0 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -753,7 +753,7 @@ def setDatasetValues(self, dset_id, sel, arr):
             if sel.select_type != selections.H5S_SELECT_HYPERSLABS:
                 raise ValueError("tbd")
             arr = arr.reshape(sel.mshape)
-        updates.append((sel, arr.copy()))
+        updates.append((sel, arr))
         self.make_dirty(dset_id)
 
     def resizeDataset(self, dset_id, shape):
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 6e208ada..567c1439 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -97,12 +97,13 @@ def testSimple(self):
         g1_1_id = db.createGroup()
         db.createHardLink(g1_id, "g1.1", g1_1_id)
         dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
-        arr = np.zeros((10, 10), dtype=np.int32)
-        for i in range(10):
-            for j in range(10):
-                arr[i, j] = i * j
+
+        # try setting dset values with broadcasting
+        arr_one_value = np.zeros((1, 1), dtype=np.int32)
+        arr_one_value[0, 0] = 42
         sel_all = selections.select((10, 10), ...)
-        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.setDatasetValues(dset_111_id, sel_all, arr_one_value)
+
         db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
         db.createSoftLink(g2_id, "slink", "somewhere")
         db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
@@ -126,12 +127,29 @@ def testSimple(self):
             self.assertEqual(dset.shape, (10, 10))
             for i in range(10):
                 for j in range(10):
-                    self.assertEqual(dset[i, j], i * j)
+                    self.assertEqual(dset[i, j], 42)
             self.assertTrue("g2" in f)
             g2 = f["g2"]
             self.assertTrue("extlink" in g2)
             self.assertTrue("slink" in g2)
 
+        # write dataset values element by element
+        db.open()
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.close()
+
+        # verify changes in h5py
+        with h5py.File(filepath) as f:
+            dset = f["/g1/g1.1/dset1.1.1"]
+            for i in range(10):
+                for j in range(10):
+                    self.assertEqual(dset[i, j], i * j)
+
         db.open()
         db.createAttribute(g1_id, "a1", "hello")
         db.createAttribute(g1_id, "a2", "bye-bye")
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index f7d27f76..b3b4891c 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -478,6 +478,23 @@ def testSimpleDataset(self):
                 self.assertEqual(val.shape, (1, 1))
                 self.assertEqual(val[0, 0], i * 10 + j)
 
+        # test select all write
+        sel = selections.select(shape, ...)
+        print("got sel:", sel)
+        print(sel.select_type)
+        arr = np.zeros(shape, dtype=dtype)
+        arr[...] = 42
+        db.setDatasetValues(dset_id, sel, arr)
+        arr = db.getDatasetValues(dset_id, sel)
+        for i in range(nrows):
+            for j in range(ncols):
+                self.assertEqual(arr[i, j], 42)
+
+        # try with broadcasting
+        arr_one_value = np.zeros((1, 1), dtype=dtype)
+        arr_one_value[0, 0] = 7
+        db.setDatasetValues(dset_id, sel, arr_one_value)
+
         db.close()
 
     def testStringDataset(self):

From bb4d148a213e5ae53fd6d7e1f81cb61faff62cb2 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 2 Jan 2026 18:55:35 +0800
Subject: [PATCH 108/129] added data limit option to json writer

---
 src/h5json/apps/h5tojson.py           | 16 +++--
 src/h5json/hdf5db.py                  |  2 +-
 src/h5json/jsonstore/h5json_writer.py | 33 +++++++--
 test/unit/h5json_writer_test.py       | 99 +++++++++++++++++++++++----
 test/unit/hdf5db_test.py              |  5 +-
 5 files changed, 127 insertions(+), 28 deletions(-)

diff --git a/src/h5json/apps/h5tojson.py b/src/h5json/apps/h5tojson.py
index 284de84c..24b5716e 100755
--- a/src/h5json/apps/h5tojson.py
+++ b/src/h5json/apps/h5tojson.py
@@ -20,14 +20,22 @@
 
 def main():
     if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
-        print(f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file>")
+        print(f"usage: {sys.argv[0]} [-h] [--nodata] [--data-limit n] <hdf5_file>")
         sys.exit(0)
 
-    no_data = False
+    data_limit = None
     filename = None
     for i in range(1, len(sys.argv)):
         if sys.argv[i] == "--nodata":
-            no_data = True
+            data_limit = 0
+        elif sys.argv[i] == "--data-limit":
+            i += 1
+            if i >= len(sys.argv):
+                sys.exit("Error: --data-limit requires a numeric argument")
+            try:
+                data_limit = int(sys.argv[i])
+            except ValueError:
+                sys.exit("Error: --data-limit requires a numeric argument")
         else:
             filename = sys.argv[i]
 
@@ -45,7 +53,7 @@ def main():
 
     db = Hdf5db(app_logger=log)
     db.reader = H5pyReader(filename, app_logger=log)
-    db.writer = H5JsonWriter(None, no_data=no_data, app_logger=log)
+    db.writer = H5JsonWriter(None, data_limit=data_limit, app_logger=log)
     db.open()  # read HDF5 data into db
     db.close()  # close will trigger write to json file
 
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index b4758ff0..0085b93e 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -215,7 +215,7 @@ def flush(self):
             # flush not successful, don't clear dirty set
             self.log.error("writer flush failed")
             return False
-
+        self.log.debug("clearing new, dirty, deleted sets")
         # reset new, dirty and deleted sets
         self._new_objects.clear()
         self._dirty_objects.clear()
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index f37ac415..f97df007 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -17,6 +17,7 @@
 from ..h5writer import H5Writer
 from ..objid import getUuidFromId, getCollectionForId, createObjId
 from ..array_util import bytesArrayToList
+from ..hdf5dtype import getItemSize
 from .. import selections
 
 
@@ -30,15 +31,20 @@ def __init__(
         self,
         filepath,
         append=False,
-        no_data=False,
+        data_limit=None,
+        indent=4,
         app_logger=None
     ):
+        no_data = True if data_limit == 0 else False
         super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
         if append:
             raise ValueError("H5JsonWriter does not support append mode")
         self.alias_db = {}
         self.json = {}
+        self._data_limit = data_limit
         self._root_id = None
+        self._indent = indent
+        self._file_dumped = False
 
     def flush(self):
         """ Write dirty items """
@@ -49,7 +55,12 @@ def flush(self):
             raise IOError(msg)
 
         self.log.info("flush")
-        self.dumpFile()
+        if self._file_dumped:
+            self.log.info("flush: file already dumped, nothing to do")
+        else:
+            self.dumpFile()
+            self._file_dumped = True
+
         return True
 
     def open(self):
@@ -196,7 +207,8 @@ def dumpDataset(self, obj_id):
         alias = self.getAliasList(obj_id)
         response["alias"] = alias
 
-        response["type"] = item["type"]
+        type_item = item["type"]
+        response["type"] = type_item
         shapeItem = item["shape"]
         shape_rsp = {}
         num_elements = 1
@@ -229,8 +241,15 @@ def dumpDataset(self, obj_id):
         attributes = self.dumpAttributes(obj_id)
         if attributes:
             response["attributes"] = attributes
-
-        if not self.no_data:
+        if self._data_limit is not None:
+            item_size = getItemSize(type_item)
+            if item_size == "H5T_VARIABLE":
+                item_size = 1024  # assume average size for variable length types
+            total_size = item_size * num_elements
+
+            if total_size > self._data_limit:
+                self.log.info(f"skipping data dump for dataset {obj_id} with {num_elements} elements")
+        if self._data_limit is None or total_size <= self._data_limit:
             if num_elements > 0:
                 sel_all = selections.select(dims, ...)
                 arr = self.db.getDatasetValues(obj_id, sel_all)
@@ -287,10 +306,10 @@ def dumpFile(self):
         self.dumpDatasets()
 
         self.dumpDatatypes()
-        indent = 4
+        indent = self._indent
         ensure_ascii = True
         if self._filepath:
-            with open('data.json', 'w', encoding='utf-8') as f:
+            with open(self._filepath, 'w', encoding='utf-8') as f:
                 json.dump(self.json, f, ensure_ascii=ensure_ascii, indent=indent)
         else:
             print(json.dumps(self.json, sort_keys=True, ensure_ascii=ensure_ascii, indent=indent))
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
index ba2cbc19..bb1b8a5d 100644
--- a/test/unit/h5json_writer_test.py
+++ b/test/unit/h5json_writer_test.py
@@ -11,10 +11,12 @@
 ##############################################################################
 import unittest
 import time
+from os.path import getsize
 import logging
 import numpy as np
 from h5json import Hdf5db
 from h5json.jsonstore.h5json_writer import H5JsonWriter
+
 from h5json.hdf5dtype import special_dtype, Reference
 from h5json import selections
 
@@ -44,7 +46,7 @@ def __init__(self, *args, **kwargs):
 
     def testSimple(self):
 
-        filepath = "test/unit/out/h5json_writer_testSimple.h5"
+        filepath = "test/unit/out/h5json_writer_testSimple.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -71,12 +73,12 @@ def testSimple(self):
         db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
         db.createCustomLink(g2_id, "cust", {"foo": "bar"})
         self.assertTrue(db.writer.lastModified is None)  # no update yet
-        db.flush()
+        db.close()
         self.assertTrue(db.writer.lastModified > 0)  # timestamp should be updated
 
     def testNullSpaceAttribute(self):
 
-        filepath = "test/unit/out/h5json_writer_testNullSpaceAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testNullSpaceAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -90,9 +92,10 @@ def testNullSpaceAttribute(self):
         self.assertTrue(item["created"] > time.time() - 1.0)
         value = db.getAttributeValue(root_id, "A1")
         self.assertEqual(value, None)
+        db.close()
 
     def testScalarAttribute(self):
-        filepath = "test/unit/out/h5json_writer_testScalarAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testScalarAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -116,9 +119,10 @@ def testScalarAttribute(self):
 
         self.assertEqual(item_type["class"], "H5T_INTEGER")
         self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        db.close()
 
     def testFixedStringAttribute(self):
-        filepath = "test/unit/out/h5json_writer_testFixedStringAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testFixedStringAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -138,9 +142,10 @@ def testFixedStringAttribute(self):
         self.assertTrue(item["created"] > now - 1)
         ret_value = db.getAttributeValue(root_id, "A1")
         self.assertEqual(ret_value, b'Hello, world!')
+        db.close()
 
     def testVlenAsciiAttribute(self):
-        filepath = "test/unit/out/h5json_writer_testVlenAsciiAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testVlenAsciiAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -163,9 +168,10 @@ def testVlenAsciiAttribute(self):
         self.assertEqual(item["value"], "Hello, world!")
         now = int(time.time())
         self.assertTrue(item["created"] > now - 1)
+        db.close()
 
     def testVlenUtf8Attribute(self):
-        filepath = "test/unit/out/h5json_writer_testVlenutf8Attribute.h5"
+        filepath = "test/unit/out/h5json_writer_testVlenutf8Attribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -188,9 +194,10 @@ def testVlenUtf8Attribute(self):
         self.assertEqual(item["value"], "Hello, world!")
         now = int(time.time())
         self.assertTrue(item["created"] > now - 1)
+        db.close()
 
     def testIntAttribute(self):
-        filepath = "test/unit/out/h5json_writer_testIntAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testIntAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -207,9 +214,10 @@ def testIntAttribute(self):
         item_type = item["type"]
         self.assertEqual(item_type["class"], "H5T_INTEGER")
         self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db.close()
 
     def testCreateReferenceAttribute(self):
-        filepath = "test/unit/out/h5json_writer_testCreateReferenceAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testCreateReferenceAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -223,19 +231,22 @@ def testCreateReferenceAttribute(self):
         ds1_ref = "datasets/" + dset_id
         value = [ds1_ref,]
         db.createAttribute(root_id, "A1", value, dtype=dt)
-        item = db.getAttribute(root_id, "A1")
         attr = db.getAttribute(root_id, "A1")
         self.assertTrue("shape" in attr)
+        shape = attr["shape"]
+        self.assertEqual(shape["class"], "H5S_SIMPLE")
+        self.assertEqual(shape["dims"], [1,])
 
         attr_type = attr["type"]
         self.assertEqual(attr_type["class"], "H5T_REFERENCE")
         self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
-        attr_value = item["value"]
+        attr_value = attr["value"]
         self.assertEqual(len(attr_value), 1)
         self.assertEqual(attr_value[0], ds1_ref)
+        db.close()
 
     def testCreateVlenReferenceAttribute(self):
-        filepath = "test/unit/out/h5json_writer_testVlenReferenceAttribute.h5"
+        filepath = "test/unit/out/h5json_writer_testVlenReferenceAttribute.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -268,9 +279,10 @@ def testCreateVlenReferenceAttribute(self):
 
         item_shape = item["shape"]
         self.assertEqual(item_shape["class"], "H5S_SCALAR")
+        db.close()
 
     def testCommittedType(self):
-        filepath = "test/unit/out/h5json_writer_testCommittedType.h5"
+        filepath = "test/unit/out/h5json_writer_testCommittedType.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -299,9 +311,10 @@ def testCommittedType(self):
         self.assertEqual(attr_type["class"], "H5T_STRING")
         self.assertEqual(attr_type["length"], 15)
         self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+        db.close()
 
     def testCommittedCompoundType(self):
-        filepath = "test/unit/out/h5json_writer_testCommittedCompoundType.h5"
+        filepath = "test/unit/out/h5json_writer_testCommittedCompoundType.json"
 
         db = Hdf5db(app_logger=self.log)
         db.writer = H5JsonWriter(filepath, app_logger=self.log)
@@ -340,6 +353,64 @@ def testCommittedCompoundType(self):
 
         value = db.getAttributeValue(root_id, "A1")
         self.assertTrue(isinstance(value, np.ndarray))
+        db.close()
+
+    def testNoData(self):
+
+        def init_db(db):
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+            db.createAttribute(root_id, "attr2", 42)
+            g1_id = db.createGroup()
+            db.createHardLink(root_id, "g1", g1_id)
+            g2_id = db.createGroup()
+            db.createHardLink(root_id, "g2", g2_id)
+
+            g1_1_id = db.createGroup()
+            db.createHardLink(g1_id, "g1.1", g1_1_id)
+            dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+            arr = np.zeros((10, 10), dtype=np.int32)
+            for i in range(10):
+                for j in range(10):
+                    arr[i, j] = i * j
+            sel_all = selections.select((10, 10), ...)
+            db.setDatasetValues(dset_111_id, sel_all, arr)
+            dset_0_id = db.createDataset(shape=(), dtype=np.int32)
+            arr = np.zeros((), dtype=np.int32)
+            arr[()] = 42
+            sel_all = selections.select((), ...)
+            db.setDatasetValues(dset_0_id, sel_all, arr)
+            db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+            db.createHardLink(g1_1_id, "dset0", dset_0_id)
+            db.createSoftLink(g2_id, "slink", "somewhere")
+            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+
+        def save_json(filepath, data_limit=None):
+            db = Hdf5db(app_logger=self.log)
+            kwargs = {"indent": 2, "app_logger": self.log}
+            db.writer = H5JsonWriter(filepath, data_limit=data_limit, **kwargs)
+            db.open()
+            init_db(db)
+            db.close()
+            file_size = getsize(filepath)
+            return file_size
+
+        file_prefix = "test/unit/out/h5json_writer_testNoData_"
+
+        size_with_data = save_json(file_prefix + "withData.json", data_limit=None)
+        # should be close to 4640
+        self.assertTrue(size_with_data > 4000)
+
+        size_without_data = save_json(file_prefix + "withoutData.json", data_limit=0)
+        # should be close to 3038
+        self.assertTrue(size_without_data > 3000)
+        self.assertTrue(size_without_data < 4000)
+
+        size_with_smalldata = save_json(file_prefix + "withSmallData.json", data_limit=100)
+        # should be close to 3057
+        self.assertTrue(size_with_smalldata > size_without_data)
+        self.assertTrue(size_with_smalldata < size_with_data)
 
 
 if __name__ == "__main__":
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index b3b4891c..e34dd3b3 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -145,6 +145,9 @@ def testGroup(self):
         self.assertEqual(len(db.getAttributes(g1_id)), 2)
         a1_attr = db.getAttribute(g1_id, "a1")
         self.assertEqual(a1_attr["value"], "hello")
+        self.assertTrue("shape" in a1_attr)
+        attr_shape = a1_attr["shape"]
+        self.assertEqual(attr_shape["class"], "H5S_SCALAR")
 
         db.deleteAttribute(g1_id, "a1")
         self.assertEqual(len(db.getAttributes(g1_id)), 1)
@@ -480,8 +483,6 @@ def testSimpleDataset(self):
 
         # test select all write
         sel = selections.select(shape, ...)
-        print("got sel:", sel)
-        print(sel.select_type)
         arr = np.zeros(shape, dtype=dtype)
         arr[...] = 42
         db.setDatasetValues(dset_id, sel, arr)

From db47efae7f4cbc2fc060e04ab8ee2477032b391c Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 6 Jan 2026 19:05:39 +0800
Subject: [PATCH 109/129] fix for H5S_UNLIMITED

---
 src/h5json/dset_util.py | 2 +-
 src/h5json/hdf5db.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
index 50340438..d3f8dfba 100644
--- a/src/h5json/dset_util.py
+++ b/src/h5json/dset_util.py
@@ -256,7 +256,7 @@ def validateLayout(shape_json, type_json, layout):
             if max_dims is None:
                 if chunk_extent > dim_extent:
                     msg = "Invalid layout value"
-                    raise ValueError(reason=msg)
+                    raise ValueError(msg)
             elif max_dims[i] not in (0, "H5S_UNLIMITED"):
                 if chunk_extent > max_dims[i]:
                     msg = "Invalid layout value for extensible dimension"
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 0085b93e..d3ceeb01 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -346,7 +346,7 @@ def getObjectById(self, obj_id, refresh=False):
         """ return object with given id """
         self._checkReader()
         obj_id = getHashTagForId(obj_id)
-        if obj_id not in self.db or refresh:
+        if obj_id not in self.db or (refresh and not self.is_new(obj_id)):
             # load the obj from the reader
             self.log.debug(f"getObjectById - fetching {obj_id} from reader")
             obj_json = self.reader.getObjectById(obj_id)

From 3a2e6b29fbddc51fb6ed130e8197b1b2deeb9542 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 8 Jan 2026 13:12:24 +0800
Subject: [PATCH 110/129] added link_util file

---
 src/h5json/link_util.py | 146 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 src/h5json/link_util.py

diff --git a/src/h5json/link_util.py b/src/h5json/link_util.py
new file mode 100644
index 00000000..5d659210
--- /dev/null
+++ b/src/h5json/link_util.py
@@ -0,0 +1,146 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+#
+# link_util:
+# link related functions
+#
+from h5json.objid import isValidUuid
+
+
+def validateLinkName(name):
+    """ verify the link name is valid """
+    if not isinstance(name, str):
+        msg = "Unexpected type for link name"
+        raise ValueError(msg)
+    if name.find("/") >= 0:
+        msg = "link name contains slash"
+        raise ValueError(msg)
+
+
+def getLinkClass(link_json):
+    """ verify this is a valid link
+        returns the link class """
+    if "class" in link_json:
+        link_class = link_json["class"]
+    else:
+        link_class = None
+    if "h5path" in link_json and "id" in link_json:
+        msg = "link tgt_id and h5path both set"
+        raise ValueError(msg)
+    if "id" in link_json:
+        tgt_id = link_json["id"]
+        if not isValidUuid(tgt_id):
+            msg = f"link with invalid id: {tgt_id}"
+            raise ValueError(msg)
+        if link_class:
+            if link_class != "H5L_TYPE_HARD":
+                msg = f"expected link class to be H5L_TYPE_HARD but got: {link_class}"
+                raise ValueError(msg)
+        else:
+            link_class = "H5L_TYPE_HARD"
+    elif link_json.get("h5path"):
+        if link_json.get("h5domain") or link_json.get("file"):
+            if link_class:
+                if link_class != "H5L_TYPE_EXTERNAL":
+                    msg = f"expected link class to be H5L_TYPE_EXTERNAL but got: {link_class}"
+                    raise ValueError(msg)
+            else:
+                link_class = "H5L_TYPE_EXTERNAL"
+        else:
+            if link_class:
+                if link_class != "H5L_TYPE_SOFT":
+                    msg = f"expected link class to be H5L_TYPE_SOFT but got: {link_class}"
+                    raise ValueError(msg)
+            else:
+                link_class = "H5L_TYPE_SOFT"
+    else:
+        msg = "link with no id or h5path"
+        raise ValueError(msg)
+
+    return link_class
+
+
+def getLinkId(link_json):
+    """ return id for hard links, otherwise raise type error """
+    if getLinkClass(link_json) != "H5L_TYPE_HARD":
+        raise TypeError("expected hard link")
+    return link_json["id"]
+
+
+def getLinkPath(link_json):
+    """ Returns h5path for soft or external link.  Otherwise raise type error """
+
+    if getLinkClass(link_json) not in ("H5L_TYPE_SOFT", "H5L_TYPE_EXTERNAL"):
+        raise TypeError("expected soft or external link")
+
+    return link_json["h5path"]
+
+
+def getLinkFilePath(link_json):
+    """ return file path for an external link.  Otherwise raise type error """
+    if getLinkClass(link_json) != "H5L_TYPE_EXTERNAL":
+        raise TypeError("expected External Link")
+    if "file" in link_json:
+        link_file = link_json["file"]
+    elif "h5domain" in link_json:
+        # h5domain was the deprecated storage key
+        # check for backward compatibility
+        link_file = link_json["h5domain"]
+    else:
+        raise KeyError("unexpected link format")
+    return link_file
+
+
+def isEqualLink(link1, link2):
+    """ Return True if the two links are the same """
+
+    for obj in (link1, link2):
+        if not isinstance(obj, dict):
+            raise TypeError(f"unexpected type: {type(obj)}")
+        if "class" not in obj:
+            raise TypeError("expected class key for link")
+    link_class = getLinkClass(link1)
+    if link_class != getLinkClass(link2):
+        return False  # different link types
+    if link_class == "H5L_TYPE_HARD":
+        if getLinkId(link1) != getLinkId(link2):
+            return False
+        else:
+            return True
+    elif link_class == "H5L_TYPE_SOFT":
+        if getLinkPath(link1) != getLinkPath(link2):
+            return False
+        else:
+            return True
+    elif link_class == "H5L_TYPE_EXTERNAL":
+        if getLinkPath(link1) != getLinkPath(link2):
+            return False
+        if getLinkFilePath(link1) != getLinkFilePath(link2):
+            return False
+        return True
+    else:
+        raise TypeError(f"unexpected link class: {link_class}")
+
+
+def h5Join(path, paths):
+    """ join the paths """
+
+    h5path = path
+    if not paths:
+        return h5path
+    if isinstance(paths, str):
+        paths = (paths,)
+    for s in paths:
+        if h5path[-1] != "/":
+            h5path += "/"
+        h5path += s
+    return h5path

From c8f2aa35261f497f535bfc84f87301c1435c1a83 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 13 Jan 2026 18:24:51 -0800
Subject: [PATCH 111/129] fix circular import

---
 src/h5json/__init__.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/h5json/__init__.py b/src/h5json/__init__.py
index d4a7f781..d44ab67c 100644
--- a/src/h5json/__init__.py
+++ b/src/h5json/__init__.py
@@ -30,6 +30,3 @@
 from .objid import isSchema2Id
 from .objid import isRootObjId
 from .hdf5db import Hdf5db
-from . import _version
-
-__version__ = _version.__version__

From 40c47052bc294693b9cc94cca740d19f02d75fad Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 27 Jan 2026 22:33:58 -0800
Subject: [PATCH 112/129] fix for vlen types

---
 src/h5json/array_util.py            | 76 ++++++++++++++++++++++-------
 src/h5json/h5pystore/h5py_writer.py | 29 ++++++++++-
 src/h5json/hdf5db.py                |  6 ++-
 src/h5json/hdf5dtype.py             | 23 ++++++---
 test/unit/array_util_test.py        | 66 +++++++++++++++----------
 test/unit/hdf5db_test.py            |  1 +
 6 files changed, 149 insertions(+), 52 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index e57a3892..39966715 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -15,7 +15,7 @@
 import binascii
 import numpy as np
 
-from .hdf5dtype import isVlen, is_float16_dtype, guess_dtype
+from .hdf5dtype import isVlen, is_float16_dtype, guess_dtype, vlenBaseType
 
 MAX_VLEN_ELEMENT = 1_000_000  # restrict largest vlen element to one million
 
@@ -108,8 +108,6 @@ def jsonToArray(data_shape, data_dtype, data_json):
     Return numpy array from the given json array.
     """
 
-    # print(f"jsonToArray - data_shape: {data_shape} dtype: {data_dtype} data: {data_json}")
-
     def get_array(data, rank, dtype):
         # helper function to create an array with encoding if needed
         try:
@@ -120,28 +118,72 @@ def get_array(data, rank, dtype):
             arr = np.array(data, dtype=dtype)
         return arr
 
-    if data_json is None:
-        return np.array([]).astype(data_dtype)
+    def fillVlenArray(rank, data, arr, index):
+        for i in range(len(data)):
+            if rank > 1:
+                index = fillVlenArray(rank - 1, data[i], arr, index)
+            elif len(arr.dtype) > 0:
+                # deal with compound dtype
+                element_data = data[i]
+                arr_element = []
+                for j in range(len(arr.dtype)):
+                    compound_data = element_data[j]
+                    compound_dtype = arr.dtype[j]
+                    if isVlen(compound_dtype):
+                        base_dt = vlenBaseType(compound_dtype)
+                        if base_dt is str and isinstance(compound_data, bytes):
+                            compound_data = compound_data.decode('utf8')
+                        if base_dt in (str, bytes):
+                            arr_element.append(compound_data)
+                        else:
+                            arr_element.append(np.array(compound_data, base_dt))
+                    else:
+                        arr_element.append(compound_data)
+                arr[i] = tuple(arr_element)
+                index += 1
+            else:
+                base_dt = vlenBaseType(arr.dtype)
+                element_data = data[i]
+                # If base dtype is str and data is bytes, decode it first
+                if base_dt is str and isinstance(element_data, bytes):
+                    element_data = element_data.decode('utf8')
+                arr_element = np.array(element_data, base_dt)
+                arr[index] = arr_element
+                index += 1
+        return index
 
-    if isinstance(data_json, (list, tuple)):
-        if None in data_json:
-            return np.array([]).astype(data_dtype)
+    if data_json is None:
+        return np.array(data_shape).astype(data_dtype)
 
-    # need some special conversion for compound types --
-    # each element must be a tuple, but the JSON decoder
-    # gives us a list instead.
-    if len(data_dtype) > 0 and not isinstance(data_json, (list, tuple)):
-        raise TypeError("expected list data for compound data type")
     npoints = getNumElements(data_shape)
     np_shape_rank = len(data_shape)
 
-    if type(data_json) in (list, tuple):
-        data_json = toTuple(np_shape_rank, data_json)
+    was_list_input = type(data_json) in (list, tuple)
+    if was_list_input:
+        converted_data = []
+        if npoints == 1 and len(data_json) == len(data_dtype):
+            converted_data.append(toTuple(0, data_json))
+        else:
+            converted_data = toTuple(np_shape_rank, data_json)
+        data_json = converted_data
+    else:
+        if isinstance(data_json, str):
+            data_json = data_json.encode("utf8")
+        data_json = [data_json,]  # listify
 
     if isVlen(data_dtype):
-        # for vlen data we need to initialize of zero numpy array to ensure the right shape
+        # For scalar vlen where input was a list with multiple items (e.g. ['ref1', 'ref2']
+        # for vlen refs), the items represent vlen contents for the single scalar, not
+        # separate array elements. Wrap so fillVlenArray sees one element.
+        # Skip wrapping if already has 1 element (e.g. [('foo', 'bar')] is already correct).
+        if np_shape_rank == 0 and len(data_dtype) == 0 and was_list_input and len(data_json) > 1:
+            data_json = [data_json]
+        # for vlen data we need to initialize a zero numpy array to ensure the right shape
+        arr = np.zeros((npoints,), dtype=data_dtype)
+        fillVlenArray(np_shape_rank, data_json, arr, 0)
+    elif all(e is None for e in data_json):
+        # just create a zero array
         arr = np.zeros(data_shape, dtype=data_dtype)
-        arr[...] = data_json
     else:
         try:
             arr = get_array(data_json, np_shape_rank, data_dtype)
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index b801af83..0bb7fc9d 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -15,7 +15,7 @@
 import time
 
 from ..objid import getCollectionForId, isValidUuid, createObjId
-from ..hdf5dtype import createDataType
+from ..hdf5dtype import createDataType, isVlen, vlenBaseType
 from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
 from ..shape_util import getShapeDims, getShapeClass, isExtensible, getMaxDims
 from ..array_util import jsonToArray
@@ -141,6 +141,32 @@ def _copy_array(self, src_arr, fout=None):
                 element = self._copy_element(e, src_arr.dtype, tgt_dt, fout=fout)
                 tgt_arr_flat[i] = element
             tgt_arr = tgt_arr_flat.reshape(src_arr.shape)
+        elif len(src_arr.dtype) == 0 and isVlen(src_arr.dtype) and vlenBaseType(src_arr.dtype) in (bytes, str):
+            # vlen strings need elements converted to Python str for h5py
+            count = int(np.prod(src_arr.shape))
+            tgt_dt = h5py.special_dtype(vlen=str)
+            tgt_arr = np.zeros(src_arr.shape, dtype=tgt_dt)
+            tgt_arr_flat = tgt_arr.reshape((count,))
+            src_arr_flat = src_arr.reshape((count,))
+            for i in range(count):
+                e = src_arr_flat[i]
+                if isinstance(e, str):
+                    tgt_arr_flat[i] = e
+                elif isinstance(e, bytes):
+                    tgt_arr_flat[i] = e.decode('utf-8')
+                elif isinstance(e, np.ndarray) and e.dtype.kind == 'S':
+                    # numpy byte string array - convert to Python string
+                    tgt_arr_flat[i] = e.item().decode('utf-8')
+                elif isinstance(e, np.ndarray) and e.dtype.kind == 'U':
+                    # numpy unicode array - get Python string
+                    tgt_arr_flat[i] = e.item()
+                elif isinstance(e, np.bytes_):
+                    tgt_arr_flat[i] = e.decode('utf-8')
+                elif isinstance(e, np.str_):
+                    tgt_arr_flat[i] = str(e)
+                else:
+                    tgt_arr_flat[i] = e
+            tgt_arr = tgt_arr_flat.reshape(src_arr.shape)
         else:
             # can just copy the entire array
             tgt_arr[...] = src_arr[...]
@@ -366,6 +392,7 @@ def initializeDatasetValues(self, dset_id, dset):
         sel_all = selections.select(dset.shape, ...)
         arr = self.db.getDatasetValues(dset_id, sel_all)
         if arr is not None:
+            arr = self._copy_array(arr, fout=dset.file)
             dset[...] = arr
 
     def createAttribute(self, obj, name, attr_json):
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index d3ceeb01..d19f7da5 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -574,7 +574,7 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         else:
             value_json = None
 
-        if shape is None:
+        if shape is None and value is not None:
             shape = value.shape
         if shape == "H5S_NULL":
             shape_json = {"class": "H5S_NULL"}
@@ -588,7 +588,9 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         attrs_json = obj_json["attributes"]
         type_json = getTypeItem(dtype)
         # finally put it all together...
-        attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
+        attr_json = {"shape": shape_json, "type": type_json}
+        if shape != "H5S_NULL":
+            attr_json["value"] = value_json
         attr_json["created"] = getNow()
 
         # slot into the obj_json["attrs"]
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index c0ed2884..defd09a2 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -235,7 +235,7 @@ class (either Reference or RegionReference).  Returns None if the dtype
     name, dt = kwds.popitem()
 
     if name not in ("vlen", "enum", "ref"):
-        raise TypeError('Unknown special type "%s"' % name)
+        raise TypeError(f"Unknown special type {name}")
 
     try:
         return dt.metadata[name]
@@ -341,11 +341,7 @@ def getTypeItem(dt, metadata=None):
         # vlen string or data
         #
         # check for h5py variable length extension
-        vlen_check = None
-        if metadata and "vlen" in metadata:
-            vlen_check = metadata["vlen"]
-            if vlen_check is not None and not isinstance(vlen_check, np.dtype):
-                vlen_check = np.dtype(vlen_check)
+        vlen_check = vlenBaseType(dt)
 
         if metadata and "ref" in metadata:
             ref_check = metadata["ref"]
@@ -509,6 +505,21 @@ def isVlen(dt):
     return is_vlen
 
 
+def vlenBaseType(dt):
+    """
+    Return the base dtype of a vlen, otherwise none
+    """
+    if len(dt):
+        raise TypeError("BaseType can't be deterined for compound type")
+    if dt.base.metadata and "vlen" in dt.base.metadata:
+        base_dt = dt.base.metadata["vlen"]
+        if base_dt not in (bytes, str):
+            base_dt = np.dtype(base_dt)
+    else:
+        base_dt = None
+    return base_dt
+
+
 def isOpaqueDtype(dt):
     """
     Return True if this is an opaque dtype
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index 1ede343d..b3b7c266 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -234,7 +234,8 @@ def testJsonToArray(self):
         self.assertTrue("vlen" in out.dtype.metadata)
         self.assertEqual(out.dtype.metadata["vlen"], bytes)
         self.assertEqual(out.dtype.kind, "O")
-        self.assertEqual(out[2], "three")
+        e = out[2]
+        self.assertEqual(e, "three".encode())
 
         # test utf8 strings
         dt = np.dtype("S26")
@@ -277,9 +278,13 @@ def testJsonToArray(self):
         # VLEN data
         shape = []
         dt = special_dtype(vlen=np.dtype("S10"))
-        data = ["foo", "bar"]
+        data = [("foo", "bar")]
         out = jsonToArray(shape, dt, data)
+
         self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        self.assertEqual(out[()][0], b'foo')
+        self.assertEqual(out[()][1], b'bar')
 
         dt = special_dtype(vlen=np.dtype("int32"))
         shape = [4, ]
@@ -298,8 +303,11 @@ def testJsonToArray(self):
         self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
         for i in range(4):
             e = out[i]  # .tolist()
-            self.assertTrue(isinstance(e, tuple))
-            self.assertEqual(e, tuple(range(1, i + 2)))
+            self.assertTrue(isinstance(e, np.ndarray))
+            self.assertEqual(e.shape, (i + 1,))
+            self.assertEqual(e.dtype, np.dtype("int32"))
+            for j in range(i + 1):
+                self.assertEqual(e[j], j + 1)
 
         # VLEN 2D data
         dt = special_dtype(vlen=np.dtype("int32"))
@@ -321,10 +329,18 @@ def testJsonToArray(self):
         self.assertEqual(out.shape, (2, 2))
         self.assertEqual(out.dtype.kind, "O")
         self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
-        for i in range(2):
-            for j in range(2):
-                e = out[i, j]  # .tolist()
-                self.assertTrue(isinstance(e, tuple))
+        e = out[0, 0]
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [0])
+        e = out[0, 1]
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [1, 2])
+        e = out[1, 0]
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [1])
+        e = out[1, 1]
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [2, 3])
 
         # create VLEN of obj ref's
         ref_type = {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"}
@@ -352,14 +368,14 @@ def testJsonToArray(self):
         self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("S48"))
 
         e = out[0]
-        self.assertTrue(isinstance(e, tuple))
-        self.assertEqual(e, (id0,))
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [id0,])
         e = out[1]
-        self.assertTrue(isinstance(e, tuple))
-        self.assertEqual(e, (id0, id1))
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [id0, id1])
         e = out[2]
-        self.assertTrue(isinstance(e, tuple))
-        self.assertEqual(e, (id0, id1, id2))
+        self.assertTrue(isinstance(e, np.ndarray))
+        self.assertEqual(list(e), [id0, id1, id2])
 
         # compound type
         dt = np.dtype([("a", "i4"), ("b", "S5")])
@@ -939,7 +955,6 @@ def array_equal(a, b):
             """ compare two values element by element."""
             if type(a) in (list, tuple, np.void, np.ndarray):
                 if len(a) != len(b):
-                    print("number of elements doesn't match")
                     return False
                 nelements = len(a)
                 for i in range(nelements):
@@ -999,15 +1014,16 @@ def array_equal(a, b):
         data = [[42, "Hello"], [0, 0], [0, 0], [84, "Bye"]]
         arr = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(tuple(arr[0]), (42, 'Hello'))
+        self.assertEqual(tuple(arr[3]), (84, 'Bye'))
         buffer = arrayToBytes(arr)
         self.assertEqual(len(buffer), 40)
 
         expected = bytearray(40)
-        expected[0:8] = b"*\x00\x00\x00\x05\x00\x00\x00"
-        expected[8:19] = b"Hello\x00\x00\x00\x00\x00\x00"
-        expected[19:26] = b"\x00\x00\x00\x00\x00\x00\x00"
-        expected[26:40] = b"\x00\x00\x00T\x00\x00\x00\x03\x00\x00\x00Bye"
-
+        expected[0:10] = b'*\x00\x00\x00\x05\x00\x00\x00He'
+        expected[10:20] = b'llo\x00\x00\x00\x00\x00\x00\x00'
+        expected[20:30] = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00T'
+        expected[30:40] = b'\x00\x00\x00\x03\x00\x00\x00Bye'
         self.assertEqual(buffer, expected)
 
         # convert back to array
@@ -1219,16 +1235,14 @@ def testGetNumpyValueBase64Encoded(self):
 
     def testJsonToArrayOnNoneArray(self):
         data_dtype = np.dtype("i4")
-        data_shape = [0, ]
-        data_json = [None]
+        data_shape = [3, ]
+        data_json = [None, None, None]
         arr = None
-
         try:
             arr = jsonToArray(data_shape, data_dtype, data_json)
         except Exception as e:
             print(f"Exception while testing jsonToArray on array with None elements: {e}")
-
-        self.assertTrue(len(arr) == 0)
+        self.assertEqual(arr.shape, (3, ))
         self.assertTrue(arr.dtype == data_dtype)
 
     def testGetBroadcastShape(self):
@@ -1259,7 +1273,7 @@ def testJsonToArrayOnNoneCompoundArray(self):
 
         arr = jsonToArray(shape, dt, data)
 
-        self.assertEqual(len(arr), 0)
+        self.assertEqual(arr.shape, (1,))
         self.assertEqual(arr.dtype, dt)
 
 
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index e34dd3b3..dcaf92fe 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -164,6 +164,7 @@ def testNullSpaceAttribute(self):
         shape_item = item["shape"]
         self.assertTrue("class" in shape_item)
         self.assertEqual(shape_item["class"], "H5S_NULL")
+        self.assertFalse("value" in item)
         self.assertTrue(item["created"] > time.time() - 1.0)
         value = db.getAttributeValue(root_id, "A1")
         self.assertEqual(value, None)

From 7295f6a4a0f978c7749a12ee03c94a0e366aff02 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 2 Feb 2026 17:45:07 -0800
Subject: [PATCH 113/129] fix for str encoding

---
 src/h5json/array_util.py     | 7 ++++---
 test/unit/array_util_test.py | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 39966715..3888c06f 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -144,11 +144,12 @@ def fillVlenArray(rank, data, arr, index):
             else:
                 base_dt = vlenBaseType(arr.dtype)
                 element_data = data[i]
-                # If base dtype is str and data is bytes, decode it first
                 if base_dt is str and isinstance(element_data, bytes):
                     element_data = element_data.decode('utf8')
-                arr_element = np.array(element_data, base_dt)
-                arr[index] = arr_element
+                if base_dt in (str, bytes):
+                    arr[index] = element_data
+                else:
+                    arr[index] = np.array(element_data, base_dt)
                 index += 1
         return index
 
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index b3b7c266..52d9f668 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -235,7 +235,7 @@ def testJsonToArray(self):
         self.assertEqual(out.dtype.metadata["vlen"], bytes)
         self.assertEqual(out.dtype.kind, "O")
         e = out[2]
-        self.assertEqual(e, "three".encode())
+        self.assertEqual(e, "three")
 
         # test utf8 strings
         dt = np.dtype("S26")
@@ -243,7 +243,7 @@ def testJsonToArray(self):
         data = "eight: \u516b"
         out = jsonToArray(shape, dt, data)
         self.assertTrue(isinstance(out, np.ndarray))
-        self.assertEqual(out[()], data.encode("utf8"))
+        self.assertEqual(out[()], data.encode())
 
         dt = special_dtype(vlen=str)
         out = jsonToArray(shape, dt, data)

From 3a9e57374b8d549f8f179d41123c24f25c518eb8 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 3 Feb 2026 13:11:30 -0800
Subject: [PATCH 114/129] update for vlen dsets

---
 src/h5json/array_util.py      | 36 +++++++++++++++++++------------
 test/unit/array_util_test.py  | 23 +++++++++++---------
 test/unit/h5py_writer_test.py | 40 +++++++++++++++++++++++++++++++++++
 test/unit/hdf5db_test.py      | 27 +++++++++++++++++++++++
 4 files changed, 102 insertions(+), 24 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 3888c06f..44f245ad 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -443,22 +443,22 @@ def readElement(buffer, offset, arr, index, dt):
             offset += 4
             n = offset
             m = offset + count
-            if count > 0:
-                e_buffer = buffer[n:m]
-                offset += count
-
-                if vlenBaseType is bytes:
+            if vlenBaseType is bytes or vlenBaseType is str:
+                if count > 0:
+                    e_buffer = buffer[n:m]
+                    offset += count
                     arr[index] = bytes(e_buffer)
-                elif vlenBaseType is str:
-                    s = e_buffer.decode("utf-8")
-                    arr[index] = s
                 else:
-                    try:
-                        e = np.frombuffer(bytes(e_buffer), dtype=vlenBaseType)
-                    except ValueError:
-                        msg = f"Failed to parse vlen data: {e_buffer} with dtype: {vlenBaseType}"
-                        raise ValueError(msg)
-                    arr[index] = e
+                    arr[index] = b""
+            elif count > 0:
+                e_buffer = buffer[n:m]
+                offset += count
+                try:
+                    e = np.frombuffer(bytes(e_buffer), dtype=vlenBaseType)
+                except ValueError:
+                    msg = f"Failed to parse vlen data: {e_buffer} with dtype: {vlenBaseType}"
+                    raise ValueError(msg)
+                arr[index] = e
     return offset
 
 
@@ -703,6 +703,14 @@ def ndarray_compare(arr1, arr2):
     # TBD: this is slow for multi-megabyte vlen arrays, needs to be optimized
     if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
         if not isinstance(arr1, np.void) and not isinstance(arr2, np.void):
+            if not arr1 and not arr2:
+                # treat 0, b"", and "" as equivalent (uninitialized vlen)
+                return True
+            # compare str and bytes by encoding/decoding
+            if isinstance(arr1, str) and isinstance(arr2, bytes):
+                return arr1.encode("utf-8") == arr2
+            if isinstance(arr1, bytes) and isinstance(arr2, str):
+                return arr1 == arr2.encode("utf-8")
             return arr1 == arr2
         if isinstance(arr1, np.void) and not isinstance(arr2, np.void):
             if arr1.size == 0 and not arr2:
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index 52d9f668..ba712d61 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -590,8 +590,14 @@ def testToBytes(self):
         self.assertEqual(buffer, expected)
         # convert back to array
         arr_copy = bytesToArray(buffer, dt, (5,))
-
-        self.assertTrue(ndarray_compare(arr, arr_copy))
+        print("arr_copy[0]:", arr_copy[0])
+        print("arr_copy[0] type:", type(arr_copy[0]))
+        
+        for i in range(4):
+            self.assertTrue(isinstance(arr_copy[i], bytes))
+            self.assertEqual(arr_copy[i].decode(), arr[i])
+        self.assertTrue(isinstance(arr_copy[4], bytes))
+        self.assertEqual(arr_copy[4], b"")
         # VLEN of bytes
         dt = special_dtype(vlen=bytes)
         arr = np.zeros((5,), dtype=dt)
@@ -684,10 +690,7 @@ def testToBytes(self):
 
         self.assertEqual(arr.dtype, arr_copy.dtype)
         self.assertEqual(arr.shape, arr_copy.shape)
-        for i in range(4):
-            e = arr[i]
-            e_copy = arr_copy[i]
-            self.assertTrue(np.array_equal(e, e_copy))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
         #
         # VLEN ascii with array type
         #
@@ -896,10 +899,7 @@ def testArrToBytesBase64(self):
 
         self.assertEqual(arr.dtype, arr_copy.dtype)
         self.assertEqual(arr.shape, arr_copy.shape)
-        for i in range(4):
-            e = arr[i]
-            e_copy = arr_copy[i]
-            self.assertTrue(np.array_equal(e, e_copy))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
         #
         # VLEN ascii with array type
         #
@@ -967,6 +967,9 @@ def array_equal(a, b):
                     a = a.encode("utf8")
                 if isinstance(b, str):
                     b = b.encode("utf8")
+                # treat 0 and b"" as equivalent (uninitialized vlen)
+                if not a and not b:
+                    return True
                 if a != b:
                     return False
 
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 567c1439..36c1dbd9 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -522,6 +522,46 @@ def testCreateVlenReferenceAttribute(self):
             ref_obj = f[a1[0]]
             self.assertEqual(ref_obj.name, "/DS1")
 
+    def testVlenStringDataset(self):
+        filepath = "test/unit/out/h5py_writer_test_testVlenStringDataset.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        nrows = 4
+        shape = (nrows,)
+        dtype = special_dtype(vlen=str)
+        data = ["Hello", "HDF5", "REST", "API"]
+        init_arr = np.array(data, dtype=dtype)
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        sel_all = selections.select(shape, ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, shape)
+
+        db.setDatasetValues(dset_id, sel_all, init_arr)
+
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertTrue(np.array_equal(arr, init_arr))
+        sel_one = selections.select(shape, slice(2, 3))
+        arr = db.getDatasetValues(dset_id, sel_one)
+        self.assertEqual(arr.shape, (1,))
+        self.assertEqual(arr[0], 'REST')
+
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("dset" in f)
+            dset = f["dset"]
+            self.assertEqual(dset.shape, (nrows,))
+            self.assertEqual(dset.dtype, dtype)
+            for i in range(nrows):
+                self.assertEqual(dset[i], data[i].encode())
+
     def testCommittedType(self):
 
         filepath = "test/unit/out/h5py_writer_test_testCommittedType.h5"
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index dcaf92fe..9d89893c 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -566,6 +566,33 @@ def testBoolDataset(self):
 
         db.close()
 
+    def testVlenStringDataset(self):
+        nrows = 4
+        shape = (nrows,)
+        dtype = special_dtype(vlen=str)
+        data = ["Hello", "HDF5", "REST", "API"]
+        init_arr = np.array(data, dtype=dtype)
+
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        sel_all = selections.select(shape, ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, shape)
+
+        db.setDatasetValues(dset_id, sel_all, init_arr)
+
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertTrue(np.array_equal(arr, init_arr))
+        sel_one = selections.select(shape, slice(2, 3))
+        arr = db.getDatasetValues(dset_id, sel_one)
+        self.assertEqual(arr.shape, (1,))
+        self.assertEqual(arr[0], 'REST')
+
+        db.close()
+
     def testScalarDataset(self):
         dtype = np.int32
 

From 6201152884c6e706e32cca18cb6d36ccc049c1c1 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 5 Feb 2026 16:59:23 -0800
Subject: [PATCH 115/129] log warning on link replacement

---
 src/h5json/hdf5db.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index d19f7da5..82d1dd92 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -346,7 +346,7 @@ def getObjectById(self, obj_id, refresh=False):
         """ return object with given id """
         self._checkReader()
         obj_id = getHashTagForId(obj_id)
-        if obj_id not in self.db or (refresh and not self.is_new(obj_id)):
+        if obj_id not in self.db or (refresh and not self.is_new(obj_id) and not self.is_dirty(obj_id)):
             # load the obj from the reader
             self.log.debug(f"getObjectById - fetching {obj_id} from reader")
             obj_json = self.reader.getObjectById(obj_id)
@@ -852,6 +852,8 @@ def getLink(self, grp_id, name):
     def _addLink(self, grp_id, name, link_json):
         obj_json = self.getObjectById(grp_id)
         links = obj_json["links"]
+        if name in links:
+            self.log.warning(f"Link [{name}] already exists in {grp_id}")
         links[name] = link_json
         self.make_dirty(grp_id)
 

From 4aecf515af7b312c9d77811b2fcfb68e592e7819 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Fri, 13 Mar 2026 14:54:22 +0100
Subject: [PATCH 116/129] fix filtertest

---
 test/unit/dset_util_test.py | 15 +++++++++++++++
 test/unit/hdf5db_test.py    |  7 +++++++
 2 files changed, 22 insertions(+)

diff --git a/test/unit/dset_util_test.py b/test/unit/dset_util_test.py
index 364d8929..fae31451 100644
--- a/test/unit/dset_util_test.py
+++ b/test/unit/dset_util_test.py
@@ -104,11 +104,13 @@ def testFilterValidation(self):
             self.assertTrue(False)  # should not reach here
         except ValueError:
             pass  # filters are invalid with contiguous layout
+
         cpl["layout"] = chunked_layout
         try:
             validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
+
         # add an invlaid level option for deflate
         deflate_filter["level"] = 20
         try:
@@ -116,11 +118,13 @@ def testFilterValidation(self):
             self.assertTrue(False)  # should not reach here
         except ValueError:
             pass  # invalid deflate level
+
         deflate_filter["level"] = 5
         try:
             validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
+
         # try with just a filter name
         gzip_filter = getFilterItem("gzip")
         cpl["filters"] = [gzip_filter, ]
@@ -128,6 +132,7 @@ def testFilterValidation(self):
             validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
+
         # try with an invalid filter name
         cpl["filters"] = ["invalid_filter_name", ]
         try:
@@ -145,6 +150,16 @@ def testFilterValidation(self):
         except ValueError:
             self.assertTrue(False)  # shouldn't raise exception
 
+        sc_filter = {'class': 'H5Z_FILTER_SCALEOFFSET', 'id': 6, 'name': 'scaleoffset'}
+        sc_filter['scaleOffset'] = 12
+        sc_filter['scaleType'] = 'H5Z_SO_INT'
+        filters = [sc_filter, ]
+        cpl["filters"] = filters
+        try:
+            validateDatasetCreationProps(cpl, type_json, dset_json["shape"])
+        except ValueError:
+            self.assertTrue(False)  # shouldn't raise exception
+
     def testGuessChunk(self):
 
         typesize = "H5T_VARIABLE"
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 9d89893c..1fc13a0f 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -78,6 +78,13 @@ def testGroup(self):
         self.assertTrue(isValidUuid(g2_id, obj_class="groups"))
         db.createHardLink(root_id, "g2", g2_id)
 
+        root_obj = db.getObjectById(root_id)
+        self.assertTrue("links" in root_obj)
+        root_links = root_obj["links"]
+        self.assertTrue("g1" in root_links)
+        self.assertTrue("g2" in root_links)
+        self.assertEqual(len(root_links), 2)
+
         g1_1_id = db.createGroup()
         self.assertTrue(isSchema2Id(g1_1_id))
         self.assertFalse(isRootObjId(g1_1_id))

From 413827603a5df0a2098f469d44bf7b486f944210 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 16 Mar 2026 16:07:13 +0100
Subject: [PATCH 117/129] vlen array fix

---
 src/h5json/array_util.py     |  9 +++++++--
 test/unit/array_util_test.py | 10 ++--------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 44f245ad..56fc15be 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -447,9 +447,14 @@ def readElement(buffer, offset, arr, index, dt):
                 if count > 0:
                     e_buffer = buffer[n:m]
                     offset += count
-                    arr[index] = bytes(e_buffer)
+                    if vlenBaseType is str:
+                        e_buffer = e_buffer.decode("utf-8")
+                    arr[index] = e_buffer
                 else:
-                    arr[index] = b""
+                    if vlenBaseType is str:
+                        arr[index] = ""
+                    else:
+                        arr[index] = b""
             elif count > 0:
                 e_buffer = buffer[n:m]
                 offset += count
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index ba712d61..f6196168 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -590,14 +590,8 @@ def testToBytes(self):
         self.assertEqual(buffer, expected)
         # convert back to array
         arr_copy = bytesToArray(buffer, dt, (5,))
-        print("arr_copy[0]:", arr_copy[0])
-        print("arr_copy[0] type:", type(arr_copy[0]))
-        
-        for i in range(4):
-            self.assertTrue(isinstance(arr_copy[i], bytes))
-            self.assertEqual(arr_copy[i].decode(), arr[i])
-        self.assertTrue(isinstance(arr_copy[4], bytes))
-        self.assertEqual(arr_copy[4], b"")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
         # VLEN of bytes
         dt = special_dtype(vlen=bytes)
         arr = np.zeros((5,), dtype=dt)

From adafa903bf06af88211328660d8f7a06aa2da364 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 17 Mar 2026 16:17:39 +0100
Subject: [PATCH 118/129] check that db.setvalue has same rank as dataset

---
 src/h5json/hdf5db.py     |  5 ++++-
 src/h5json/selections.py | 10 ++++++++++
 test/unit/hdf5db_test.py |  4 ++--
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 82d1dd92..39d2cc5d 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -673,7 +673,7 @@ def init_arr(dtype, cpl):
             # done with NULL and SCALAR cases
             return arr
 
-        # simple daaset
+        # simple dataset
         arr = None
         fetch = True
 
@@ -708,6 +708,7 @@ def init_arr(dtype, cpl):
             # apply the update to the array to be returned
             src_sel = selections.translate(update_sel, x_sel)
             tgt_sel = selections.translate(sel, x_sel)
+
             arr[tgt_sel.slices] = update_val[src_sel.slices]
 
         return arr
@@ -741,6 +742,8 @@ def setDatasetValues(self, dset_id, sel, arr):
             dims = getShapeDims(shape_json)
             if sel.shape != dims:
                 raise ValueError("Selection shape does not match dataset shape")
+            if len(arr.shape) != len(dims):
+                raise TypeError("Expected ndarray with same rank as dataset")
         updates = self._getDatasetUpdates(dset_id)
         if sel.select_type == selections.H5S_SELECT_ALL:
             # for select all, throw out any existing updates since this will overwrite them
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 75b06913..cfa70769 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -252,6 +252,11 @@ def mshape(self):
         """ Shape of selection (always 1-D for this class) """
         return (self.nselect,)
 
+    @property
+    def tgtshape(self):
+        """ shape of selection in rank of dataspace"""
+        return self.mshape
+
     def getSelectNpoints(self):
         npoints = None
         if self._select_type == H5S_SELECT_NONE:
@@ -388,6 +393,11 @@ class SimpleSelection(Selection):
     def mshape(self):
         """ Shape of current selection """
         return self._mshape
+    
+    @property
+    def tgtshape(self):
+        """ shape of selection in rank of dataspace"""
+        return [self.count[dim] for dim in range(len(self._shape))]
 
     @property
     def start(self):
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 1fc13a0f..a1fb27d7 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -448,11 +448,11 @@ def testSimpleDataset(self):
         self.assertEqual(arr.shape, shape)
         self.assertEqual(arr.min(), 0)
         self.assertEqual(arr.max(), 0)
-        row = np.zeros((ncols,), dtype=dtype)
+        row = np.zeros((1, ncols,), dtype=dtype)
 
         # set values row by row
         for i in range(nrows):
-            row[:] = list(range(i * 10, (i + 1) * 10))
+            row[0, :] = list(range(i * 10, (i + 1) * 10))
             row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
             db.setDatasetValues(dset_id, row_sel, row)
 

From a7b16133b032ac3f109bf433c4272665332d4544 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 24 Mar 2026 19:14:21 +0100
Subject: [PATCH 119/129] raise error when attempting to serialize object
 arrays

---
 src/h5json/array_util.py     |  3 +++
 src/h5json/hdf5db.py         |  2 +-
 src/h5json/hdf5dtype.py      |  3 +--
 src/h5json/selections.py     |  2 +-
 test/unit/array_util_test.py |  9 +++++++++
 test/unit/hdf5db_test.py     | 38 ++++++++++++++++++++++++++++++++++++
 6 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
index 56fc15be..575d1968 100644
--- a/src/h5json/array_util.py
+++ b/src/h5json/array_util.py
@@ -514,6 +514,9 @@ def arrayToBytes(arr, encoding=None):
             offset = copyElement(e, arr1d.dtype, buffer, offset)
         data = bytes(buffer)
     else:
+        if arr.dtype.kind == "O":
+            # object array, can't convert to bytes
+            raise TypeError("Object arrays with no vlen  are not supported for arrayToBytes")
         # fixed length type
         data = arr.tobytes()
 
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 39d2cc5d..49837069 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -743,7 +743,7 @@ def setDatasetValues(self, dset_id, sel, arr):
             if sel.shape != dims:
                 raise ValueError("Selection shape does not match dataset shape")
             if len(arr.shape) != len(dims):
-                raise TypeError("Expected ndarray with same rank as dataset")
+                arr = arr.reshape(sel.mshape)  # reshape to match dataset rank
         updates = self._getDatasetUpdates(dset_id)
         if sel.select_type == selections.H5S_SELECT_ALL:
             # for select all, throw out any existing updates since this will overwrite them
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index defd09a2..570d396e 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -215,8 +215,7 @@ def check_dtype(**kwds):
 
     vlen = dtype
         If the dtype represents an HDF5 vlen, returns the Python base class.
-        Currently only builting string vlens (str) are supported.  Returns
-        None if the dtype does not represent an HDF5 vlen.
+        Returns None if the dtype does not represent an HDF5 vlen.
 
     enum = dtype
         If the dtype represents an HDF5 enumerated type, returns the dictionary
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index cfa70769..04e2ddbe 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -393,7 +393,7 @@ class SimpleSelection(Selection):
     def mshape(self):
         """ Shape of current selection """
         return self._mshape
-    
+
     @property
     def tgtshape(self):
         """ shape of selection in rank of dataspace"""
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
index f6196168..c61baf9f 100644
--- a/test/unit/array_util_test.py
+++ b/test/unit/array_util_test.py
@@ -567,6 +567,15 @@ def testToBytes(self):
         arr_copy = bytesToArray(buffer, dt, (4,))
         self.assertTrue(ndarray_compare(arr, arr_copy))
 
+        # VLEN of generic object ndarray
+        arr = np.zeros((4,), dtype=object)
+
+        try:
+            arrayToBytes(arr)
+            self.assertTrue(False)  # expected type error
+        except TypeError:
+            pass  # expected, object arrays not supported for arrayToBytes
+
         # VLEN of strings
         dt = special_dtype(vlen=str)
         arr = np.zeros((5,), dtype=dt)
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index a1fb27d7..208af9df 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -600,6 +600,44 @@ def testVlenStringDataset(self):
 
         db.close()
 
+    def testVlenIntDataset(self):
+        nrows = 4
+        shape = (nrows,)
+        dtype = special_dtype(vlen=np.int32)
+
+        init_arr = np.empty((nrows,), dtype=dtype)
+        for i in range(nrows):
+            init_arr[i] = np.array(list(range(i, 2 * i + 1)), dtype=np.int32)
+
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        sel_all = selections.select(shape, ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, shape)
+
+        db.setDatasetValues(dset_id, sel_all, init_arr)
+
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(arr.dtype.kind, 'O')
+        self.assertTrue("vlen" in arr.dtype.metadata)
+        self.assertEqual(arr.dtype.metadata["vlen"], np.dtype(np.int32))
+        for i in range(nrows):
+            e = arr[i]
+            self.assertTrue(isinstance(e, np.ndarray))
+            self.assertEqual(e.dtype, np.int32)
+            self.assertTrue(np.array_equal(e, init_arr[i]))
+
+        sel_one = selections.select(shape, slice(2, 3))
+        arr = db.getDatasetValues(dset_id, sel_one)
+        self.assertEqual(arr.shape, (1,))
+        self.assertTrue(np.array_equal(arr[0], init_arr[2]))
+
+        db.close()
+
     def testScalarDataset(self):
         dtype = np.int32
 

From fbd0688db9582513df016558ff1d65b42f821e33 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Thu, 2 Apr 2026 19:21:08 +0200
Subject: [PATCH 120/129] added additional hdf5db tests

---
 test/unit/hdf5db_test.py | 69 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 208af9df..6e64419e 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -199,6 +199,12 @@ def testScalarAttribute(self):
 
         self.assertEqual(item_type["class"], "H5T_INTEGER")
         self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(value, np.ndarray))
+        self.assertEqual(value.shape, ())
+        self.assertEqual(value.dtype, np.int32)
+        self.assertEqual(value[()], 42)
         db.close()
 
     def testFixedStringAttribute(self):
@@ -218,7 +224,10 @@ def testFixedStringAttribute(self):
         now = int(time.time())
         self.assertTrue(item["created"] > now - 1)
         ret_value = db.getAttributeValue(root_id, "A1")
-        self.assertEqual(ret_value, value.encode("ascii"))
+        self.assertTrue(isinstance(ret_value, np.ndarray))
+        self.assertEqual(ret_value.shape, ())
+        self.assertEqual(ret_value.dtype, np.dtype("S13"))
+        self.assertEqual(ret_value[()], value.encode("ascii"))
         db.close()
 
     def testVlenAsciiAttribute(self):
@@ -240,6 +249,13 @@ def testVlenAsciiAttribute(self):
         self.assertEqual(item_type["length"], "H5T_VARIABLE")
         self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
         self.assertEqual(item["value"], "Hello, world!")
+
+        ret_value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(ret_value, np.ndarray))
+        self.assertEqual(ret_value.shape, ())
+        self.assertEqual(ret_value.dtype, dt)
+        self.assertEqual(ret_value[()], value)
+
         now = int(time.time())
         self.assertTrue(item["created"] > now - 1)
         db.close()
@@ -263,6 +279,13 @@ def testVlenUtf8Attribute(self):
         self.assertEqual(item_type["length"], "H5T_VARIABLE")
         self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
         self.assertEqual(item["value"], "Hello, world!")
+
+        ret_value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(ret_value, np.ndarray))
+        self.assertEqual(ret_value.shape, ())
+        self.assertEqual(ret_value.dtype, dt)
+        self.assertEqual(ret_value[()].encode(), value)
+
         now = int(time.time())
         self.assertTrue(item["created"] > now - 1)
         db.close()
@@ -282,6 +305,50 @@ def testIntAttribute(self):
         item_type = item["type"]
         self.assertEqual(item_type["class"], "H5T_INTEGER")
         self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+
+        ret_value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(ret_value, np.ndarray))
+        self.assertEqual(ret_value.shape, (len(value),))
+        self.assertEqual(ret_value.dtype, np.int16)
+        for i in range(len(value)):
+            self.assertEqual(ret_value[i], value[i])
+
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+
+        db.close()
+
+    def testCompoundAttribute(self):
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dt_compound = np.dtype([("field1", "S8"), ("field2", np.int32)])
+        value = [("hello", 42), ('', 0), ("world", 99),]
+        db.createAttribute(root_id, "A1", value, dtype=dt_compound)
+        item = db.getAttribute(root_id, "A1")
+        item_value = item['value']
+        self.assertEqual(len(item_value), 3)
+        for i in range(3):
+            e = item_value[i]
+            # self.assertTrue(isinstance(e, tuple))  # TBD
+            self.assertEqual(tuple(e), value[i])
+
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(item_shape["dims"], [3,])
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_COMPOUND")
+
+        ret_value = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(ret_value, np.ndarray))
+        self.assertEqual(ret_value.shape, (3,))
+        self.assertEqual(ret_value.dtype, dt_compound)
+        for i in range(3):
+            e = ret_value[i]
+            self.assertEqual((e[0].decode(), e[1]), value[i])
+
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+
         db.close()
 
     def testCreateReferenceAttribute(self):

From 79f78224411be5de9ba668dcac1d8c95fa4368a6 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 7 Apr 2026 18:59:30 +0200
Subject: [PATCH 121/129] add getPathsForObjectId method

---
 src/h5json/hdf5db.py                  | 56 ++++++++++++++++++++++++++-
 src/h5json/jsonstore/h5json_writer.py | 40 +------------------
 test/unit/hdf5db_test.py              |  9 +++++
 3 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 49837069..cf3d6a4c 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -426,6 +426,52 @@ def getObjectByPath(self, path):
         obj_json = self.getObjectById(obj_id)
         return obj_json
 
+    def getPathsForObjectId(self, obj_id, parent_id=None, path_prefix=""):
+        """ Return list of paths for the given object id starting from parent_id if set,
+        otherwise the root_id """
+        # TBD: this function will be rather slow for domains with a large number
+        # of objects (it will search through the complete heirarchy).
+
+        if parent_id is None:
+            parent_id = self.root_id
+        else:
+            parent_id = getHashTagForId(parent_id)
+
+        obj_json = self.getObjectById(parent_id)
+        if obj_json is None:
+            self.log.warning("getPathsForObjectId - parent_id not found")
+            raise KeyError("parent_id: {parent_id} not found")
+
+        paths = []
+        obj_id = getHashTagForId(obj_id)
+        searched_ids = set(obj_id)
+
+        if parent_id == obj_id:
+            paths.append(path_prefix if path_prefix else "/")
+
+        if 'links' in obj_json:
+            links = obj_json['links']
+            for link_name in links:
+                link_tgt = links[link_name]
+                link_class = link_tgt['class']
+                if link_class == 'H5L_TYPE_HARD':
+                    # hard link
+                    tgt_obj_id = link_tgt['id']
+                    if tgt_obj_id in searched_ids:
+                        self.log.warning(f"circular reference using path: {path_prefix}/{link_name}")
+                        continue
+                    searched_ids.add(tgt_obj_id)
+                    kwargs = {"parent_id": tgt_obj_id, "path_prefix": path_prefix + "/" + link_name}
+                    paths.extend(self.getPathsForObjectId(obj_id, **kwargs))
+                elif link_class == 'H5L_TYPE_SOFT':
+                    self.log.warning("getPathsForObjectId can't follow soft links")
+                elif link_class == 'H5L_TYPE_EXTERNAL':
+                    self.log.warning("getPathsForObjectId can't follow external links")
+                else:
+                    self.log.error(f"link type: {link_class} not supported")
+
+        return paths
+
     def getDtype(self, obj_json):
         """ Return numpy data type for given dataset, datatype, or attribute
         """
@@ -535,7 +581,15 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
             else:
                 dtype = np.dtype(dtype)
         else:
-            value = np.asarray(value, dtype=dtype, order='C')
+            try:
+                value = np.asarray(value, dtype=dtype, order='C')
+            except ValueError:
+                # some special cases for compound and vlen types are handled
+                # by jsonToArray...
+                if shape is None or dtype is None:
+                    raise
+                print(f"calling jsonToArray for shape: {shape} dtype: {dtype} value: {value}")
+                value = jsonToArray(shape, dtype, value)
             if dtype is None:
                 dtype = value.dtype
             else:
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
index f97df007..756ef578 100644
--- a/src/h5json/jsonstore/h5json_writer.py
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -39,7 +39,6 @@ def __init__(
         super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
         if append:
             raise ValueError("H5JsonWriter does not support append mode")
-        self.alias_db = {}
         self.json = {}
         self._data_limit = data_limit
         self._root_id = None
@@ -83,43 +82,8 @@ def isClosed(self):
 
     def getAliasList(self, obj_id):
         """ return list of alias """
-        if obj_id not in self.alias_db:
-            self.alias_db[obj_id] = []
-        return self.alias_db[obj_id]
-
-    def updateAliasList(self):
-        """ update the alias list for each object """
-        # clear exiting aliases
-        obj_ids = self.db.getCollection()
-        for obj_id in obj_ids:
-            self.alias_db[obj_id] = []
-
-        self._setAlias(self._root_uuid, set(), "/")
-
-    def _setAlias(self, obj_id, id_set, h5path):
-        """ add the given h5path to the object's alias list
-            If the object is a group, recurse through each hard link """
-        obj_json = self.db.getObjectById(obj_id)
-        alias_list = self.getAliasList(obj_id)
-        if h5path in alias_list:
-            return  # nothing to do
-        alias_list.append(h5path)
-        if getCollectionForId(obj_id) != "groups":
-            return  # done
-        id_set.add(obj_id)  # keep track of objects we've visited to avoid loops
-        links = obj_json["links"]
-        if h5path[-1] != '/':
-            h5path += '/'
 
-        for link_name in links:
-            link_json = links[link_name]
-            if link_json["class"] == "H5L_TYPE_HARD":
-                tgt_id = link_json["id"]
-                if tgt_id in id_set:
-                    self.log.info("_setAlias - circular loop found")
-                else:
-                    self._setAlias(tgt_id, id_set, f"{h5path}{link_name}")
-        id_set.remove(obj_id)
+        return self.db.getPathsForObjectId(obj_id)
 
     def dumpAttribute(self, obj_id, attr_name):
         self.log.info(f"dumpAttribute: [{attr_name}]")
@@ -299,8 +263,6 @@ def dumpFile(self):
         self.json["apiVersion"] = db_version_info["hdf5-json-version"]
         self.json["root"] = getUuidFromId(self._root_uuid)
 
-        self.updateAliasList()  # create alias_db with obj_id to alias list dict
-
         self.dumpGroups()
 
         self.dumpDatasets()
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 6e64419e..bccf7a52 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -66,11 +66,18 @@ def testGroup(self):
         db = Hdf5db(app_logger=self.log)
         root_id = db.open()
 
+        paths = db.getPathsForObjectId(root_id)
+        self.assertEqual(paths, ["/"])
+
         g1_id = db.createGroup()
         self.assertTrue(isSchema2Id(g1_id))
         self.assertFalse(isRootObjId(g1_id))
         self.assertTrue(isValidUuid(g1_id, obj_class="groups"))
+        paths = db.getPathsForObjectId(g1_id)
+        self.assertEqual(paths, [])
         db.createHardLink(root_id, "g1", g1_id)
+        paths = db.getPathsForObjectId(g1_id)
+        self.assertEqual(paths, ["/g1"])
 
         g2_id = db.createGroup()
         self.assertTrue(isSchema2Id(g2_id))
@@ -90,6 +97,8 @@ def testGroup(self):
         self.assertFalse(isRootObjId(g1_1_id))
         self.assertTrue(isValidUuid(g1_1_id, obj_class="groups"))
         db.createHardLink(g1_id, "g1.1", g1_1_id)
+        paths = db.getPathsForObjectId(g1_1_id)
+        self.assertEqual(paths, ["/g1/g1.1"])
 
         self.assertEqual(db.getObjectIdByPath("g1"), g1_id)
         self.assertEqual(db.getObjectIdByPath("/g1"), g1_id)

From f5c27f4240ed923a0b63649999348c86b9043f92 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 7 Apr 2026 19:42:18 +0200
Subject: [PATCH 122/129] fix for circular links

---
 src/h5json/hdf5db.py     | 23 +++++++++++------------
 test/unit/hdf5db_test.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index cf3d6a4c..2a512804 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -375,7 +375,6 @@ def getObjectIdByPath(self, h5path, parent_id=None):
             raise KeyError("parent_id: {parent_id} not found")
 
         obj_id = parent_id
-        searched_ids = set(obj_id)
 
         link_names = h5path.split('/')
         self.log.debug(f"link_names: {link_names}")
@@ -403,11 +402,7 @@ def getObjectIdByPath(self, h5path, parent_id=None):
             if link_class == 'H5L_TYPE_HARD':
                 # hard link
                 obj_id = link_tgt['id']
-                if obj_id in searched_ids:
-                    self.log.warning(f"circular reference using path: {h5path}")
-                    raise KeyError(h5path)
                 obj_json = self.getObjectById(obj_id)
-                searched_ids.add(obj_id)
             elif link_class == 'H5L_TYPE_SOFT':
                 self.log.warning("getObjectIdByPath can't follow soft links")
             elif link_class == 'H5L_TYPE_EXTERNAL':
@@ -426,7 +421,7 @@ def getObjectByPath(self, path):
         obj_json = self.getObjectById(obj_id)
         return obj_json
 
-    def getPathsForObjectId(self, obj_id, parent_id=None, path_prefix=""):
+    def getPathsForObjectId(self, obj_id, parent_id=None, path_prefix="", _visited=None):
         """ Return list of paths for the given object id starting from parent_id if set,
         otherwise the root_id """
         # TBD: this function will be rather slow for domains with a large number
@@ -437,6 +432,14 @@ def getPathsForObjectId(self, obj_id, parent_id=None, path_prefix=""):
         else:
             parent_id = getHashTagForId(parent_id)
 
+        if _visited is None:
+            _visited = set()
+
+        if parent_id in _visited:
+            self.log.warning(f"circular reference detected at path: {path_prefix}")
+            return []
+        _visited.add(parent_id)
+
         obj_json = self.getObjectById(parent_id)
         if obj_json is None:
             self.log.warning("getPathsForObjectId - parent_id not found")
@@ -444,7 +447,6 @@ def getPathsForObjectId(self, obj_id, parent_id=None, path_prefix=""):
 
         paths = []
         obj_id = getHashTagForId(obj_id)
-        searched_ids = set(obj_id)
 
         if parent_id == obj_id:
             paths.append(path_prefix if path_prefix else "/")
@@ -457,11 +459,8 @@ def getPathsForObjectId(self, obj_id, parent_id=None, path_prefix=""):
                 if link_class == 'H5L_TYPE_HARD':
                     # hard link
                     tgt_obj_id = link_tgt['id']
-                    if tgt_obj_id in searched_ids:
-                        self.log.warning(f"circular reference using path: {path_prefix}/{link_name}")
-                        continue
-                    searched_ids.add(tgt_obj_id)
-                    kwargs = {"parent_id": tgt_obj_id, "path_prefix": path_prefix + "/" + link_name}
+                    kwargs = {"parent_id": tgt_obj_id, "_visited": _visited}
+                    kwargs["path_prefix"] = path_prefix + "/" + link_name
                     paths.extend(self.getPathsForObjectId(obj_id, **kwargs))
                 elif link_class == 'H5L_TYPE_SOFT':
                     self.log.warning("getPathsForObjectId can't follow soft links")
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index bccf7a52..7220e5f6 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -168,7 +168,41 @@ def testGroup(self):
         db.deleteAttribute(g1_id, "a1")
         self.assertEqual(len(db.getAttributes(g1_id)), 1)
         self.assertEqual(db.getAttribute(g1_id, "a1"), None)
+        db.close()
 
+    def testCircularLinks(self):
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        g2_id = db.createGroup()
+        db.createHardLink(g1_id, "g2", g2_id)
+        # create circular link
+        db.createHardLink(g2_id, "g1", g1_id)
+
+        g1_json = db.getObjectById(g1_id)
+        self.assertTrue("links" in g1_json)
+        g1_links = g1_json["links"]
+        self.assertTrue("g2" in g1_links)
+        self.assertEqual(len(g1_links), 1)
+
+        g2_json = db.getObjectById(g2_id)
+        self.assertTrue("links" in g2_json)
+        g2_links = g2_json["links"]
+        self.assertTrue("g1" in g2_links)
+        self.assertEqual(len(g2_links), 1)
+
+        paths = db.getPathsForObjectId(g2_id)
+        # only the canonical path is returned
+        self.assertEqual(paths, ["/g1/g2"])
+        grp_id = db.getObjectIdByPath("/g1/g2")
+        self.assertEqual(grp_id, g2_id)
+        # you can still get objects via circular paths...
+        grp_id = db.getObjectIdByPath("/g1/g2/g1")
+        self.assertEqual(grp_id, g1_id)
+        grp_id = db.getObjectIdByPath("/g1/g2/g1/g2")
+        self.assertEqual(grp_id, g2_id)
+        
         db.close()
 
     def testNullSpaceAttribute(self):

From 59fea055672defb3739ecd5fc46bf56aa80e48bf Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 7 Apr 2026 19:44:37 +0200
Subject: [PATCH 123/129] fix flake8 error

---
 test/unit/hdf5db_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 7220e5f6..446b1dda 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -202,7 +202,7 @@ def testCircularLinks(self):
         self.assertEqual(grp_id, g1_id)
         grp_id = db.getObjectIdByPath("/g1/g2/g1/g2")
         self.assertEqual(grp_id, g2_id)
-        
+
         db.close()
 
     def testNullSpaceAttribute(self):

From 70a1d8d0784f598ddc5499430581f3c4ab2e5074 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 7 Apr 2026 20:50:53 +0200
Subject: [PATCH 124/129] remove debug print

---
 src/h5json/hdf5db.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 2a512804..3723ff07 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -587,7 +587,6 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
                 # by jsonToArray...
                 if shape is None or dtype is None:
                     raise
-                print(f"calling jsonToArray for shape: {shape} dtype: {dtype} value: {value}")
                 value = jsonToArray(shape, dtype, value)
             if dtype is None:
                 dtype = value.dtype

From c25dc3a51bc5daf5cafb70a1f381b9fdec88a728 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 13 Apr 2026 19:38:50 +0200
Subject: [PATCH 125/129] allow <collection_type>/<obj_id> for Reference
 constructor

---
 src/h5json/hdf5dtype.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
index 570d396e..7ee65462 100644
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -43,6 +43,11 @@ def __init__(self, bind):
             if not isinstance(bind, str):
                 raise TypeError("Expected string id")
 
+            if bind.find('/') != -1:
+                parts = bind.split('/')
+                if parts[0] not in ("groups", "datasets", "datatypes"):
+                    raise TypeError("Expected id to start with 'groups/', 'datasets/' or 'datatypes/'")
+                bind = parts[1]
             self._id = getHashTagForId(bind)
 
     def __repr__(self):

From dfc0224c051d9698a11afdb5f423397c68cc4978 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 12 May 2026 18:00:00 +0200
Subject: [PATCH 126/129] updates for point selection

---
 src/h5json/h5pystore/h5py_reader.py   |   9 +-
 src/h5json/hdf5db.py                  | 100 +++++++----
 src/h5json/jsonstore/h5json_reader.py |   2 +-
 src/h5json/selections.py              | 235 ++++++++++++++++++--------
 test/unit/h5py_reader_test.py         |   8 +
 test/unit/h5py_writer_test.py         |   5 +-
 test/unit/hdf5db_test.py              |  23 ++-
 7 files changed, 273 insertions(+), 109 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
index e0d5d825..23684ab3 100644
--- a/src/h5json/h5pystore/h5py_reader.py
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -548,10 +548,17 @@ def getDatasetValues(self, dset_id, sel=None, dtype=None):
         if isOpaqueDtype(dset.dtype):
             # TBD: Opaque data not supported yet
             return None
-        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+        if sel is None or sel.select_type == selections.H5S_SEL_ALL:
             arr = dset[...]
         elif isinstance(sel, selections.SimpleSelection):
             arr = dset[sel.slices]
+        elif isinstance(sel, selections.PointSelection):
+            # h5py has no native point-selection API, so read each point individually.
+            # sel.points rows are numpy arrays; wrap each in a tuple so h5py
+            # interprets it as a multi-dimensional index rather than fancy indexing.
+            arr = np.zeros((sel.nselect,), dtype=dset.dtype)
+            for i, pt in enumerate(selections._iter_points(sel)):
+                arr[i] = dset[pt]
         else:
             raise NotImplementedError("selection type not supported")
 
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 3723ff07..e65b877d 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -672,7 +672,10 @@ def getDatasetValues(self, dset_id, sel):
         def init_arr(dtype, cpl):
             """ create an ndarray with the give shape, dtype and fill_value
                 (if the latter is found in the creation properties list) """
-            arr_shape = sel.count if isinstance(sel.count, tuple) else (sel.count, )
+            if hasattr(sel, "count"):
+                arr_shape = sel.count if isinstance(sel.count, tuple) else (sel.count, )
+            else:
+                arr_shape = (sel.nselect,)
             arr = np.zeros(arr_shape, dtype=dtype)
             if "fillValue" in cpl:
                 fillValue = cpl["fillValue"]
@@ -707,7 +710,7 @@ def init_arr(dtype, cpl):
             raise ValueError("Selection shape does not match dataset shape")
 
         if shape_class == "H5S_SCALAR":
-            if sel.select_type != selections.H5S_SELECT_ALL:
+            if sel.select_type != selections.H5S_SEL_ALL:
                 # TBD: support other selection types
                 raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
             if sel.shape != ():
@@ -751,6 +754,7 @@ def init_arr(dtype, cpl):
             arr = init_arr(dtype, cpl)
 
         # apply any updates that impact this selection
+
         for (update_sel, update_val) in updates:
             # get the part of the update that is in common with the requested selection
             x_sel = selections.intersect(sel, update_sel)
@@ -758,10 +762,20 @@ def init_arr(dtype, cpl):
                 # this update doesn't effect the selection, so ignore
                 continue
             # apply the update to the array to be returned
-            src_sel = selections.translate(update_sel, x_sel)
-            tgt_sel = selections.translate(sel, x_sel)
-
-            arr[tgt_sel.slices] = update_val[src_sel.slices]
+            if sel.select_type == selections.H5S_SEL_POINTS:
+                # For point selections apply each intersecting point individually.
+                # arr is 1-D with one entry per selected point; map each intersection
+                # point back to its position in sel and its offset in update_val.
+                rank = len(sel.shape)
+                sel_pts = list(selections._iter_points(sel))
+                for pt in selections._iter_points(x_sel):
+                    tgt_idx = sel_pts.index(pt)
+                    src_coords = tuple(pt[d] - update_sel.start[d] for d in range(rank))
+                    arr[tgt_idx] = update_val[src_coords]
+            else:
+                src_sel = selections.translate(update_sel, x_sel)
+                tgt_sel = selections.translate(sel, x_sel)
+                arr[tgt_sel.slices] = update_val[src_sel.slices]
 
         return arr
 
@@ -769,47 +783,63 @@ def setDatasetValues(self, dset_id, sel, arr):
         """
         Write the given ndarray to the dataset using the selection
         """
-        dset_json = self.getObjectById(dset_id)
-        shape_json = dset_json["shape"]
+
         if not isinstance(sel, selections.Selection):
             raise TypeError("Expected Selection class")
-        if sel.select_type not in (selections.H5S_SELECT_HYPERSLABS, selections.H5S_SELECT_ALL):
-            # TBD: support other selection types
-            raise ValueError("Only hyperslab selections are currently supported")
-        if not isinstance(arr, np.ndarray):
-            raise TypeError("Expected ndarray for data value")
-        tgt_dt = self.getDtype(dset_json)
-        src_dt = arr.dtype
-        if src_dt != tgt_dt:
-            raise TypeError("arr.dtype doesn't match dataset dtype")
+
+        dset_json = self.getObjectById(dset_id)
+        shape_json = dset_json["shape"]
+
         shape_class = getShapeClass(shape_json)
         if shape_class == "H5S_NULL":
             raise ValueError("writing to null space dataset not supported")
+
+        updates = self._getDatasetUpdates(dset_id)
+
         if shape_class == "H5S_SCALAR":
+            if sel.select_type != selections.H5S_SEL_ALL:
+                # TBD: support other selection types
+                raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
             if sel.shape != ():
                 raise ValueError("Selection shape does not match dataset shape")
-            if len(arr.shape) > 0:
-                raise TypeError("Expected scalar ndarray for scalar dataset")
-        else:
+
+            if arr.shape != ():
+                raise ValueError("Expected scalar array for scalar dataset")
+
+        if not isinstance(arr, np.ndarray):
+            raise TypeError("Expected ndarray for data value")
+
+        tgt_dt = self.getDtype(dset_json)
+        src_dt = arr.dtype
+        if src_dt != tgt_dt:
+            raise TypeError("arr.dtype doesn't match dataset dtype")
+
+        if sel.select_type == selections.H5S_SEL_POINTS:
+            if sel.nselect != arr.shape[0]:
+                raise TypeError("Selection shape does not match number of points")
+        elif sel.select_type == selections.H5S_SEL_ALL:
+            if sel.shape != getShapeDims(shape_json):
+                raise TypeError("Selection shape does not match dataset shape")
+        elif sel.select_type == selections.H5S_SEL_HYPERSLABS:
             dims = getShapeDims(shape_json)
             if sel.shape != dims:
-                raise ValueError("Selection shape does not match dataset shape")
+                raise TypeError("Selection shape does not match dataset shape")
             if len(arr.shape) != len(dims):
-                arr = arr.reshape(sel.mshape)  # reshape to match dataset rank
-        updates = self._getDatasetUpdates(dset_id)
-        if sel.select_type == selections.H5S_SELECT_ALL:
+                raise TypeError("Array shape does not match dataset shape")
+            try:
+                sel.broadcast(arr.shape)
+            except TypeError:
+                # selection can't be broadcast to array shape
+                raise
+        else:
+            raise TypeError("Unsupported selection type")
+
+        if sel.select_type == selections.H5S_SEL_ALL or sel.shape == sel.mshape:
             # for select all, throw out any existing updates since this will overwrite them
             updates.clear()
-        arr = arr.copy()  # make a copy in case the client updates it later
-        rank = len(sel.shape)
-        if len(arr.shape) < rank:
-            # reshape to keep compatiblity with dataset rank
-            if sel.select_type == selections.H5S_SELECT_ALL:
-                # this should not result in a dimension reduction
-                raise ValueError("unexpected selection shape")
-            if sel.select_type != selections.H5S_SELECT_HYPERSLABS:
-                raise ValueError("tbd")
-            arr = arr.reshape(sel.mshape)
+
+        # make a copy in case the client updates it later
+        arr = arr.copy()
         updates.append((sel, arr))
         self.make_dirty(dset_id)
 
@@ -833,7 +863,7 @@ def resizeDataset(self, dset_id, shape):
         updates = self._getDatasetUpdates(dset_id)
         for i in range(len(updates)):
             (sel_update, arr) = updates[i]
-            if sel_update.select_type == selections.H5S_SELECT_HYPERSLABS:
+            if sel_update.select_type == selections.H5S_SEL_HYPERSLABS:
                 slices = list(sel_update.slices)
                 for dim in range(rank):
                     s = slices[dim]
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
index b64a3d1d..2196eb10 100644
--- a/src/h5json/jsonstore/h5json_reader.py
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -208,7 +208,7 @@ def getDatasetValues(self, obj_id, sel=None, dtype=None):
             dims = shape_json["dims"]
 
         arr = jsonToArray(dims, dtype, json_value)
-        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+        if sel is None or sel.select_type == selections.H5S_SEL_ALL:
             pass  # just return the entire array
         elif isinstance(sel, selections.SimpleSelection):
             arr = arr[sel.slices]
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 04e2ddbe..1ce10c29 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -21,16 +21,22 @@
 
 import numpy as np
 
-H5S_SEL_POINTS = 0
+
+# Selection types
+H5S_SEL_NONE = 0
+H5S_SEL_POINTS = 1
+H5S_SEL_HYPERSLABS = 2
+H5S_SEL_ALL = 3
+H5S_SEL_FANCY = 4
+
+
+# Boolean selection operations
 H5S_SELECT_SET = 1
 H5S_SELECT_APPEND = 2
 H5S_SELECT_PREPEND = 3
 H5S_SELECT_OR = 4
 H5S_SELECT_NONE = 5
-H5S_SELECT_ALL = 6
-H5S_SELECT_HYPERSLABS = 7
-H5S_SELECT_NOTB = 8
-H5S_SELLECT_FANCY = 9
+H5S_SELECT_NOTB = 6
 
 
 def select(obj, args):
@@ -73,14 +79,18 @@ def select(obj, args):
     if len(args) == 1:
 
         arg = args[0]
+        if hasattr(arg, "shape"):
+            obj_shape = obj.shape
+        else:
+            obj_shape = obj
 
         if isinstance(arg, Selection):
-            if arg.shape != obj.shape:
+            if arg.shape != obj_shape:
                 raise TypeError("Mismatched selection shape")
             return arg
 
         elif isinstance(arg, np.ndarray) or isinstance(arg, list):
-            sel = PointSelection(obj.shape)
+            sel = PointSelection(obj_shape)
             sel[arg]
             return sel
         """
@@ -119,37 +129,119 @@ def select(obj, args):
 def _check_bool_args(s1, s2):
     """ verify argument for boolean operations """
     # TBD: this is currently only working for simple selections with stride 1
-    valid_select_types = (H5S_SELECT_HYPERSLABS, H5S_SELECT_ALL)
+    valid_s1_types = (H5S_SEL_HYPERSLABS, H5S_SEL_ALL)
+    valid_s2_types = (H5S_SEL_HYPERSLABS, H5S_SEL_POINTS, H5S_SEL_ALL)
+
     if not isinstance(s1, Selection):
         raise TypeError("Expected selection type for first arg")
     if not isinstance(s2, Selection):
         raise TypeError("Expected selection type for second arg")
-    if s1.select_type not in valid_select_types:
+    if s1.select_type not in valid_s1_types:
         raise TypeError("Expected hyperslab selection for first arg")
-    if s2.select_type not in valid_select_types:
+    if s2.select_type not in valid_s2_types:
         raise TypeError("Expected hyperslab selection for second arg")
     if s1.shape != s2.shape:
         raise ValueError("selections have incompatible shapes")
 
 
-def intersect(s1, s2):
-    """ Return the intersection of two selections """
-    # TBD: this is currently only working for simple selections with stride 1
-    _check_bool_args(s1, s2)
+def _iter_points(point_sel):
+    """Yield each point in a PointSelection as a tuple of ints."""
+    pts = point_sel.points
+    rank = len(point_sel.shape)
+    pts_arr = np.asarray(pts)
+
+    if pts_arr.size == 0:
+        return
+
+    if pts_arr.ndim == 1:
+        if rank == 1:
+            # Each scalar element is a coordinate in 1-D space
+            for p in pts_arr:
+                yield (int(p),)
+        else:
+            # Single point in rank-N space stored as a flat array [c0, c1, ..., c_{N-1}]
+            yield tuple(int(x) for x in pts_arr)
+    else:
+        # Shape (N, rank): each row is one point
+        for row in pts_arr:
+            yield tuple(int(x) for x in row)
+
+
+def _filter_points_by_hyperslab(point_sel, hyper_sel):
+    """Return a PointSelection of points from point_sel that lie within hyper_sel."""
+    start = hyper_sel.start
+    count = hyper_sel.count
+    step = hyper_sel.step
+    rank = len(point_sel.shape)
+
+    result_pts = []
+    for pt in _iter_points(point_sel):
+        if all(
+            start[d] <= pt[d] < start[d] + count[d] * step[d] and (pt[d] - start[d]) % step[d] == 0
+            for d in range(rank)
+        ):
+            result_pts.append(pt)
+
+    result = PointSelection(point_sel.shape)
+    if rank == 1:
+        result.set([p[0] for p in result_pts] if result_pts else [])
+    else:
+        result.set(result_pts if result_pts else [])
+    return result
+
+
+def _intersect_points_points(s1, s2):
+    """Return a PointSelection of points common to both s1 and s2."""
+    common = sorted(set(_iter_points(s1)) & set(_iter_points(s2)))
 
-    slices = []
     rank = len(s1.shape)
-    for dim in range(rank):
-        start = max(s1.start[dim], s2.start[dim])
-        stop = min(s1.start[dim] + s1.count[dim], s2.start[dim] + s2.count[dim])
-        if s1.step[dim] > 1 or s2.step[dim] > 1:
-            raise ValueError("stepped slices not currently supported")
-        if start > stop:
-            stop = start
-        slices.append(slice(start, stop, 1))
-    slices = tuple(slices)
+    result = PointSelection(s1.shape)
+    if rank == 1:
+        result.set([p[0] for p in common] if common else [])
+    else:
+        result.set(common if common else [])
+    return result
 
-    return select(s1.shape, slices)
+
+def intersect(s1, s2):
+    """ Return the intersection of two selections.
+
+    Supports hyperslab/hyperslab, hyperslab/point, and point/point combinations.
+    """
+    if not isinstance(s1, Selection):
+        raise TypeError("Expected selection type for first arg")
+    if not isinstance(s2, Selection):
+        raise TypeError("Expected selection type for second arg")
+    if s1.shape != s2.shape:
+        raise ValueError("selections have incompatible shapes")
+
+    t1 = s1.select_type
+    t2 = s2.select_type
+    hyperslab_types = (H5S_SEL_HYPERSLABS, H5S_SEL_ALL)
+
+    if t1 in hyperslab_types and t2 in hyperslab_types:
+        slices = []
+        rank = len(s1.shape)
+        for dim in range(rank):
+            start = max(s1.start[dim], s2.start[dim])
+            stop = min(s1.start[dim] + s1.count[dim], s2.start[dim] + s2.count[dim])
+            if s1.step[dim] > 1 or s2.step[dim] > 1:
+                raise ValueError("stepped slices not currently supported")
+            if start > stop:
+                stop = start
+            slices.append(slice(start, stop, 1))
+        return select(s1.shape, tuple(slices))
+
+    if t1 == H5S_SEL_POINTS and t2 in hyperslab_types:
+        return _filter_points_by_hyperslab(s1, s2)
+
+    if t1 in hyperslab_types and t2 == H5S_SEL_POINTS:
+        return _filter_points_by_hyperslab(s2, s1)
+
+    if t1 == H5S_SEL_POINTS and t2 == H5S_SEL_POINTS:
+        return _intersect_points_points(s1, s2)
+
+    raise TypeError(f"Unsupported selection types for intersection: {t1}, {t2}")
 
 
 def contained(s1, s2):
@@ -177,7 +269,7 @@ def contained(s1, s2):
 
 def translate(s1, s2):
     """ Given two selections, s1 and s2, return a new selection
-    definied by s2 relative to s1's stat and count.
+    definied by s2 relative to s1's start and count.
     s2 must be contained in s1 """
 
     _check_bool_args(s1, s2)
@@ -186,14 +278,25 @@ def translate(s1, s2):
         raise ValueError("translate - selections not overlapping")
 
     rank = len(s1.shape)
-
-    slices = []
-    for dim in range(rank):
-        start = s2.start[dim] - s1.start[dim]
-        count = s2.count[dim]
-        slices.append(slice(start, start + count, 1))
-    slices = tuple(slices)
-    return select(s1.shape, slices)
+    args = []
+    if s2.select_type == H5S_SEL_POINTS:
+        points = []
+        for pt in _iter_points(sel_inter):
+            for d in range(rank):
+                if pt[d] < s1.start[d] or pt[d] >= s1.start[d] + s1.count[d]:
+                    continue
+            points.append(tuple(pt[d] - s1.start[d] for d in range(rank)))
+        if len(points) == 0:
+            raise ValueError("translate - selections not overlapping")
+        args.append(points)
+    elif s2.select_type == H5S_SEL_HYPERSLABS:
+        for dim in range(rank):
+            start = s2.start[dim] - s1.start[dim]
+            count = s2.count[dim]
+            args.append(slice(start, start + count, 1))
+    else:
+        raise TypeError("translate - unsupported selection type for s2")
+    return select(s1.shape, tuple(args))
 
 
 class Selection(object):
@@ -229,7 +332,7 @@ def __init__(self, shape, *args, **kwds):
         shape = tuple(shape)
         self._shape = shape
 
-        self._select_type = H5S_SELECT_ALL
+        self._select_type = H5S_SEL_ALL
 
     @property
     def select_type(self):
@@ -259,9 +362,9 @@ def tgtshape(self):
 
     def getSelectNpoints(self):
         npoints = None
-        if self._select_type == H5S_SELECT_NONE:
+        if self._select_type == H5S_SEL_NONE:
             npoints = 0
-        elif self._select_type == H5S_SELECT_ALL:
+        elif self._select_type == H5S_SEL_ALL:
             dims = self._shape
             npoints = 1
             for nextent in dims:
@@ -294,6 +397,7 @@ def __init__(self, shape, *args, **kwds):
         """ Create a Point selection.   """
         Selection.__init__(self, shape, *args, **kwds)
         self._points = []
+        self._select_type = H5S_SEL_POINTS
 
     @property
     def points(self):
@@ -302,9 +406,9 @@ def points(self):
 
     def getSelectNpoints(self):
         npoints = None
-        if self._select_type == H5S_SELECT_NONE:
+        if self._select_type == H5S_SEL_NONE:
             npoints = 0
-        elif self._select_type == H5S_SELECT_ALL:
+        elif self._select_type == H5S_SEL_ALL:
             dims = self._shape
             npoints = 1
             for nextent in dims:
@@ -343,8 +447,6 @@ def _perform_selection(self, points, op):
         else:
             raise ValueError("Unsupported operation")
 
-    # def _perform_list_selection(points, H5S_SELECT_SET):
-
     def __getitem__(self, arg):
         """ Perform point-wise selection from a NumPy boolean array """
         if isinstance(arg, list):
@@ -416,7 +518,7 @@ def __init__(self, shape, *args, **kwds):
         rank = len(self._shape)
         self._sel = ((0,) * rank, self._shape, (1,) * rank, (False,) * rank)
         self._mshape = self._shape
-        self._select_type = H5S_SELECT_ALL
+        self._select_type = H5S_SEL_ALL
 
     def __getitem__(self, args):
 
@@ -426,13 +528,13 @@ def __getitem__(self, args):
         if self._shape == ():
             if len(args) > 0 and args[0] not in (Ellipsis, ()):
                 raise TypeError("Invalid index for scalar dataset (only ..., () allowed)")
-            self._select_type = H5S_SELECT_ALL
+            self._select_type = H5S_SEL_ALL
             return self
 
         start, count, step, scalar = _handle_simple(self._shape, args)
         self._sel = (start, count, step, scalar)
 
-        self._select_type = H5S_SELECT_HYPERSLABS
+        self._select_type = H5S_SEL_HYPERSLABS
 
         self._mshape = tuple(x for x, y in zip(count, scalar) if not y)
 
@@ -442,14 +544,14 @@ def getSelectNpoints(self):
         """Return number of elements in current selection
         """
         npoints = None
-        if self._select_type == H5S_SELECT_NONE:
+        if self._select_type == H5S_SEL_NONE:
             npoints = 0
-        elif self._select_type == H5S_SELECT_ALL:
+        elif self._select_type == H5S_SEL_ALL:
             dims = self._shape
             npoints = 1
             for nextent in dims:
                 npoints *= nextent
-        elif self._select_type == H5S_SELECT_HYPERSLABS:
+        elif self._select_type == H5S_SEL_HYPERSLABS:
             dims = self._shape
             npoints = 1
             rank = len(dims)
@@ -490,8 +592,7 @@ def broadcast(self, target_shape):
         if self._shape == ():
             if np.product(target_shape) != 1:
                 raise TypeError(f"Can't broadcast {target_shape} to scalar")
-            self._id.select_all()
-            yield self._id
+            yield self._sel
             return
 
         start, count, step, scalar = self._sel
@@ -513,17 +614,18 @@ def broadcast(self, target_shape):
         tshape = tuple(tshape)
 
         chunks = tuple(x // y for x, y in zip(count, tshape))
-        nchunks = int(np.product(chunks))
+        nchunks = int(np.prod(chunks))
 
         if nchunks == 1:
-            yield self._id
+            yield self._sel
         else:
-            sid = self._id.copy()
-            sid.select_hyperslab((0,) * rank, tshape, step)
             for idx in range(nchunks):
-                offset = tuple(x * y * z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
-                sid.offset_simple(offset)
-                yield sid
+                offset = []
+                for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start):
+                    offset.append(int(x * y * z + s))
+                offset = tuple(offset)
+                sel = [tuple([sum(x) for x in zip(offset, start)]), tshape, step, scalar]
+                yield sel
 
     @property
     def slices(self):
@@ -567,6 +669,7 @@ def mshape(self):
     def __init__(self, shape, *args, **kwds):
         Selection.__init__(self, shape, *args, **kwds)
         self._slices = []
+        self._select_type = H5S_SEL_FANCY
 
     def __getitem__(self, args):
 
@@ -574,7 +677,7 @@ def __getitem__(self, args):
             args = (args,)
 
         args = _expand_ellipsis(args, len(self._shape))
-        select_type = H5S_SELECT_HYPERSLABS  # will adjust if we have a coord
+        select_type = H5S_SEL_HYPERSLABS  # will adjust if we have a coord
 
         # Create list of slices and/or coordinates
         slices = []
@@ -611,7 +714,7 @@ def __getitem__(self, args):
                     if sorted(arg) != list(arg):
                         raise TypeError("Indexing elements must be in increasing order")
                 mshape.append(len(arg))
-                select_type = H5S_SELLECT_FANCY
+                select_type = H5S_SEL_FANCY
             elif isinstance(arg, list) or hasattr(arg, 'dtype'):
                 # coordinate selection
                 slices.append(arg)
@@ -627,7 +730,7 @@ def __getitem__(self, args):
                     # this shouldn't happen since HSDS would have thrown an error
                     raise ValueError("coordinate num element missmatch")
                 mshape.append(len(arg))
-                select_type = H5S_SELLECT_FANCY
+                select_type = H5S_SEL_FANCY
             elif isinstance(arg, int):
                 if arg < 0 or arg >= length:
                     raise IndexError(f"Index ({arg}) out of range (0-{length - 1})")
@@ -804,9 +907,9 @@ def guess_shape(sid):
 
     elif sel_class == 'H5S_SCALAR':
         # NumPy has no way of expressing empty 0-rank selections, so we use None
-        if sel_type == H5S_SELECT_NONE:
+        if sel_type == H5S_SEL_NONE:
             return None
-        if sel_type == H5S_SELECT_ALL:
+        if sel_type == H5S_SEL_ALL:
             return tuple()
 
     elif sel_class != 'H5S_SIMPLE':
@@ -817,10 +920,10 @@ def guess_shape(sid):
     N = sid.get_select_npoints()
     rank = len(sid.shape)
 
-    if sel_type == H5S_SELECT_NONE:
+    if sel_type == H5S_SEL_NONE:
         return (0,) * rank
 
-    elif sel_type == H5S_SELECT_ALL:
+    elif sel_type == H5S_SEL_ALL:
         return sid.shape
 
     elif sel_type == H5S_SEL_POINTS:
@@ -828,7 +931,7 @@ def guess_shape(sid):
         # the dataspace rank
         return (N,)
 
-    elif sel_type != H5S_SELECT_HYPERSLABS:
+    elif sel_type != H5S_SEL_HYPERSLABS:
         raise TypeError(f"Unrecognized selection method {sel_type}")
 
     # We have a hyperslab-based selection
@@ -895,9 +998,9 @@ def __init__(self, shape, *args, **kwds):
             arg = args[0]
         if arg == ():
             self._mshape = None
-            self._select_type = H5S_SELECT_ALL
+            self._select_type = H5S_SEL_ALL
         elif arg == (Ellipsis,):
             self._mshape = ()
-            self._select_type = H5S_SELECT_ALL
+            self._select_type = H5S_SEL_ALL
         else:
             raise ValueError("Illegal slicing argument for scalar dataspace")
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
index 74108313..baebcf23 100644
--- a/test/unit/h5py_reader_test.py
+++ b/test/unit/h5py_reader_test.py
@@ -92,6 +92,14 @@ def testSimple(self):
         self.assertEqual(arr.shape, (1, 10))
         self.assertEqual(list(arr[0]), list(range(0, 40, 4)))
 
+        # do a point selection; dset1.1.1[i,j] = i*j, so diagonals are i*i
+        sel = selections.select(dims, [(0, 0), (1, 1), (2, 2), (3, 3)])
+        arr = db.getDatasetValues(dset111_id, sel)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(arr.shape, (4,))
+        for i in range(4):
+            self.assertEqual(arr[i], i * i)
+
         # try adding an attribute
         db.createAttribute(dset111_id, "attr3", value=42)
         dset_json = db.getObjectById(dset111_id)
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 36c1dbd9..4c2513b6 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -201,8 +201,8 @@ def testSimple(self):
 
         db.open()
         sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
-        arr = np.zeros((), dtype=np.int32)
-        arr[()] = 42
+        arr = np.zeros((1, 1), dtype=np.int32)
+        arr[0, 0] = 42
         db.setDatasetValues(dset_111_id, sel, arr)
         db.close()
 
@@ -726,6 +726,7 @@ def testReaderWithUpdate(self):
 
         db.open()
         arr = np.asarray(range(10), dtype=np.int32)
+        arr = arr.reshape(1, 10)
         sel = selections.select((10, 10), (slice(5, 6), slice(0, 10)))
         db.setDatasetValues(dset_id, sel, arr)
         db.close()
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 446b1dda..57074866 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -599,12 +599,24 @@ def testSimpleDataset(self):
                 self.assertEqual(val.shape, (1, 1))
                 self.assertEqual(val[0, 0], i * 10 + j)
 
+        # do a point selection
+        sel = selections.select(shape, [(0, 0), (1, 1), (2, 2), (3, 3)])
+        val = db.getDatasetValues(dset_id, sel)
+        self.assertTrue(isinstance(val, np.ndarray))
+        self.assertEqual(val.shape, (4,))
+        for i in range(4):
+            self.assertEqual(val[i], i * 10 + i)
+
+        # point selection write
+        arr = np.zeros((4,), dtype=dtype)
+        db.setDatasetValues(dset_id, sel, arr)
+
         # test select all write
-        sel = selections.select(shape, ...)
+        sel_all = selections.select(shape, ...)
         arr = np.zeros(shape, dtype=dtype)
         arr[...] = 42
-        db.setDatasetValues(dset_id, sel, arr)
-        arr = db.getDatasetValues(dset_id, sel)
+        db.setDatasetValues(dset_id, sel_all, arr)
+        arr = db.getDatasetValues(dset_id, sel_all)
         for i in range(nrows):
             for j in range(ncols):
                 self.assertEqual(arr[i, j], 42)
@@ -612,7 +624,10 @@ def testSimpleDataset(self):
         # try with broadcasting
         arr_one_value = np.zeros((1, 1), dtype=dtype)
         arr_one_value[0, 0] = 7
-        db.setDatasetValues(dset_id, sel, arr_one_value)
+        db.setDatasetValues(dset_id, sel_all, arr_one_value)
+        # check that entire dataset is updated to the single value
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertTrue((arr == 7).all())
 
         db.close()
 

From 47cff958c4b8e4d3a22b7b84dd4e93ec39a01e22 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Wed, 13 May 2026 10:42:27 +0200
Subject: [PATCH 127/129] support point write selections in h5pywriter

---
 src/h5json/h5pystore/h5py_writer.py | 30 ++++++++++++-----
 test/unit/h5py_writer_test.py       | 50 +++++++++++++++++++++--------
 2 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
index 0bb7fc9d..6d128239 100644
--- a/src/h5json/h5pystore/h5py_writer.py
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -373,14 +373,28 @@ def updateDatasetValues(self, dset_id, dset):
         updates = self.db._getDatasetUpdates(dset_id)
 
         for (sel, val) in updates:
-            slices = []
-            for dim in range(len(sel.shape)):
-                start = sel.start[dim]
-                stop = start + sel.count[dim]
-                step = sel.step[dim]
-                slices.append(slice(start, stop, step))
-            slices = tuple(slices)
-            dset[slices] = val
+            if sel is None or sel.select_type == selections.H5S_SEL_NONE:
+                pass  # no updates
+            elif sel.select_type == selections.H5S_SEL_ALL:
+                dset[...] = val
+                self.log.debug(f"h5py_writer dset {dset.name} updated with sel_all")
+            elif isinstance(sel, selections.SimpleSelection):
+                slices = []
+                for dim in range(len(sel.shape)):
+                    start = sel.start[dim]
+                    stop = start + sel.count[dim]
+                    step = sel.step[dim]
+                    slices.append(slice(start, stop, step))
+                slices = tuple(slices)
+                dset[slices] = val
+            elif isinstance(sel, selections.PointSelection):
+                for i in range(len(sel.points)):
+                    point = tuple(sel.points[i])
+                    dset[point] = val[i]
+                self.log.debug(f"h5py_writer dset {dset.name} updated with point selection")
+            else:
+                raise TypeError(f"Unexpected selection type: {type(sel)}")
+
             self.log.debug(f"h5py_writer dset {dset.name} updated")
 
     def initializeDatasetValues(self, dset_id, dset):
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
index 4c2513b6..f850f28d 100644
--- a/test/unit/h5py_writer_test.py
+++ b/test/unit/h5py_writer_test.py
@@ -96,12 +96,13 @@ def testSimple(self):
 
         g1_1_id = db.createGroup()
         db.createHardLink(g1_id, "g1.1", g1_1_id)
-        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        shape = (10, 10)
+        dset_111_id = db.createDataset(shape=shape, dtype=np.int32)
 
         # try setting dset values with broadcasting
         arr_one_value = np.zeros((1, 1), dtype=np.int32)
         arr_one_value[0, 0] = 42
-        sel_all = selections.select((10, 10), ...)
+        sel_all = selections.select(shape, ...)
         db.setDatasetValues(dset_111_id, sel_all, arr_one_value)
 
         db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
@@ -124,9 +125,9 @@ def testSimple(self):
             g11 = g1["g1.1"]
             self.assertTrue("dset1.1.1" in g11)
             dset = g11["dset1.1.1"]
-            self.assertEqual(dset.shape, (10, 10))
-            for i in range(10):
-                for j in range(10):
+            self.assertEqual(dset.shape, shape)
+            for i in range(shape[0]):
+                for j in range(shape[1]):
                     self.assertEqual(dset[i, j], 42)
             self.assertTrue("g2" in f)
             g2 = f["g2"]
@@ -135,19 +136,19 @@ def testSimple(self):
 
         # write dataset values element by element
         db.open()
-        arr = np.zeros((10, 10), dtype=np.int32)
-        for i in range(10):
-            for j in range(10):
+        arr = np.zeros(shape, dtype=np.int32)
+        for i in range(shape[0]):
+            for j in range(shape[1]):
                 arr[i, j] = i * j
-        sel_all = selections.select((10, 10), ...)
+        sel_all = selections.select(shape, ...)
         db.setDatasetValues(dset_111_id, sel_all, arr)
         db.close()
 
         # verify changes in h5py
         with h5py.File(filepath) as f:
             dset = f["/g1/g1.1/dset1.1.1"]
-            for i in range(10):
-                for j in range(10):
+            for i in range(shape[0]):
+                for j in range(shape[1]):
                     self.assertEqual(dset[i, j], i * j)
 
         db.open()
@@ -200,7 +201,7 @@ def testSimple(self):
             self.assertFalse("tmp_group" in g2)
 
         db.open()
-        sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
+        sel = selections.select(shape, (slice(4, 5), slice(4, 5)))
         arr = np.zeros((1, 1), dtype=np.int32)
         arr[0, 0] = 42
         db.setDatasetValues(dset_111_id, sel, arr)
@@ -208,8 +209,8 @@ def testSimple(self):
 
         with h5py.File(filepath) as f:
             dset = f["/g1/g1.1/dset1.1.1"]
-            for i in range(10):
-                for j in range(10):
+            for i in range(shape[0]):
+                for j in range(shape[1]):
                     if i == 4 and j == 4:
                         # this is the one element that was updated
                         expected = 42
@@ -217,6 +218,27 @@ def testSimple(self):
                         expected = i * j
                     self.assertEqual(dset[i, j], expected)
 
+        # try a point write
+        db.open()
+        points = []
+        for i in range(shape[0]):
+            points.append((i, i))
+        sel = selections.select(shape, points)
+        arr = np.zeros((len(points),), dtype=np.int32)
+        db.setDatasetValues(dset_111_id, sel, arr)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            dset = f["/g1/g1.1/dset1.1.1"]
+            for i in range(shape[0]):
+                for j in range(shape[1]):
+                    if i == j:
+                        # the diagonal elements were updated to 0
+                        expected = 0
+                    else:
+                        expected = i * j
+                    self.assertEqual(dset[i, j], expected)
+
     def testResizableDataset(self):
         filepath = "test/unit/out/h5py_writer_test_testResizableDataset.h5"
         if os.path.isfile(filepath):

From a61eb095e3990c2f0e0b52f190ea1a4b2dfc2410 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Tue, 26 May 2026 18:34:05 +0200
Subject: [PATCH 128/129] updated selection code

---
 src/h5json/hdf5db.py        |  95 +++--
 src/h5json/selections.py    |  60 +++-
 test/unit/hdf5db_test.py    |  70 +++-
 test/unit/selection_test.py | 668 ++++++++++++++++++++++++++++++++++++
 4 files changed, 855 insertions(+), 38 deletions(-)
 create mode 100644 test/unit/selection_test.py

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index e65b877d..adcd9f10 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -90,7 +90,7 @@ def __init__(
         self._dirty_objects = set()     # set of modified objects
         self._deleted_objects = set()   # set of deleted objects
         self._resized_datasets = set()  # set of dataset ids that have been resized
-        self._dataset_updates = {}         # list of dataset values updates keyed by dset_id
+        self._dataset_updates = {}      # list of dataset values updates keyed by dset_id
 
         self._root_id = None
 
@@ -106,6 +106,14 @@ def __init__(
         else:
             self._writer = None
 
+    def _getDatasetUpdates(self, dset_id):
+        """ Return list of updates for the given dataset id """
+
+        if dset_id not in self._dataset_updates:
+            self._dataset_updates[dset_id] = []
+
+        return self._dataset_updates[dset_id]
+
     @property
     def db(self):
         """ return object db dictionary """
@@ -184,14 +192,6 @@ def deleted_objects(self):
     def resized_datasets(self):
         return self._resized_datasets
 
-    def _getDatasetUpdates(self, dset_id):
-        """ Get list of update tuples """
-        if getCollectionForId(dset_id) != "datasets":
-            raise TypeError("expected dataset id")
-        if dset_id not in self._dataset_updates:
-            self._dataset_updates[dset_id] = []
-        return self._dataset_updates[dset_id]
-
     def make_dirty(self, obj_id):
         """ Mark the object as dirty and update the lastModified timestamp """
         obj_id = getHashTagForId(obj_id)
@@ -222,6 +222,7 @@ def flush(self):
         self._deleted_objects.clear()
         self._resized_datasets.clear()
         self._dataset_updates.clear()
+
         return True
 
     def readAll(self):
@@ -711,7 +712,6 @@ def init_arr(dtype, cpl):
 
         if shape_class == "H5S_SCALAR":
             if sel.select_type != selections.H5S_SEL_ALL:
-                # TBD: support other selection types
                 raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
             if sel.shape != ():
                 raise ValueError("Selection shape does not match dataset shape")
@@ -754,28 +754,59 @@ def init_arr(dtype, cpl):
             arr = init_arr(dtype, cpl)
 
         # apply any updates that impact this selection
-
-        for (update_sel, update_val) in updates:
-            # get the part of the update that is in common with the requested selection
-            x_sel = selections.intersect(sel, update_sel)
-            if x_sel.nselect == 0:
-                # this update doesn't effect the selection, so ignore
-                continue
-            # apply the update to the array to be returned
-            if sel.select_type == selections.H5S_SEL_POINTS:
-                # For point selections apply each intersecting point individually.
-                # arr is 1-D with one entry per selected point; map each intersection
-                # point back to its position in sel and its offset in update_val.
-                rank = len(sel.shape)
-                sel_pts = list(selections._iter_points(sel))
-                for pt in selections._iter_points(x_sel):
-                    tgt_idx = sel_pts.index(pt)
-                    src_coords = tuple(pt[d] - update_sel.start[d] for d in range(rank))
-                    arr[tgt_idx] = update_val[src_coords]
-            else:
-                src_sel = selections.translate(update_sel, x_sel)
-                tgt_sel = selections.translate(sel, x_sel)
-                arr[tgt_sel.slices] = update_val[src_sel.slices]
+        if sel.select_type == selections.H5S_SEL_POINTS:
+            # For point selections apply each intersecting point individually.
+            # arr is 1-D with one entry per selected point; map each intersection
+            # point back to its position in sel and its offset in update_val.
+            points = sel.points
+            for tgt_idx in range(len(points)):
+                pt = points[tgt_idx]
+                pt_sel = selections.select(sel.shape, [pt])
+                for (update_sel, update_val) in updates:
+                    x_sel = selections.intersect(update_sel, pt_sel)
+                    if x_sel.nselect == 0:
+                        pass  # no intersection, ignore
+                    elif x_sel.nselect > 1:
+                        raise ValueError("unexpected multiple points in intersection of point selection")
+                    else:
+                        if update_sel.select_type == selections.H5S_SEL_POINTS:
+                            # update_val is 1-D indexed by position in update_sel.points
+                            update_pts = list(selections._iter_points(update_sel))
+                            pt_tuple = next(iter(selections._iter_points(pt_sel)))
+                            src_idx = update_pts.index(pt_tuple)
+                            arr[tgt_idx] = update_val[src_idx]
+                        else:
+                            src_sel = selections.translate(update_sel, x_sel)
+                            # src_sel is a PointSelection with 1 translated point
+                            # index update_val using the full N-D coordinates
+                            src_pt = next(iter(selections._iter_points(src_sel)))
+                            arr[tgt_idx] = update_val[src_pt] if len(src_pt) > 1 else update_val[src_pt[0]]
+        else:
+            # hyperslab selections
+            for (update_sel, update_val) in updates:
+                # get the part of the update that is in common with the requested selection
+                x_sel = selections.intersect(sel, update_sel)
+                if x_sel.nselect == 0:
+                    # this update doesn't effect the selection, so ignore
+                    continue
+                if update_sel.select_type == selections.H5S_SEL_POINTS:
+                    # update_val is 1-D indexed by position in update_sel.points
+                    update_pts = list(selections._iter_points(update_sel))
+                    update_pt_to_idx = {pt: i for i, pt in enumerate(update_pts)}
+                    rank = len(sel.shape)
+                    sel_start = sel.start
+                    for pt in selections._iter_points(x_sel):
+                        src_idx = update_pt_to_idx[pt]
+                        tgt_coords = tuple(pt[d] - sel_start[d] for d in range(rank))
+                        if rank == 1:
+                            arr[tgt_coords[0]] = update_val[src_idx]
+                        else:
+                            arr[tgt_coords] = update_val[src_idx]
+                else:
+                    # apply the update to the array to be returned
+                    src_sel = selections.translate(update_sel, x_sel)
+                    tgt_sel = selections.translate(sel, x_sel)
+                    arr[tgt_sel.slices] = update_val[src_sel.slices]
 
         return arr
 
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 1ce10c29..93366937 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -167,8 +167,29 @@ def _iter_points(point_sel):
             yield tuple(int(x) for x in row)
 
 
+def _bboxes_overlap(s1, s2):
+    """Return True if the bounding boxes of s1 and s2 overlap in every dimension."""
+    min1, max1 = s1.bbox
+    if min1 is None:
+        return False
+    min2, max2 = s2.bbox
+    if min2 is None:
+        return False
+    return all(min1[d] < max2[d] and min2[d] < max1[d] for d in range(len(s1.shape)))
+
+
+def _empty_point_sel(shape):
+    """Return an empty PointSelection for the given shape."""
+    result = PointSelection(shape)
+    result.set([])
+    return result
+
+
 def _filter_points_by_hyperslab(point_sel, hyper_sel):
     """Return a PointSelection of points from point_sel that lie within hyper_sel."""
+    if not _bboxes_overlap(point_sel, hyper_sel):
+        return _empty_point_sel(point_sel.shape)
+
     start = hyper_sel.start
     count = hyper_sel.count
     step = hyper_sel.step
@@ -192,6 +213,9 @@ def _filter_points_by_hyperslab(point_sel, hyper_sel):
 
 def _intersect_points_points(s1, s2):
     """Return a PointSelection of points common to both s1 and s2."""
+    if not _bboxes_overlap(s1, s2):
+        return _empty_point_sel(s1.shape)
+
     common = sorted(set(_iter_points(s1)) & set(_iter_points(s2)))
 
     rank = len(s1.shape)
@@ -344,6 +368,33 @@ def shape(self):
         """ Shape of whole dataspace """
         return self._shape
 
+    @property
+    def bbox(self):
+        """ Bounding box of selection, as a tuple of (min, max) corner coordinates.
+
+        For point-based selections, this is the smallest hyperslab that contains
+        all selected points.  For hyperslab-based selections, this is the
+        smallest hyperslab that contains the selection (which may be larger than
+        the actual selection if stepped slices are used).
+        """
+        if self._select_type == H5S_SEL_POINTS:
+            pts_arr = np.asarray(self._points)
+            if pts_arr.size == 0:
+                return None, None
+            # For rank-1, pts_arr is 1-D (shape (N,)); reshape so axis=0 reduces over points.
+            rank = len(self._shape)
+            if pts_arr.ndim == 1 and rank == 1:
+                pts_arr = pts_arr.reshape(-1, 1)
+            min_corner = tuple(int(x) for x in np.min(pts_arr, axis=0))
+            max_corner = tuple(int(x) + 1 for x in np.max(pts_arr, axis=0))
+            return min_corner, max_corner
+        elif self._select_type in (H5S_SEL_HYPERSLABS, H5S_SEL_ALL):
+            start = self.start
+            stop = tuple(start[dim] + (self.count[dim] - 1) * self.step[dim] + 1 for dim in range(len(self._shape)))
+            return start, stop
+        else:
+            raise TypeError("Bounding box is not defined for this selection type")
+
     @property
     def nselect(self):
         """ Number of elements currently selected """
@@ -426,11 +477,10 @@ def getSelectNpoints(self):
 
     def _perform_selection(self, points, op):
         """ Internal method which actually performs the selection """
-        if isinstance(points, np.ndarray) or True:
-            points = np.asarray(points, order='C', dtype='u8')
-            if len(points.shape) == 1:
-                # points.shape = (1,points.shape[0])
-                pass
+        points = np.asarray(points, order='C', dtype='u8')
+        if len(points.shape) == 1:
+            # points.shape = (1,points.shape[0])
+            pass
 
         if self._select_type != H5S_SEL_POINTS:
             op = H5S_SELECT_SET
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 57074866..dea6f663 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -540,7 +540,75 @@ def testCommittedCompoundType(self):
 
         db.close()
 
-    def testSimpleDataset(self):
+    def test1DDataset(self):
+        nelements = 10
+        shape = (nelements,)
+        dtype = np.int32
+
+        db = Hdf5db(app_logger=self.log)
+        root_id = db.open()
+        dset_id = db.createDataset(shape, dtype=dtype)
+        db.createHardLink(root_id, "dset", dset_id)
+        db.createAttribute(dset_id, "a1", "Hello, world")
+        sel_all = selections.select(shape, ...)
+        arr = db.getDatasetValues(dset_id, sel_all)
+
+        self.assertEqual(arr.dtype, dtype)
+        self.assertEqual(arr.shape, shape)
+        self.assertEqual(arr.min(), 0)
+        self.assertEqual(arr.max(), 0)
+
+        # set values element by element
+        for i in range(nelements):
+            sel = selections.select(shape, slice(i, i + 1))
+            db.setDatasetValues(dset_id, sel, np.array([i], dtype=dtype))
+
+        # read entire dataset
+        arr = db.getDatasetValues(dset_id, sel_all)
+        for i in range(nelements):
+            val = np.array([i], dtype=dtype)
+            np.testing.assert_array_equal(arr[i], val)
+
+        # read element by element
+        for i in range(nelements):
+            sel = selections.select(shape, slice(i, i + 1))
+            val = db.getDatasetValues(dset_id, sel)
+            self.assertTrue(isinstance(val, np.ndarray))
+            self.assertEqual(val.shape, (1,))
+            self.assertEqual(val[0], i)
+
+        # do a point selection
+        sel = selections.select(shape, [2, 3, 5, 7])
+        val = db.getDatasetValues(dset_id, sel)
+        self.assertTrue(isinstance(val, np.ndarray))
+        self.assertEqual(val.shape, (4,))
+
+        self.assertEqual(val[0], 2)
+        self.assertEqual(val[1], 3)
+        self.assertEqual(val[2], 5)
+        self.assertEqual(val[3], 7)
+
+        # point selection write
+        arr = np.zeros((4,), dtype=dtype)
+        db.setDatasetValues(dset_id, sel, arr)
+        arr = db.getDatasetValues(dset_id, sel_all)
+        for i in range(nelements):
+            if i in (2, 3, 5, 7):
+                self.assertEqual(arr[i], 0)  # these were set to 0 by point selection write
+            else:
+                self.assertEqual(arr[i], i)
+
+        # try with broadcasting
+        arr_one_value = np.zeros((1), dtype=dtype)
+        arr_one_value[0] = 42
+        db.setDatasetValues(dset_id, sel_all, arr_one_value)
+        # check that entire dataset is updated to the single value
+        arr = db.getDatasetValues(dset_id, sel_all)
+        self.assertTrue((arr == 42).all())
+
+        db.close()
+
+    def test2DDataset(self):
         nrows = 8
         ncols = 10
         shape = (nrows, ncols)
diff --git a/test/unit/selection_test.py b/test/unit/selection_test.py
new file mode 100644
index 00000000..7ca42225
--- /dev/null
+++ b/test/unit/selection_test.py
@@ -0,0 +1,668 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+import numpy as np
+
+from h5json import selections
+from h5json.selections import (
+    H5S_SEL_POINTS,
+    H5S_SEL_ALL,
+    H5S_SEL_HYPERSLABS,
+    H5S_SEL_FANCY,
+    PointSelection,
+    SimpleSelection,
+    FancySelection,
+    ScalarSelection,
+)
+
+
+def make_point_sel(shape, mask):
+    """Build a PointSelection from a boolean ndarray mask."""
+    sel = PointSelection(shape)
+    sel[mask]
+    return sel
+
+
+class SimpleSelectionTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(SimpleSelectionTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testSelectAll(self):
+        shape = (10,)
+        sel = selections.select(shape, ...)
+        self.assertIsInstance(sel, SimpleSelection)
+        # __getitem__ always sets HYPERSLABS even for a full-range ellipsis
+        self.assertEqual(sel.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(sel.shape, shape)
+        self.assertEqual(sel.nselect, 10)
+        self.assertEqual(sel.shape, sel.mshape)
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (0,))
+        self.assertEqual(bbox[1], shape)
+
+    def testSelectAll2D(self):
+        shape = (4, 5)
+        sel = selections.select(shape, ...)
+        self.assertIsInstance(sel, SimpleSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(sel.nselect, 20)
+        self.assertEqual(sel.shape, sel.mshape)
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (0, 0))
+        self.assertEqual(bbox[1], shape)
+
+    def testSlice1D(self):
+        shape = (10,)
+        sel = selections.select(shape, slice(2, 7))
+        self.assertIsInstance(sel, SimpleSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(sel.start, (2,))
+        self.assertEqual(sel.count, (5,))
+        self.assertEqual(sel.step, (1,))
+        self.assertEqual(sel.nselect, 5)
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (2,))
+        self.assertEqual(bbox[1], (7,))
+
+    def testSliceWithStep(self):
+        shape = (10,)
+        sel = selections.select(shape, slice(0, 10, 2))
+        self.assertIsInstance(sel, SimpleSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(sel.start, (0,))
+        self.assertEqual(sel.count, (5,))
+        self.assertEqual(sel.step, (2,))
+        self.assertEqual(sel.nselect, 5)
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (0,))
+        self.assertEqual(bbox[1], (9,))
+
+    def testSlice2D(self):
+        shape = (8, 10)
+        sel = selections.select(shape, (slice(1, 4), slice(2, 9)))
+        self.assertIsInstance(sel, SimpleSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(sel.start, (1, 2))
+        self.assertEqual(sel.count, (3, 7))
+        self.assertEqual(sel.step, (1, 1))
+        self.assertEqual(sel.nselect, 21)
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (1, 2))
+        self.assertEqual(bbox[1], (4, 9))
+
+    def testBroadcast1D(self):
+        shape = (10,)
+        sel = selections.select(shape, ...)
+        self.assertIsInstance(sel, SimpleSelection)
+
+        it = sel.broadcast((1,))
+        count = 0
+        for x in it:
+            # start
+            self.assertTrue(x[0][0] >= 0 and x[0][0] < 10)
+            # count
+            self.assertEqual(x[1], (1,))
+            # step
+            self.assertEqual(x[2], (1,))
+            # scalar
+            self.assertEqual(x[3], (False,))
+            count += 1
+        self.assertEqual(count, 10)
+
+    def testBroadcast2D(self):
+        shape = (8, 10)
+        sel = selections.select(shape, ...)
+        self.assertIsInstance(sel, SimpleSelection)
+        try:
+            sel.broadcast(4, 5)
+            self.assertTrue(False)
+        except TypeError:
+            pass
+        it = sel.broadcast((1, 10))
+        count = 0
+        for x in it:
+            # start
+            self.assertTrue(x[0][0] >= 0 and x[0][0] < 8)
+            self.assertEqual(x[0][1], 0)
+            # count
+            self.assertEqual(x[1], (1, 10))
+            # step
+            self.assertEqual(x[2], (1, 1))
+            # scalar
+            self.assertEqual(x[3], (False, False))
+            count += 1
+        self.assertEqual(count, 8)
+
+    def testSlices(self):
+        shape = (8, 10)
+        sel = selections.select(shape, (slice(2, 5), slice(3, 7)))
+        self.assertEqual(sel.slices, (slice(2, 5, 1), slice(3, 7, 1)))
+
+    def testNselect(self):
+        shape = (100,)
+        sel = selections.select(shape, slice(0, 100))
+        self.assertEqual(sel.nselect, 100)
+        sel2 = selections.select(shape, slice(10, 20))
+        self.assertEqual(sel2.nselect, 10)
+
+    def testOutOfRangeRaises(self):
+        shape = (10,)
+        # integer index out of range raises IndexError; slices are silently clamped
+        with self.assertRaises(IndexError):
+            selections.select(shape, 15)
+
+    def testGetQueryParam1D(self):
+        shape = (10,)
+        sel = selections.select(shape, slice(2, 8))
+        param = sel.getQueryParam()
+        self.assertEqual(param, "[2:8]")
+
+    def testGetQueryParam2D(self):
+        shape = (8, 10)
+        sel = selections.select(shape, (slice(1, 4), slice(0, 10)))
+        param = sel.getQueryParam()
+        self.assertEqual(param, "[1:4,0:10]")
+
+    def testRepr(self):
+        shape = (10,)
+        sel = selections.select(shape, slice(0, 5))
+        self.assertIn("SimpleSelection", repr(sel))
+
+    def testScalarDataset(self):
+        # select() routes to ScalarSelection when obj has .shape == ()
+        scalar_ds = np.array(42)
+        sel = selections.select(scalar_ds, ...)
+        self.assertIsInstance(sel, ScalarSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_ALL)
+        self.assertEqual(sel.nselect, 1)
+
+
+class PointSelectionTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(PointSelectionTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testBoolMask1D(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[0, 3, 7]] = True
+        sel = make_point_sel(shape, mask)
+        self.assertIsInstance(sel, PointSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_POINTS)
+        self.assertEqual(sel.nselect, 3)
+        points = sel.points
+        self.assertEqual(len(points), 3)
+        for i in range(len(points)):
+            pt = points[i]
+            self.assertTrue(isinstance(pt, np.ndarray))
+            self.assertEqual(pt.shape, (1,))
+            self.assertTrue(pt[0] in (0, 3, 7))
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (0,))
+        self.assertEqual(bbox[1], (8,))
+
+    def testBoolMask2D(self):
+        shape = (4, 5)
+        mask = np.zeros(shape, dtype=bool)
+        mask[0, 1] = True
+        mask[2, 3] = True
+        sel = make_point_sel(shape, mask)
+        self.assertEqual(sel.select_type, H5S_SEL_POINTS)
+        self.assertEqual(sel.nselect, 2)
+        pts = sel.points
+        self.assertEqual(pts.shape, (2, 2))
+        self.assertEqual(list(pts[0]), [0, 1])
+        self.assertEqual(list(pts[1]), [2, 3])
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (0, 1))
+        self.assertEqual(bbox[1], (3, 4))
+
+    def testListOfCoords2D(self):
+        shape = (8, 10)
+        sel = selections.select(shape, [(0, 0), (1, 1), (2, 2), (3, 3)])
+        self.assertIsInstance(sel, PointSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_POINTS)
+        self.assertEqual(sel.nselect, 4)
+        points = sel.points
+        self.assertEqual(len(points), 4)
+        for i in range(len(points)):
+            pt = points[i]
+            self.assertTrue(isinstance(pt, np.ndarray))
+            self.assertEqual(pt.shape, (2,))
+            self.assertTrue(pt[0] == pt[1])
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (0, 0))
+        self.assertEqual(bbox[1], (4, 4))
+
+    def testEmptySet(self):
+        shape = (10,)
+        sel = PointSelection(shape)
+        sel.set([])
+        self.assertEqual(sel.nselect, 0)
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], None)
+        self.assertEqual(bbox[1], None)
+
+    def testSetReplacesPoints(self):
+        shape = (10,)
+        mask1 = np.zeros(10, dtype=bool)
+        mask1[[1, 2, 3]] = True
+        sel = make_point_sel(shape, mask1)
+        self.assertEqual(sel.nselect, 3)
+
+        mask2 = np.zeros(10, dtype=bool)
+        mask2[[5, 6]] = True
+        sel[mask2]
+        self.assertEqual(sel.nselect, 2)
+
+    def testRepr(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[0, 1]] = True
+        sel = make_point_sel(shape, mask)
+        self.assertIn("PointSelection", repr(sel))
+
+
+class FancySelectionTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(FancySelectionTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testCoordList1D(self):
+        shape = (10,)
+        sel = FancySelection(shape)
+        sel[[2, 5, 8]]
+        self.assertEqual(sel.select_type, H5S_SEL_FANCY)
+
+    def testGetQueryParamSlice(self):
+        shape = (10,)
+        sel = FancySelection(shape)
+        sel[slice(2, 8)]
+        param = sel.getQueryParam()
+        self.assertEqual(param, "[2:8]")
+
+    def testGetQueryParamList(self):
+        shape = (10,)
+        sel = FancySelection(shape)
+        sel[[1, 3, 5]]
+        param = sel.getQueryParam()
+        self.assertEqual(param, "[[1,3,5]]")
+
+    def testGetQueryParam2D(self):
+        shape = (10, 10)
+        sel = FancySelection(shape)
+        sel[(slice(1, 4), slice(2, 6))]
+        param = sel.getQueryParam()
+        self.assertEqual(param, "[1:4,2:6]")
+
+    def testRepr(self):
+        shape = (10,)
+        sel = FancySelection(shape)
+        sel[slice(0, 5)]
+        self.assertIn("FancySelection", repr(sel))
+
+
+class IntersectHyperslabTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(IntersectHyperslabTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testOverlapping1D(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(0, 6))
+        s2 = selections.select(shape, slice(3, 10))
+        result = selections.intersect(s1, s2)
+        self.assertIsInstance(result, SimpleSelection)
+        self.assertEqual(result.nselect, 3)
+        self.assertEqual(result.start, (3,))
+        self.assertEqual(result.count, (3,))
+
+    def testNonOverlapping1D(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(0, 3))
+        s2 = selections.select(shape, slice(5, 10))
+        result = selections.intersect(s1, s2)
+        self.assertEqual(result.nselect, 0)
+
+    def testOverlapping2D(self):
+        shape = (10, 10)
+        s1 = selections.select(shape, (slice(0, 6), slice(0, 6)))
+        s2 = selections.select(shape, (slice(3, 10), slice(3, 10)))
+        result = selections.intersect(s1, s2)
+        self.assertEqual(result.nselect, 9)
+        self.assertEqual(result.start, (3, 3))
+        self.assertEqual(result.count, (3, 3))
+
+    def testFullOverlap(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(2, 8))
+        s2 = selections.select(shape, slice(0, 10))
+        result = selections.intersect(s1, s2)
+        self.assertEqual(result.nselect, 6)
+        self.assertEqual(result.start, (2,))
+        self.assertEqual(result.count, (6,))
+
+    def testSelectAllWithHyperslab(self):
+        shape = (10,)
+        s_all = selections.select(shape, ...)
+        s_hyp = selections.select(shape, slice(3, 7))
+        result = selections.intersect(s_all, s_hyp)
+        self.assertEqual(result.nselect, 4)
+        self.assertEqual(result.start, (3,))
+
+    def testSteppedSliceRaises(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(0, 10, 2))
+        s2 = selections.select(shape, slice(0, 10, 2))
+        with self.assertRaises(ValueError):
+            selections.intersect(s1, s2)
+
+    def testShapeMismatchRaises(self):
+        s1 = selections.select((10,), slice(0, 5))
+        s2 = selections.select((20,), slice(0, 5))
+        with self.assertRaises(ValueError):
+            selections.intersect(s1, s2)
+
+    def testBadArgRaises(self):
+        s1 = selections.select((10,), slice(0, 5))
+        with self.assertRaises(TypeError):
+            selections.intersect(s1, "not a selection")
+
+
+class IntersectPointHyperslabTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(IntersectPointHyperslabTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testPointsInsideHyperslab1D(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[0, 1, 3, 5, 9]] = True
+        pts = make_point_sel(shape, mask)
+        hyp = selections.select(shape, slice(2, 8))
+        result = selections.intersect(pts, hyp)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 2)
+        self.assertEqual(list(result.points.flatten()), [3, 5])
+
+    def testHyperslabIntersectPoints1D(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[0, 1, 3, 5, 9]] = True
+        pts = make_point_sel(shape, mask)
+        hyp = selections.select(shape, slice(2, 8))
+        result = selections.intersect(hyp, pts)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 2)
+        self.assertEqual(list(result.points.flatten()), [3, 5])
+
+    def testAllPointsInsideHyperslab(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[2, 4, 6]] = True
+        pts = make_point_sel(shape, mask)
+        hyp = selections.select(shape, slice(0, 10))
+        result = selections.intersect(pts, hyp)
+        self.assertEqual(result.nselect, 3)
+
+    def testNoPointsInsideHyperslab(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[0, 1]] = True
+        pts = make_point_sel(shape, mask)
+        hyp = selections.select(shape, slice(5, 10))
+        result = selections.intersect(pts, hyp)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 0)
+
+    def testPoints2DIntersectHyperslab(self):
+        shape = (6, 6)
+        pts = selections.select(shape, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)])
+        hyp = selections.select(shape, (slice(1, 4), slice(1, 4)))
+        result = selections.intersect(pts, hyp)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 3)
+        pts_list = [tuple(row) for row in result.points]
+        self.assertIn((1, 1), pts_list)
+        self.assertIn((2, 2), pts_list)
+        self.assertIn((3, 3), pts_list)
+
+    def testPoints2DIntersectSelectAll(self):
+        shape = (5, 5)
+        pts = selections.select(shape, [(0, 0), (2, 3), (4, 4)])
+        s_all = selections.select(shape, ...)
+        result = selections.intersect(pts, s_all)
+        self.assertEqual(result.nselect, 3)
+
+    def testHyperslabWithStep1D(self):
+        shape = (20,)
+        mask = np.zeros(20, dtype=bool)
+        mask[[0, 2, 4, 6, 7]] = True
+        pts = make_point_sel(shape, mask)
+        # step-2 hyperslab covers 0,2,4,6,8,...
+        hyp = selections.select(shape, slice(0, 10, 2))
+        result = selections.intersect(pts, hyp)
+        self.assertEqual(result.nselect, 4)
+        self.assertEqual(list(result.points.flatten()), [0, 2, 4, 6])
+
+    def testHyperslabFirstArg2D(self):
+        # hyperslab as the first argument in 2-D
+        shape = (8, 10)
+        hyp = selections.select(shape, (slice(2, 6), slice(3, 8)))
+        pts = selections.select(shape, [(1, 1), (2, 3), (3, 5), (5, 7), (6, 9)])
+        result = selections.intersect(hyp, pts)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 3)
+        pts_list = [tuple(row) for row in result.points]
+        self.assertIn((2, 3), pts_list)
+        self.assertIn((3, 5), pts_list)
+        self.assertIn((5, 7), pts_list)
+
+    def testDisjointBboxReturnsEmpty(self):
+        # bounding boxes don't overlap at all — exercises the bbox fast path
+        shape = (20,)
+        mask = np.zeros(20, dtype=bool)
+        mask[[0, 1, 2, 3, 4]] = True        # points in [0, 5)
+        pts = make_point_sel(shape, mask)
+        hyp = selections.select(shape, slice(10, 20))  # hyperslab in [10, 20)
+        result = selections.intersect(hyp, pts)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 0)
+        # commuted
+        result2 = selections.intersect(pts, hyp)
+        self.assertEqual(result2.nselect, 0)
+
+
+class IntersectPointPointTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(IntersectPointPointTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testOverlapping1D(self):
+        shape = (10,)
+        mask1 = np.zeros(10, dtype=bool)
+        mask1[[0, 1, 3, 5]] = True
+        mask2 = np.zeros(10, dtype=bool)
+        mask2[[1, 3, 7]] = True
+        s1 = make_point_sel(shape, mask1)
+        s2 = make_point_sel(shape, mask2)
+        result = selections.intersect(s1, s2)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 2)
+        self.assertEqual(list(result.points.flatten()), [1, 3])
+
+    def testNoOverlap1D(self):
+        shape = (10,)
+        mask1 = np.zeros(10, dtype=bool)
+        mask1[[0, 1]] = True
+        mask2 = np.zeros(10, dtype=bool)
+        mask2[[8, 9]] = True
+        result = selections.intersect(make_point_sel(shape, mask1),
+                                      make_point_sel(shape, mask2))
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 0)
+
+    def testIdentical1D(self):
+        shape = (10,)
+        mask = np.zeros(10, dtype=bool)
+        mask[[2, 5, 8]] = True
+        s1 = make_point_sel(shape, mask)
+        s2 = make_point_sel(shape, mask)
+        result = selections.intersect(s1, s2)
+        self.assertEqual(result.nselect, 3)
+        self.assertEqual(list(result.points.flatten()), [2, 5, 8])
+
+    def testOverlapping2D(self):
+        shape = (6, 6)
+        s1 = selections.select(shape, [(0, 0), (1, 1), (2, 2), (3, 3)])
+        s2 = selections.select(shape, [(1, 1), (2, 2), (5, 5)])
+        result = selections.intersect(s1, s2)
+        self.assertIsInstance(result, PointSelection)
+        self.assertEqual(result.nselect, 2)
+        pts_list = [tuple(row) for row in result.points]
+        self.assertIn((1, 1), pts_list)
+        self.assertIn((2, 2), pts_list)
+
+    def testNoOverlap2D(self):
+        shape = (6, 6)
+        s1 = selections.select(shape, [(0, 0), (1, 1)])
+        s2 = selections.select(shape, [(3, 3), (4, 4)])
+        result = selections.intersect(s1, s2)
+        self.assertEqual(result.nselect, 0)
+
+    def testCommutativity(self):
+        shape = (10,)
+        mask1 = np.zeros(10, dtype=bool)
+        mask1[[0, 2, 4, 6]] = True
+        mask2 = np.zeros(10, dtype=bool)
+        mask2[[2, 4, 8]] = True
+        s1 = make_point_sel(shape, mask1)
+        s2 = make_point_sel(shape, mask2)
+        r_fwd = selections.intersect(s1, s2)
+        r_rev = selections.intersect(s2, s1)
+        self.assertEqual(r_fwd.nselect, r_rev.nselect)
+        self.assertEqual(list(r_fwd.points.flatten()), list(r_rev.points.flatten()))
+
+
+class ContainedTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(ContainedTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testContainedTrue(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(2, 5))
+        s2 = selections.select(shape, slice(0, 10))
+        self.assertTrue(selections.contained(s1, s2))
+
+    def testContainedFalse(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(0, 6))
+        s2 = selections.select(shape, slice(3, 10))
+        self.assertFalse(selections.contained(s1, s2))
+
+    def testContainedSelf(self):
+        shape = (10,)
+        s = selections.select(shape, slice(2, 8))
+        self.assertTrue(selections.contained(s, s))
+
+    def testContained2D(self):
+        shape = (10, 10)
+        inner = selections.select(shape, (slice(2, 5), slice(2, 5)))
+        outer = selections.select(shape, (slice(0, 10), slice(0, 10)))
+        self.assertTrue(selections.contained(inner, outer))
+        self.assertFalse(selections.contained(outer, inner))
+
+
+class TranslateTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(TranslateTest, self).__init__(*args, **kwargs)
+        self.logger = logging.getLogger()
+        self.logger.setLevel(logging.WARNING)
+
+    def testTranslate1D(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(2, 8))
+        s2 = selections.select(shape, slice(4, 7))
+        result = selections.translate(s1, s2)
+        self.assertEqual(result.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(result.start, (2,))
+        self.assertEqual(result.count, (3,))
+
+    def testTranslate2D(self):
+        shape = (10, 10)
+        s1 = selections.select(shape, (slice(2, 8), slice(2, 8)))
+        s2 = selections.select(shape, (slice(4, 6), slice(4, 6)))
+        result = selections.translate(s1, s2)
+        self.assertEqual(result.select_type, H5S_SEL_HYPERSLABS)
+        self.assertEqual(result.start, (2, 2))
+        self.assertEqual(result.count, (2, 2))
+
+    def testTranslate2DWithPoints(self):
+        shape = (10, 10)
+        s1 = selections.select(shape, (slice(2, 8), slice(2, 8)))
+        s2 = selections.select(shape, [(2, 2), (3, 3), (9, 9)])
+
+        result = selections.translate(s1, s2)
+        self.assertEqual(result.select_type, H5S_SEL_POINTS)
+        self.assertEqual(result.nselect, 2)
+
+        self.assertEqual(result.points.shape, (2, 2))
+        self.assertEqual(list(result.points[0]), [0, 0])
+        self.assertEqual(list(result.points[1]), [1, 1])
+
+    def testTranslateNoOverlapRaises(self):
+        shape = (10,)
+        s1 = selections.select(shape, slice(0, 3))
+        s2 = selections.select(shape, slice(5, 8))
+        with self.assertRaises(ValueError):
+            selections.translate(s1, s2)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 4b69b2ce8a546176e4dd3d42a3690b92f19fc929 Mon Sep 17 00:00:00 2001
From: John Readey <jreadey@hdfgroup.org>
Date: Mon, 1 Jun 2026 21:04:03 +0200
Subject: [PATCH 129/129] fix bug in selection creater

---
 src/h5json/hdf5db.py        |  4 +++-
 src/h5json/selections.py    | 27 ++++++++++++++++-----------
 test/unit/hdf5db_test.py    |  2 ++
 test/unit/selection_test.py | 18 ++++++++++++++++++
 4 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index adcd9f10..ab72ce9c 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -673,7 +673,9 @@ def getDatasetValues(self, dset_id, sel):
         def init_arr(dtype, cpl):
             """ create an ndarray with the give shape, dtype and fill_value
                 (if the latter is found in the creation properties list) """
-            if hasattr(sel, "count"):
+            if isinstance(sel, selections.ScalarSelection):
+                arr_shape = ()
+            elif hasattr(sel, "count"):
                 arr_shape = sel.count if isinstance(sel.count, tuple) else (sel.count, )
             else:
                 arr_shape = (sel.nselect,)
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
index 93366937..fe0186ea 100644
--- a/src/h5json/selections.py
+++ b/src/h5json/selections.py
@@ -70,9 +70,16 @@ def select(obj, args):
     if not isinstance(args, tuple):
         args = (args,)
 
-    if hasattr(obj, "shape") and obj.shape == ():
+    if hasattr(obj, "shape"):
+        obj_shape = obj.shape
+    elif isinstance(obj, tuple):
+        obj_shape = obj
+    else:
+        raise TypeError("Object must be a dataset or a shape tuple")
+
+    if len(obj_shape) == 0:
         # scalar object
-        sel = ScalarSelection(obj.shape, args)
+        sel = ScalarSelection(obj_shape, args)
         return sel
 
     # "Special" indexing objects
@@ -80,12 +87,12 @@ def select(obj, args):
 
         arg = args[0]
         if hasattr(arg, "shape"):
-            obj_shape = obj.shape
+            arg_shape = arg.shape
         else:
-            obj_shape = obj
+            arg_shape = obj_shape
 
         if isinstance(arg, Selection):
-            if arg.shape != obj_shape:
+            if arg_shape != obj_shape:
                 raise TypeError("Mismatched selection shape")
             return arg
 
@@ -114,14 +121,12 @@ def select(obj, args):
                 int(a)
             except Exception:
                 use_fancy = True
-        if use_fancy and hasattr(obj, "shape"):
-            sel = FancySelection(obj.shape)
+        if use_fancy:
+            sel = FancySelection(obj_shape)
             sel[args]
             return sel
-    if hasattr(obj, "shape"):
-        sel = SimpleSelection(obj.shape)
-    else:
-        sel = SimpleSelection(obj)
+    sel = SimpleSelection(obj_shape)
+     
     sel[args]
     return sel
 
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index dea6f663..0b383c43 100644
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -579,6 +579,7 @@ def test1DDataset(self):
 
         # do a point selection
         sel = selections.select(shape, [2, 3, 5, 7])
+        
         val = db.getDatasetValues(dset_id, sel)
         self.assertTrue(isinstance(val, np.ndarray))
         self.assertEqual(val.shape, (4,))
@@ -840,6 +841,7 @@ def testScalarDataset(self):
         db.createHardLink(root_id, "dset", dset_id)
         db.createAttribute(dset_id, "a1", "Hello, world")
         sel_all = selections.select((), ...)
+         
         arr = db.getDatasetValues(dset_id, sel_all)
         self.assertEqual(arr.dtype, dtype)
         self.assertEqual(arr.shape, ())
diff --git a/test/unit/selection_test.py b/test/unit/selection_test.py
index 7ca42225..8cac5603 100644
--- a/test/unit/selection_test.py
+++ b/test/unit/selection_test.py
@@ -251,6 +251,24 @@ def testBoolMask2D(self):
         self.assertEqual(bbox[0], (0, 1))
         self.assertEqual(bbox[1], (3, 4))
 
+    def testListOfCoords1D(self):
+        shape = (10,)
+        sel = selections.select(shape, [2, 3, 5, 7])
+        self.assertIsInstance(sel, PointSelection)
+        self.assertEqual(sel.select_type, H5S_SEL_POINTS)
+        self.assertEqual(sel.nselect, 4)
+        points = sel.points
+        self.assertEqual(len(points), 4)
+        for i in range(len(points)):
+            pt = points[i]
+            self.assertTrue(pt in (2, 3, 5, 7))
+
+        bbox = sel.bbox
+        self.assertTrue(isinstance(bbox, tuple))
+        self.assertEqual(len(bbox), 2)
+        self.assertEqual(bbox[0], (2,))
+        self.assertEqual(bbox[1], (8,))
+
     def testListOfCoords2D(self):
         shape = (8, 10)
         sel = selections.select(shape, [(0, 0), (1, 1), (2, 2), (3, 3)])