Commit 22c3591

Bump to v0.1.1 with simplified endianness handling
1 parent b371006 commit 22c3591

3 files changed: 22 additions & 25 deletions


pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "numcodecs-tokenize"
-version = "0.1.0"
+version = "0.1.1"
 description = "Tokenization codec for the `numcodecs` buffer compression API"
 readme = "README.md"
 license = "MPL-2.0"

src/numcodecs_tokenize/__init__.py

Lines changed: 11 additions & 24 deletions
@@ -5,7 +5,6 @@
 __all__ = ["TokenizeCodec"]
 
 from io import BytesIO
-from sys import byteorder
 
 import numcodecs.compat
 import numcodecs.registry
@@ -83,32 +82,28 @@ def encode(
         else:
             utype = a.dtype
 
+        assert (dtype.itemsize % utype.itemsize) == 0
+
         # insert padding to align with itemsize
         message.append(
             b"\0" * (utype.itemsize - (sum(len(m) for m in message) % utype.itemsize))
         )
 
         # ensure that the table keys are encoded in little endian binary
         table_keys_array = unique[argsort]
-        table_keys_byteorder = table_keys_array.dtype.byteorder
-        table_keys_byteorder = (
-            table_keys_byteorder
-            if table_keys_byteorder in ("<", ">")
-            else ("<" if (byteorder == "little") else ">")
+        message.append(
+            table_keys_array.astype(table_keys_array.dtype.newbyteorder("<")).tobytes()
         )
-        if table_keys_byteorder != "<":
-            table_keys_array = table_keys_array.byteswap()
-        message.append(table_keys_array.tobytes())
 
         indices = argsortinv[inverse].astype(utype)
-        if table_keys_byteorder != "<":
-            indices = indices.byteswap()
-        message.append(indices.tobytes())
+        message.append(indices.astype(indices.dtype.newbyteorder("<")).tobytes())
 
         encoded_bytes = b"".join(message)
 
         encoded: np.ndarray[tuple[int], np.dtype[np.unsignedinteger]] = np.frombuffer(
-            encoded_bytes, dtype=utype, count=len(encoded_bytes) // utype.itemsize
+            encoded_bytes,
+            dtype=utype.newbyteorder("<"),
+            count=len(encoded_bytes) // utype.itemsize,
         )
 
         return encoded  # type: ignore
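
The encode path above drops the explicit byte-order bookkeeping: instead of resolving "="/"|" byte orders against sys.byteorder and calling byteswap() by hand, the table keys and indices are simply cast to an explicitly little-endian dtype before serialization. A minimal, standalone sketch (illustrative only, not code from this commit) of why the two approaches produce the same bytes:

import sys

import numpy as np

# Illustrative comparison: both routes yield identical little-endian bytes,
# whatever byte order the input array uses.
a = np.arange(4, dtype=np.dtype(np.uint32).newbyteorder(">"))  # big-endian input

# Roughly what the removed lines did: resolve "=" / "|" byte orders against
# the machine order, then byteswap by hand when the data is not little-endian.
order = a.dtype.byteorder
order = order if order in ("<", ">") else ("<" if sys.byteorder == "little" else ">")
old_bytes = (a.byteswap() if order != "<" else a).tobytes()

# What the commit switches to: a single astype to an explicitly little-endian
# dtype, which copies (and swaps) only when actually necessary.
new_bytes = a.astype(a.dtype.newbyteorder("<")).tobytes()

assert old_bytes == new_bytes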
@@ -168,24 +163,16 @@ def decode(
             dtype=_dtype_bits(dtype).newbyteorder("<"),
             count=table_len,
         )
-        dtype_bits_byteorder = _dtype_bits(dtype).byteorder
-        dtype_bits_byteorder = (
-            dtype_bits_byteorder
-            if dtype_bits_byteorder in ("<", ">")
-            else ("<" if (byteorder == "little") else ">")
-        )
-        if dtype_bits_byteorder != "<":
-            table_keys = table_keys.byteswap()
 
         indices = np.frombuffer(
             b_io.read(),
             dtype=utype.newbyteorder("<"),
             count=np.prod(shape, dtype=np.uintp),
         )
-        if dtype_bits_byteorder != "<":
-            indices = indices.byteswap()
 
-        decoded = table_keys[indices].view(dtype).reshape(shape)
+        decoded = (
+            table_keys[indices].astype(_dtype_bits(dtype)).view(dtype).reshape(shape)
+        )
 
         return numcodecs.compat.ndarray_copy(decoded, out)  # type: ignore
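
The decode path mirrors this: the payload is read with np.frombuffer using explicitly little-endian dtypes, and a single astype back to the native bit-pattern dtype replaces the removed sys.byteorder check and manual byteswap. A small standalone sketch of that pattern (illustrative only; the uint64 dtype here merely stands in for what the codec's _dtype_bits helper would return for float64):

import numpy as np

# Serialize three float64 values as little-endian 64-bit bit patterns,
# analogous to how the codec stores its table keys.
bits = np.dtype(np.uint64)  # stand-in for _dtype_bits(np.dtype(np.float64))
stored = np.array([1.5, -2.0, 0.0]).view(bits).astype(bits.newbyteorder("<")).tobytes()

# Read them back: frombuffer with an explicitly little-endian dtype is correct
# on any machine, and astype to the native dtype swaps only when needed before
# the bit patterns are reinterpreted as float64 again.
table_keys = np.frombuffer(stored, dtype=bits.newbyteorder("<"), count=3)
decoded = table_keys.astype(bits).view(np.float64)

assert decoded.tolist() == [1.5, -2.0, 0.0]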

tests/test_tokenize.py

Lines changed: 10 additions & 0 deletions
@@ -24,6 +24,16 @@ def test_roundtrip():
     check_roundtrip(np.zeros(tuple()))
     check_roundtrip(np.zeros((0,)))
     check_roundtrip(np.arange(1000).reshape(10, 10, 10))
+    check_roundtrip(
+        np.arange(1000)
+        .reshape(10, 10, 10)
+        .astype(np.dtype(np.uint32).newbyteorder("<"))
+    )
+    check_roundtrip(
+        np.arange(1000)
+        .reshape(10, 10, 10)
+        .astype(np.dtype(np.uint32).newbyteorder(">"))
+    )
     check_roundtrip(np.array([np.inf, -np.inf, np.nan, -np.nan, 0.0, -0.0]))
     check_roundtrip(
         np.array(
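
The two new test cases feed the same data through the codec with explicitly little- and big-endian input dtypes, exercising the simplified endianness handling end to end. A hedged usage sketch of such a roundtrip (not taken from the repository's test suite; it assumes TokenizeCodec can be constructed without arguments):

import numpy as np

from numcodecs_tokenize import TokenizeCodec

# Roundtrip an array whose dtype carries an explicit, non-native byte order.
codec = TokenizeCodec()
data = (
    np.arange(1000)
    .reshape(10, 10, 10)
    .astype(np.dtype(np.uint32).newbyteorder(">"))
)
decoded = codec.decode(codec.encode(data))
np.testing.assert_array_equal(decoded, data)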
