Commit 1faadd5

fix: drop arbitrary 20x cap on r3/v1 decompressed size
ZstdCompressor.compress() (used by the gateway-side r3_serializer) embeds the uncompressed size in the frame header, so passing max_output_size=len(compressed) * 20 was both unnecessary and incorrect: highly compressible router-replay payloads (e.g. tokens routing to a small subset of experts) routinely exceed a 20:1 ratio and would have failed deserialization with ZstdError. Removing the cap lets the library auto-allocate from the embedded content size.

Verified locally: a 64 KiB zero-filled matrix payload compresses to ~35 bytes (>1800x ratio) and now deserializes cleanly. Adds a regression test covering the high-compression case.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 8caac53 · commit 1faadd5

2 files changed: 27 additions & 1 deletion
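
Before the diffs, a minimal sketch of the behaviour described in the commit message, assuming the python-zstandard package (imported as zstd, as in r3_deserializer.py). The 64 KiB zero-filled payload mirrors the locally verified example above; whether the capped call actually raises can depend on the zstandard version, so the sketch handles both outcomes rather than asserting one.

import zstandard as zstd

payload = b"\x00" * (64 * 1024)                  # highly compressible input
compressed = zstd.ZstdCompressor().compress(payload)
print(len(payload) / len(compressed))            # well over the old 20:1 cap

dctx = zstd.ZstdDecompressor()

# Old behaviour: cap the output buffer at len(compressed) * 20 bytes. Per the
# commit message this raised ZstdError for payloads like the one above.
try:
    dctx.decompress(compressed, max_output_size=len(compressed) * 20)
    print("capped decompress succeeded")
except zstd.ZstdError as exc:
    print(f"capped decompress failed: {exc}")

# New behaviour: compress() wrote the content size into the frame header,
# so decompress() can size its output buffer from that and needs no cap.
assert dctx.decompress(compressed) == payload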


eval_protocol/adapters/r3_deserializer.py
Lines changed: 3 additions & 1 deletion

@@ -118,8 +118,10 @@ def decompress_and_parse_r3(
     """
     compressed = base64.b64decode(data_b64)

+    # ZstdCompressor.compress() embeds the uncompressed size in the frame
+    # header by default, so the library can auto-allocate the output buffer.
     decompressor = zstd.ZstdDecompressor()
-    raw = decompressor.decompress(compressed, max_output_size=len(compressed) * 20)
+    raw = decompressor.decompress(compressed)

     header = _parse_header(raw)

tests/adapters/test_r3_deserializer.py
Lines changed: 24 additions & 0 deletions

@@ -266,6 +266,30 @@ def test_zero_replayed_tokens(self):
         assert all(m is None for m in matrices)
         assert metadata["replayed_token_count"] == 0

+    def test_high_compression_ratio_payload(self):
+        """Highly compressible payloads (e.g. tokens routing to the same
+        experts) can compress much better than 20:1; the deserializer must
+        not impose an arbitrary cap on the decompressed size."""
+        # 64 KiB of zeros compresses to ~35 bytes (>1000x ratio).
+        total_tokens = 1024
+        matrix_elem_size = 64  # bytes/token
+        matrix_data = b"\x00" * (total_tokens * matrix_elem_size)
+
+        raw = _make_raw_r3(
+            total_token_count=total_tokens,
+            replayed_token_count=total_tokens,
+            matrix_data=matrix_data,
+        )
+        blob = _compress_and_b64(raw)
+        # Sanity: compression really is >> 20x for this case.
+        assert len(base64.b64decode(blob)) * 20 < len(raw)
+
+        matrices, metadata = decompress_and_parse_r3(blob)
+        assert len(matrices) == total_tokens
+        assert metadata["replayed_token_count"] == total_tokens
+        for m in matrices:
+            assert base64.b64decode(m) == b"\x00" * matrix_elem_size
+

 class TestRoundTrip:
     """Round-trip test using the gateway's serializer and EP's deserializer."""
