Skip to content

Commit fc57b60

Browse files
committed
[GR-75555] Fix pure Java gzip compression exposed by pip update. Fixes #818.
PullRequest: graalpython/4518
2 parents e208d90 + a4650dc commit fc57b60

6 files changed

Lines changed: 389 additions & 88 deletions

File tree

graalpython/com.oracle.graal.python.test/src/tests/test_zlib.py

Lines changed: 147 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018, 2025, Oracle and/or its affiliates.
1+
# Copyright (c) 2018, 2026, Oracle and/or its affiliates.
22
# Copyright (C) 1996-2017 Python Software Foundation
33
#
44
# Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
@@ -9,6 +9,8 @@
99
import unittest
1010
import zlib
1111

12+
from tests.util import assert_raises
13+
1214
pintNumber = 98765432109876543210
1315
longNumber = 9876543210
1416
GZ_FILE_NAME = 'testgzfile.gz'
@@ -242,6 +244,119 @@ def test_zlib_decompress_gzip():
242244
data = d.decompress(f.read()) + d.flush()
243245
assert data == GZ_DATA
244246

247+
248+
def test_zlib_decompress_gzip_bad_trailer():
    """A gzip stream with a corrupted trailer must raise zlib.error.

    Two corruptions are exercised: a flipped bit 8 bytes from the end
    (decoded in one shot) and a flipped bit 4 bytes from the end (decoded
    through a streaming decompressor fed in two pieces).
    """
    import gzip

    gzip_wbits = 16 + zlib.MAX_WBITS

    # One-shot decode: corrupt the first of the last eight bytes.
    damaged_first = bytearray(gzip.compress(GZ_DATA))
    damaged_first[-8] ^= 1
    assert_raises(zlib.error, zlib.decompress, damaged_first, gzip_wbits)

    # Streaming decode: corrupt the first of the last four bytes.
    damaged_second = bytearray(gzip.compress(GZ_DATA))
    damaged_second[-4] ^= 1
    stream = zlib.decompressobj(gzip_wbits)

    def decompress_streaming():
        # Same call order as a single chained expression: head, tail, flush.
        head = stream.decompress(damaged_second[:20])
        tail = stream.decompress(damaged_second[20:])
        return head + tail + stream.flush()

    assert_raises(zlib.error, decompress_streaming)
263+
264+
265+
def _raw_deflate_and_trailer(data):
    """Return ``(body, trailer)`` for a hand-assembled gzip member.

    ``body`` is ``data`` compressed as a raw deflate stream (negative wbits,
    so zlib adds no header or trailer of its own). ``trailer`` is the 8-byte
    little-endian pair of CRC-32 of ``data`` and ``len(data)`` modulo 2**32.
    """
    import struct

    compressor = zlib.compressobj(wbits=-zlib.MAX_WBITS)
    body = compressor.compress(data) + compressor.flush()
    trailer = struct.pack("<II", zlib.crc32(data), len(data) & 0xffffffff)
    return body, trailer


def gzip_bytes_with_header_crc(data, header_crc=None):
    """Build a gzip member whose fixed header is followed by a 16-bit header CRC.

    :param data: payload bytes to compress.
    :param header_crc: value to store in the 2-byte header CRC field. When
        ``None`` the low 16 bits of the CRC-32 of the fixed 10-byte header are
        used; pass any other value to produce a deliberately wrong field.
    :return: the complete gzip member as ``bytes``.
    """
    import struct

    # Flag byte 0x02 announces the header CRC field that follows.
    header = b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x02\xff"
    if header_crc is None:
        header_crc = zlib.crc32(header) & 0xffff
    body, trailer = _raw_deflate_and_trailer(data)
    return header + struct.pack("<H", header_crc) + body + trailer


def gzip_bytes_with_optional_header(data, flags, optional_header):
    """Build a gzip member with caller-chosen flags and optional header bytes.

    :param data: payload bytes to compress.
    :param flags: integer stored as the single flag byte of the fixed header.
    :param optional_header: raw bytes inserted between the fixed 10-byte
        header and the compressed body; the caller must make them consistent
        with ``flags``.
    :return: the complete gzip member as ``bytes``.
    """
    body, trailer = _raw_deflate_and_trailer(data)
    header = b"\x1f\x8b\x08" + bytes([flags]) + b"\x00\x00\x00\x00\x02\x00"
    return header + optional_header + body + trailer
285+
286+
287+
def test_zlib_decompress_gzip_header_crc():
    """A gzip member with a header CRC decodes when it matches, fails when forced to 0."""
    gzip_wbits = 16 + zlib.MAX_WBITS

    # Default helper call computes the header CRC itself.
    well_formed = gzip_bytes_with_header_crc(GZ_DATA)
    assert zlib.decompress(well_formed, gzip_wbits) == GZ_DATA

    # Explicit header_crc=0 replaces the computed value.
    bad_header_crc = gzip_bytes_with_header_crc(GZ_DATA, 0)
    assert_raises(zlib.error, zlib.decompress, bad_header_crc, gzip_wbits)
293+
294+
295+
def test_zlib_decompress_gzip_truncated_optional_header():
    """Input that ends inside an optional header field must raise zlib.error."""
    # Flag byte 0x08 followed by text that never reaches a NUL terminator.
    truncated = b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x02\xffunterminated"
    gzip_wbits = 16 + zlib.MAX_WBITS
    assert_raises(zlib.error, zlib.decompress, truncated, gzip_wbits)
298+
299+
300+
def test_zlib_decompress_gzip_empty_fname_split_after_header():
    """Streaming decode works when input is split after the first 10 bytes.

    The member uses flag byte 0x08 with a single NUL as its optional header.
    """
    fixed_header_len = 10
    member = gzip_bytes_with_optional_header(GZ_DATA, 0x08, b"\x00")
    stream = zlib.decompressobj(16 + zlib.MAX_WBITS)

    # The first chunk holds header bytes only, so no output is expected yet.
    assert stream.decompress(member[:fixed_header_len]) == b""

    remainder = stream.decompress(member[fixed_header_len:])
    assert remainder + stream.flush() == GZ_DATA
305+
306+
307+
def test_zlib_decompress_gzip_empty_fcomment_split_after_fname():
    """Streaming decode works when input is split after the first 11 bytes.

    The member uses flag byte 0x18 with two NUL bytes as its optional header,
    so the split falls between those two bytes.
    """
    split_at = 11
    member = gzip_bytes_with_optional_header(GZ_DATA, 0x18, b"\x00\x00")
    stream = zlib.decompressobj(16 + zlib.MAX_WBITS)

    # Nothing but header bytes so far: no payload may be produced.
    assert stream.decompress(member[:split_at]) == b""

    remainder = stream.decompress(member[split_at:])
    assert remainder + stream.flush() == GZ_DATA
312+
313+
314+
def test_zlib_decompress_gzip_copy_preserves_crc():
    """A copy of a mid-stream gzip decompressor can finish decoding the stream.

    The original object decodes the first 400 compressed bytes, which must
    already yield more than ``zlib.DEF_BUF_SIZE`` bytes of output; the copy
    then decodes the remainder. NOTE(review): going by the test name, the
    point is presumably that the copy carries the running checksum, since the
    gzip trailer is verified on the copy - confirm against the implementation.
    """
    import gzip

    split_at = 400
    contents = bytes(range(251)) * 4000
    compressed = gzip.compress(contents)

    source = zlib.decompressobj(16 + zlib.MAX_WBITS)
    first = source.decompress(compressed[:split_at])
    assert len(first) > zlib.DEF_BUF_SIZE

    clone = source.copy()
    rest = clone.decompress(compressed[split_at:])
    assert first + rest + clone.flush() == contents
325+
326+
327+
def test_zlib_decompress_gzip_copy_after_eof_consumes_trailer():
    """Copying a gzip decompressor that already hit EOF yields a usable, finished copy."""
    import gzip

    member = gzip.compress(b"x")
    source = zlib.decompressobj(16 + zlib.MAX_WBITS)

    # The whole member is fed at once, so the stream must be complete.
    produced = source.decompress(member)
    assert produced == b"x"
    assert source.eof

    # The copy reports EOF too and accepts an empty feed without output.
    clone = source.copy()
    assert clone.eof
    assert clone.decompress(b"") == b""
338+
339+
340+
def test_zlib_decompress_copy_preserves_eof_after_max_length():
    """A decompressor stopped early by max_length is not at EOF, and neither is its copy."""
    source = zlib.decompressobj()

    # max_length=1 limits the call to the first of the three payload bytes.
    produced = source.decompress(zlib.compress(b"abc"), 1)
    assert produced == b"a"
    assert not source.eof

    clone = source.copy()
    assert not clone.eof
349+
350+
def test_zlib_decompress_copy_preserves_consumed_input_without_output():
    """A copy taken after input was consumed but no output produced can finish the stream."""
    prefix_len = 32
    compressed = zlib.compress(HAMLET_SCENE)
    source = zlib.decompressobj()

    # The test requires that the first 32 compressed bytes yield no output.
    first = source.decompress(compressed[:prefix_len])
    assert first == b""

    clone = source.copy()
    rest = clone.decompress(compressed[prefix_len:])
    assert rest + clone.flush() == HAMLET_SCENE
358+
359+
245360
def test_GR65704():
246361
contents = b"The quick brown fox jumped over the lazy dog"
247362
wbits = 27
@@ -280,3 +395,34 @@ def test_various_chunks():
280395
decompressed += decompressor.decompress(compressed[200:])
281396

282397
assert decompressed == contents
398+
399+
def test_gzip_decompress_max_length_unconsumed_tail():
    """Decode a gzip stream one output byte at a time via ``unconsumed_tail``.

    Every ``decompress`` call is limited to ``max_length=1``; input left over
    is re-fed from ``unconsumed_tail`` until it is empty. Afterwards the
    output must equal the original data, the stream must be at EOF, and no
    bytes may be reported as ``unused_data``.
    """
    import gzip

    chunk_size = 8192
    contents = bytes(range(251)) * 100
    compressed = gzip.compress(contents)
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)

    # Collect into a bytearray: output arrives one byte per call, so growing
    # an immutable bytes object would copy the whole result on every step.
    decompressed = bytearray()
    for i in range(0, len(compressed), chunk_size):
        decompressed += decompressor.decompress(compressed[i:i + chunk_size], 1)
        # Drain this chunk's leftover input before moving to the next chunk.
        while decompressor.unconsumed_tail:
            decompressed += decompressor.decompress(decompressor.unconsumed_tail, 1)
    decompressed += decompressor.flush()

    assert decompressed == contents
    assert decompressor.eof
    assert decompressor.unused_data == b''
416+
417+
418+
def test_gzip_decompress_post_eof_unused_data():
    """Bytes fed after the end of a gzip stream accumulate in ``unused_data``."""
    import gzip

    stream = zlib.decompressobj(16 + zlib.MAX_WBITS)
    member = gzip.compress(GZ_DATA)

    # Trailing bytes in the same call as the member end up in unused_data.
    produced = stream.decompress(member + b'first')
    assert produced == GZ_DATA
    assert stream.eof
    assert stream.unused_data == b'first'

    # A later feed produces no output and is appended to unused_data.
    produced = stream.decompress(b'second')
    assert produced == b''
    assert stream.unused_data == b'firstsecond'

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_time.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ test.test_time.TestAsctime4dyear.test_negative @ darwin-arm64,linux-aarch64,linu
33
test.test_time.TestAsctime4dyear.test_year @ darwin-arm64,linux-aarch64,linux-aarch64-github,linux-x86_64,linux-x86_64-github,win32-AMD64,win32-AMD64-github
44
test.test_time.TestLocale.test_bug_3061 @ darwin-arm64,linux-aarch64,linux-aarch64-github,linux-x86_64,linux-x86_64-github,win32-AMD64,win32-AMD64-github
55
test.test_time.TestPytime.test_localtime_timezone @ darwin-arm64,linux-aarch64,linux-aarch64-github,linux-x86_64,linux-x86_64-github,win32-AMD64,win32-AMD64-github
6-
# Can fail in specific timezones in CI
7-
!test.test_time.TestPytime.test_localtime_timezone, at line 757 with AssertionError: 3600 != 0
86
test.test_time.TestPytime.test_short_times @ darwin-arm64,linux-aarch64,linux-aarch64-github,linux-x86_64,linux-x86_64-github,win32-AMD64,win32-AMD64-github
97
test.test_time.TestPytime.test_strptime_timezone @ darwin-arm64,linux-aarch64,linux-aarch64-github,linux-x86_64,linux-x86_64-github,win32-AMD64,win32-AMD64-github
108
test.test_time.TestStrftime4dyear.test_large_year @ darwin-arm64,linux-aarch64,linux-aarch64-github,linux-x86_64,linux-x86_64-github,win32-AMD64,win32-AMD64-github

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/zlib/JavaCompress.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -183,9 +183,9 @@ private byte[] compressFinish() {
183183
@TruffleBoundary
184184
protected static byte[] compressFinish(byte[] bytes, int length, int level, int wbits, Node node) {
185185
CompressStream stream = createStream(level, wbits);
186-
stream.deflater.setInput(bytes, 0, length);
187186
if (stream.stream != null) {
188187
try {
188+
stream.stream.write(bytes, 0, length);
189189
stream.stream.finish();
190190
return stream.out.toByteArray();
191191
} catch (ZipException ze) {

0 commit comments

Comments
 (0)