diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 1bab785684bbab..3475fd606c0d21 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -77,6 +77,113 @@ The :mod:`binascii` module defines the following functions: Added the *newline* parameter. +.. function:: a2b_ascii85(string, /, *, fold_spaces=False, wrap=False, ignore=b"") + + Convert Ascii85 data back to binary and return the binary data. + + Valid Ascii85 data contains characters from the Ascii85 alphabet in groups + of five (except for the final group, which may have from two to five + characters). Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is + accepted as a short form of the group ``!!!!!``, which encodes four + consecutive null bytes. + + If *fold_spaces* is true, the special character ``y`` is also accepted as a + short form of the group ``+<VdL``, which encodes four consecutive spaces, as in + the Adobe Ascii85 format. + + *ignore* is an optional bytes-like object that specifies characters to + ignore in the input. + + Invalid Ascii85 data will raise :exc:`binascii.Error`. + + .. versionadded:: next + + +.. function:: b2a_ascii85(data, /, *, fold_spaces=False, wrap=False, width=0, pad=False) + + Convert binary data to a formatted sequence of ASCII characters in Ascii85 + coding. The return value is the converted data. + + If *fold_spaces* is true, four consecutive spaces are encoded as the + special character ``y`` instead of the sequence ``+<VdL``, as + in the Adobe Ascii85 format. + + If *width* is provided and greater than 0, the output is split into lines + of no more than the specified width separated by the ASCII newline + character. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + .. versionadded:: next + + +.. function:: a2b_base85(string, /, *, strict_mode=False) + + Convert Base85 data back to binary and return the binary data. + More than one line may be passed at a time. 
+ + If *strict_mode* is true, only valid Base85 data will be converted. + Invalid Base85 data will raise :exc:`binascii.Error`. + + Valid Base85 data contains characters from the Base85 alphabet in groups + of five (except for the final group, which may have from two to five + characters). Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. + + .. versionadded:: next + + +.. function:: b2a_base85(data, /, *, pad=False, newline=True) + + Convert binary data to a line of ASCII characters in Base85 coding. + The return value is the converted line. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + If *newline* is true, a newline char is appended to the result. + + .. versionadded:: next + + +.. function:: a2b_z85(string, /, *, strict_mode=False) + + Convert Z85 data back to binary and return the binary data. + More than one line may be passed at a time. + + If *strict_mode* is true, only valid Z85 data will be converted. + Invalid Z85 data will raise :exc:`binascii.Error`. + + Valid Z85 data contains characters from the Z85 alphabet in groups + of five (except for the final group, which may have from two to five + characters). Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. + + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + + .. versionadded:: next + + +.. function:: b2a_z85(data, /, *, pad=False, newline=True) + + Convert binary data to a line of ASCII characters in Z85 coding. + The return value is the converted line. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + If *newline* is true, a newline char is appended to the result. + + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + + .. versionadded:: next + + .. 
function:: a2b_qp(data, header=False) Convert a block of quoted-printable data back to binary and return the binary diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index e625bf2fef1912..93b9f8b44dd053 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1757,6 +1757,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold_spaces)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format_spec)); @@ -1955,6 +1956,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outpath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pad)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parameter)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); @@ -2141,7 +2143,9 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(weeks)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(which)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(who)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(width)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(withdata)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wrap)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(writable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(write_through)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 771f0f8cb4ad87..3b2634dffee251 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -480,6 +480,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(flags) STRUCT_FOR_ID(flush) STRUCT_FOR_ID(fold) + STRUCT_FOR_ID(fold_spaces) STRUCT_FOR_ID(follow_symlinks) STRUCT_FOR_ID(format) STRUCT_FOR_ID(format_spec) @@ -678,6 +679,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(outpath) STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) + STRUCT_FOR_ID(pad) STRUCT_FOR_ID(pages) STRUCT_FOR_ID(parameter) STRUCT_FOR_ID(parent) @@ -864,7 +866,9 @@ struct _Py_global_strings { STRUCT_FOR_ID(weeks) STRUCT_FOR_ID(which) STRUCT_FOR_ID(who) + STRUCT_FOR_ID(width) STRUCT_FOR_ID(withdata) + STRUCT_FOR_ID(wrap) STRUCT_FOR_ID(writable) STRUCT_FOR_ID(write) STRUCT_FOR_ID(write_through) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 499a2569b9a06c..7181ee49c19116 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1755,6 +1755,7 @@ extern "C" { INIT_ID(flags), \ INIT_ID(flush), \ INIT_ID(fold), \ + INIT_ID(fold_spaces), \ INIT_ID(follow_symlinks), \ INIT_ID(format), \ INIT_ID(format_spec), \ @@ -1953,6 +1954,7 @@ extern "C" { INIT_ID(outpath), \ INIT_ID(overlapped), \ INIT_ID(owner), \ + INIT_ID(pad), \ INIT_ID(pages), \ INIT_ID(parameter), \ INIT_ID(parent), \ @@ -2139,7 +2141,9 @@ extern "C" { INIT_ID(weeks), \ INIT_ID(which), \ INIT_ID(who), \ + INIT_ID(width), \ INIT_ID(withdata), \ + INIT_ID(wrap), \ INIT_ID(writable), \ INIT_ID(write), \ INIT_ID(write_through), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 1375f46018f943..cbed66e1e1ad0c 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ 
b/Include/internal/pycore_unicodeobject_generated.h @@ -1700,6 +1700,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(fold_spaces); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(follow_symlinks); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2492,6 +2496,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pad); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(pages); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -3236,10 +3244,18 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(width); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(withdata); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(wrap); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(writable); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/base64.py b/Lib/base64.py index c2fdee8eab9690..a7124f29243852 100644 --- a/Lib/base64.py +++ 
b/Lib/base64.py @@ -4,7 +4,6 @@ # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere -import struct import binascii @@ -290,36 +289,6 @@ def b16decode(s, casefold=False): # # Ascii85 encoding/decoding # - -_a85chars = None -_a85chars2 = None -_A85START = b"<~" -_A85END = b"~>" - -def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): - # Helper function for a85encode and b85encode - if not isinstance(b, bytes_types): - b = memoryview(b).tobytes() - - padding = (-len(b)) % 4 - if padding: - b = b + b'\0' * padding - words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b) - - chunks = [b'z' if foldnuls and not word else - b'y' if foldspaces and word == 0x20202020 else - (chars2[word // 614125] + - chars2[word // 85 % 7225] + - chars[word % 85]) - for word in words] - - if padding and not pad: - if chunks[-1] == b'z': - chunks[-1] = chars[0] * 5 - chunks[-1] = chunks[-1][:-padding] - - return b''.join(chunks) - def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): """Encode bytes-like object b using Ascii85 and return a bytes object. @@ -337,29 +306,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): adobe controls whether the encoded byte sequence is framed with <~ and ~>, which is used by the Adobe implementation. 
""" - global _a85chars, _a85chars2 - # Delay the initialization of tables to not waste memory - # if the function is never called - if _a85chars2 is None: - _a85chars = [bytes((i,)) for i in range(33, 118)] - _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] - - result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces) - - if adobe: - result = _A85START + result - if wrapcol: - wrapcol = max(2 if adobe else 1, wrapcol) - chunks = [result[i: i + wrapcol] - for i in range(0, len(result), wrapcol)] - if adobe: - if len(chunks[-1]) + 2 > wrapcol: - chunks.append(b'') - result = b'\n'.join(chunks) - if adobe: - result += _A85END - - return result + return binascii.b2a_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, width=wrapcol, pad=pad) def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): """Decode the Ascii85 encoded bytes-like object or ASCII string b. @@ -377,67 +325,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): The result is returned as a bytes object. 
""" - b = _bytes_from_decode_data(b) - if adobe: - if not b.endswith(_A85END): - raise ValueError( - "Ascii85 encoded byte sequences must end " - "with {!r}".format(_A85END) - ) - if b.startswith(_A85START): - b = b[2:-2] # Strip off start/end markers - else: - b = b[:-2] - # - # We have to go through this stepwise, so as to ignore spaces and handle - # special short sequences - # - packI = struct.Struct('!I').pack - decoded = [] - decoded_append = decoded.append - curr = [] - curr_append = curr.append - curr_clear = curr.clear - for x in b + b'u' * 4: - if b'!'[0] <= x <= b'u'[0]: - curr_append(x) - if len(curr) == 5: - acc = 0 - for x in curr: - acc = 85 * acc + (x - 33) - try: - decoded_append(packI(acc)) - except struct.error: - raise ValueError('Ascii85 overflow') from None - curr_clear() - elif x == b'z'[0]: - if curr: - raise ValueError('z inside Ascii85 5-tuple') - decoded_append(b'\0\0\0\0') - elif foldspaces and x == b'y'[0]: - if curr: - raise ValueError('y inside Ascii85 5-tuple') - decoded_append(b'\x20\x20\x20\x20') - elif x in ignorechars: - # Skip whitespace - continue - else: - raise ValueError('Non-Ascii85 digit found: %c' % x) - - result = b''.join(decoded) - padding = 4 - len(curr) - if padding: - # Throw away the extra padding - result = result[:-padding] - return result - -# The following code is originally taken (with permission) from Mercurial - -_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") -_b85chars = None -_b85chars2 = None -_b85dec = None + return binascii.a2b_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, ignore=ignorechars) def b85encode(b, pad=False): """Encode bytes-like object b in base85 format and return a bytes object. @@ -445,84 +334,25 @@ def b85encode(b, pad=False): If pad is true, the input is padded with b'\\0' so its length is a multiple of 4 bytes before encoding. 
""" - global _b85chars, _b85chars2 - # Delay the initialization of tables to not waste memory - # if the function is never called - if _b85chars2 is None: - _b85chars = [bytes((i,)) for i in _b85alphabet] - _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] - return _85encode(b, _b85chars, _b85chars2, pad) + return binascii.b2a_base85(b, pad=pad, newline=False) def b85decode(b): """Decode the base85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. """ - global _b85dec - # Delay the initialization of tables to not waste memory - # if the function is never called - if _b85dec is None: - # we don't assign to _b85dec directly to avoid issues when - # multiple threads call this function simultaneously - b85dec_tmp = [None] * 256 - for i, c in enumerate(_b85alphabet): - b85dec_tmp[c] = i - _b85dec = b85dec_tmp - - b = _bytes_from_decode_data(b) - padding = (-len(b)) % 5 - b = b + b'~' * padding - out = [] - packI = struct.Struct('!I').pack - for i in range(0, len(b), 5): - chunk = b[i:i + 5] - acc = 0 - try: - for c in chunk: - acc = acc * 85 + _b85dec[c] - except TypeError: - for j, c in enumerate(chunk): - if _b85dec[c] is None: - raise ValueError('bad base85 character at position %d' - % (i + j)) from None - raise - try: - out.append(packI(acc)) - except struct.error: - raise ValueError('base85 overflow in hunk starting at byte %d' - % i) from None - - result = b''.join(out) - if padding: - result = result[:-padding] - return result - -_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' - b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') -# Translating b85 valid but z85 invalid chars to b'\x00' is required -# to prevent them from being decoded as b85 valid chars. 
-_z85_b85_decode_diff = b';_`|~' -_z85_decode_translation = bytes.maketrans( - _z85alphabet + _z85_b85_decode_diff, - _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) -) -_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + return binascii.a2b_base85(b, strict_mode=True) def z85encode(s, pad=False): """Encode bytes-like object b in z85 format and return a bytes object.""" - return b85encode(s, pad).translate(_z85_encode_translation) + return binascii.b2a_z85(s, pad=pad, newline=False) def z85decode(s): """Decode the z85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. """ - s = _bytes_from_decode_data(s) - s = s.translate(_z85_decode_translation) - try: - return b85decode(s) - except ValueError as e: - raise ValueError(e.args[0].replace('base85', 'z85')) from None + return binascii.a2b_z85(s, strict_mode=True) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 288caf663e8321..281ad0b41d19dc 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -826,6 +826,12 @@ def test_a85_padding(self): eq(base64.a85decode(b'G^+IX'), b"xxxx") eq(base64.a85decode(b'G^+IXGQ7^D'), b"xxxxx\x00\x00\x00") + eq(base64.a85encode(b"\x00", pad=True), b'z') + eq(base64.a85encode(b"\x00"*2, pad=True), b'z') + eq(base64.a85encode(b"\x00"*3, pad=True), b'z') + eq(base64.a85encode(b"\x00"*4, pad=True), b'z') + eq(base64.a85encode(b"\x00"*5, pad=True), b'zz') + def test_b85_padding(self): eq = self.assertEqual diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 7ed7d7c47b6de1..a47b6f436578d8 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -9,9 +9,11 @@ # Note: "*_hex" functions are aliases for "(un)hexlify" -b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_qp', 'b2a_uu', +b2a_functions = ['b2a_ascii85', 'b2a_base64', 
'b2a_base85', 'b2a_z85', + 'b2a_hex', 'b2a_qp', 'b2a_uu', 'hexlify'] -a2b_functions = ['a2b_base64', 'a2b_hex', 'a2b_qp', 'a2b_uu', +a2b_functions = ['a2b_ascii85', 'a2b_base64', 'a2b_base85', 'a2b_z85', + 'a2b_hex', 'a2b_qp', 'a2b_uu', 'unhexlify'] all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx'] @@ -207,6 +209,432 @@ def assertInvalidLength(data): assertInvalidLength(b'a' * (4 * 87 + 1)) assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters + def test_ascii85_valid(self): + # Test Ascii85 with valid data + ASCII85_PREFIX = b"<~" + ASCII85_SUFFIX = b"~>" + + # Interleave blocks of 4 null bytes and 4 spaces into test data + rawdata = bytearray() + rawlines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = b"\0\0\0\0" if k & 1 else b" " + b = b + self.rawdata[i:i + k] + b = b" " if k & 1 else b"\0\0\0\0" + rawdata += b + rawlines.append(b) + i += k + if i >= len(self.rawdata): + break + + # Test core parameter combinations + params = (False, False), (False, True), (True, False), (True, True) + for fold_spaces, wrap in params: + lines = [] + for rawline in rawlines: + b = self.type2test(rawline) + a = binascii.b2a_ascii85(b, fold_spaces=fold_spaces, wrap=wrap) + lines.append(a) + res = bytearray() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_ascii85(a, fold_spaces=fold_spaces, wrap=wrap) + res += b + self.assertEqual(res, rawdata) + + # Test decoding inputs with length 1 mod 5 + params = [ + (b"a", False, False, b"", b""), + (b"xbw", False, False, b"wx", b""), + (b"<~c~>", False, True, b"", b""), + (b"{d ~>", False, True, b" {", b""), + (b"ye", True, False, b"", b" "), + (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), + (b"<~FCfN8yg~>", True, True, b"", b"test "), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + ] + for a, fold_spaces, wrap, ignore, b in params: + kwargs = {"fold_spaces": fold_spaces, "wrap": wrap, "ignore": ignore} + 
self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + + def test_ascii85_invalid(self): + # Test Ascii85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_ascii85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"!\"#$%&'()*+,-./0123456789:;<=>?@" \ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" + b"z" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_ascii85(a, ignore=fillers) + res += b + self.assertEqual(res, self.rawdata) + + # Test Ascii85 with only invalid characters + fillers = self.type2test(fillers) + b = binascii.a2b_ascii85(fillers, ignore=fillers) + self.assertEqual(b, b"") + + def test_ascii85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_ascii85(self.type2test(data), **kwargs) + + def assertMissingDelimiter(data): + _assertRegexTemplate(r"(?i)end with b'~>'", data, wrap=True) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)Ascii85 overflow", data) + + def assertInvalidSpecial(data): + _assertRegexTemplate(r"(?i)'[yz]'.+5-tuple", data, fold_spaces=True) + + def assertInvalidChar(data, **kwargs): + _assertRegexTemplate(r"(?i)Non-Ascii85 digit", data, **kwargs) + + # Test Ascii85 with missing delimiters + assertMissingDelimiter(b"") + assertMissingDelimiter(b"a") + assertMissingDelimiter(b"<~") + assertMissingDelimiter(b"<~!~") + assertMissingDelimiter(b"<~abc>") + assertMissingDelimiter(b"<~has delimiter but not terminal~> !") + + # Test Ascii85 with 
out-of-range encoded value + assertOverflow(b"t") + assertOverflow(b"s9") + assertOverflow(b"s8X") + assertOverflow(b"s8W.") + assertOverflow(b's8W-"') + assertOverflow(b"s8W-!u") + assertOverflow(b"s8W-!s8W-!zs8X") + + # Test Ascii85 with misplaced short form groups + assertInvalidSpecial(b"ay") + assertInvalidSpecial(b"az") + assertInvalidSpecial(b"aby") + assertInvalidSpecial(b"ayz") + assertInvalidSpecial(b"abcz") + assertInvalidSpecial(b"abcdy") + assertInvalidSpecial(b"y!and!z!then!!y") + + # Test Ascii85 with non-ignored invalid characters + assertInvalidChar(b"j\n") + assertInvalidChar(b" ", ignore=b"") + assertInvalidChar(b" valid\x02until\x03", ignore=b"\x00\x01\x02\x04") + assertInvalidChar(b"\tFCb", ignore=b"\n") + assertInvalidChar(b"xxxB\nP\thU'D v/F+", ignore=b" \n\tv") + + def test_ascii85_width(self): + # Test Ascii85 splitting lines by width + def assertEncode(a_expected, data, n, wrap=False): + b = self.type2test(data) + a = binascii.b2a_ascii85(b, wrap=wrap, width=n) + self.assertEqual(a, a_expected) + + def assertDecode(data, b_expected, wrap=False): + a = self.type2test(data) + b = binascii.a2b_ascii85(a, wrap=wrap, ignore=b"\n") + self.assertEqual(b, b_expected) + + tests = [ + (b"", 0, b"", b"<~~>"), + (b"", 1, b"", b"<~\n~>"), + (b"a", 0, b"@/", b"<~@/~>"), + (b"a", 1, b"@\n/", b"<~\n@/\n~>"), + (b"a", 2, b"@/", b"<~\n@/\n~>"), + (b"a", 3, b"@/", b"<~@\n/~>"), + (b"a", 4, b"@/", b"<~@/\n~>"), + (b"a", 5, b"@/", b"<~@/\n~>"), + (b"a", 6, b"@/", b"<~@/~>"), + (b"a", 7, b"@/", b"<~@/~>"), + (b"a", 123, b"@/", b"<~@/~>"), + (b"this is a test", 7, b"FD,B0+D\nGm>@3BZ\n'F*%", + b"<~FD,B0\n+DGm>@3\nBZ'F*%\n~>"), + (b"a test!!!!!!! 
", 11, b"@3BZ'F*&QK+\nX&!P+WqmM+9", + b"<~@3BZ'F*&Q\nK+X&!P+WqmM\n+9~>"), + (b"\0" * 56, 7, b"zzzzzzz\nzzzzzzz", b"<~zzzzz\nzzzzzzz\nzz~>"), + ] + for b, n, a, a_wrap in tests: + assertEncode(a, b, n) + assertEncode(a_wrap, b, n, wrap=True) + assertDecode(a, b) + assertDecode(a_wrap, b, wrap=True) + + def test_ascii85_pad(self): + # Test Ascii85 with encode padding + rawdata = b"n1n3tee\n ch@rAcTer$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_ascii85(self.type2test(b), pad=True) + b_pad = binascii.a2b_ascii85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + # Test Ascii85 short form groups with encode padding + def assertShortPad(data, expected, **kwargs): + data = self.type2test(data) + res = binascii.b2a_ascii85(data, **kwargs) + self.assertEqual(res, expected) + + assertShortPad(b"\0", b"!!", pad=False) + assertShortPad(b"\0", b"z", pad=True) + assertShortPad(b"\0" * 2, b"z", pad=True) + assertShortPad(b"\0" * 3, b"z", pad=True) + assertShortPad(b"\0" * 4, b"z", pad=True) + assertShortPad(b"\0" * 5, b"zz", pad=True) + assertShortPad(b"\0" * 6, b"z!!!") + assertShortPad(b" " * 7, b"y+", + fold_spaces=True, wrap=True, pad=True) + assertShortPad(b"\0\0\0\0abcd \0\0", b"<~z@:E_Wy\nz~>", + fold_spaces=True, wrap=True, width=9, pad=True) + + def test_ascii85_ignore(self): + # Test Ascii85 with ignored characters + def assertIgnore(data, expected, ignore=b"", **kwargs): + data = self.type2test(data) + ignore = self.type2test(ignore) + with self.assertRaisesRegex(binascii.Error, r"(?i)Non-Ascii85 digit"): + binascii.a2b_ascii85(data, **kwargs) + res = binascii.a2b_ascii85(data, ignore=ignore, **kwargs) + self.assertEqual(res, expected) + + assertIgnore(b"\n", b"", ignore=b"\n") + assertIgnore(b"<~ ~>", b"", ignore=b" ", wrap=True) + assertIgnore(b"z|z", b"\0" * 8, ignore=b"|||") # repeats don't matter + assertIgnore(b"zz!!|", b"\0" * 9, ignore=b"|!z") # 
ignore only if invalid + assertIgnore(b"<~B P~@~>", b"hi", ignore=b" <~>", wrap=True) + assertIgnore(b"zy}", b"\0\0\0\0", ignore=b"zy}") + assertIgnore(b"zy}", b"\0\0\0\0 ", ignore=b"zy}", fold_spaces=True) + + def test_base85_valid(self): + # Test base85 with valid data + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + res = bytes() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_base85(a) + res += b + self.assertEqual(res, self.rawdata) + + # Test decoding inputs with length 1 mod 5 + self.assertEqual(binascii.a2b_base85(self.type2test(b"a")), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b" b ")), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b"b/Y\"*,j'Nc")), b"test") + + def test_base85_invalid(self): + # Test base85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_base85(a) + res += b + self.assertEqual(res, self.rawdata) + + def test_base85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base85(self.type2test(data), **kwargs) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)base85 overflow", data) + + # 
Test base85 with out-of-range encoded value + assertOverflow(b"}") + assertOverflow(b"|O") + assertOverflow(b"|Nt") + assertOverflow(b"|NsD") + assertOverflow(b"|NsC1") + assertOverflow(b"|NsC0~") + assertOverflow(b"|NsC0|NsC0|NsD0") + + def test_base85_pad(self): + # Test base85 with encode padding + rawdata = b"n1n3Tee\n ch@rAc\te\r$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_base85(self.type2test(b), pad=True) + b_pad = binascii.a2b_base85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + def test_base85_strict_mode(self): + # Test base85 with strict mode on + def assertNonBase85Data(data, expected): + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, r"(?i)bad base85 character"): + binascii.a2b_base85(data, strict_mode=True) + default_res = binascii.a2b_base85(data) + non_strict_res = binascii.a2b_base85(data, strict_mode=False) + self.assertEqual(default_res, non_strict_res) + self.assertEqual(non_strict_res, expected) + + assertNonBase85Data(b"\xda", b"") + assertNonBase85Data(b"00\0\0", b"\0") + assertNonBase85Data(b"Z )*", b"ok") + assertNonBase85Data(b"bY*jNb0Hyq\n", b"tests!!~") + + def test_base85_newline(self): + # Test base85 newline parameter + b = self.type2test(b"t3s\t ") + self.assertEqual(binascii.b2a_base85(b), b"bTe}aAO\n") + self.assertEqual(binascii.b2a_base85(b, newline=True), b"bTe}aAO\n") + self.assertEqual(binascii.b2a_base85(b, newline=False), b"bTe}aAO") + + def test_z85_valid(self): + # Test Z85 with valid data + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_z85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + res = bytes() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_z85(a) + res += b + self.assertEqual(res, self.rawdata) + + # Test decoding inputs with length 1 mod 5 + 
self.assertEqual(binascii.a2b_z85(self.type2test(b"a")), b"") + self.assertEqual(binascii.a2b_z85(self.type2test(b" b ")), b"") + self.assertEqual(binascii.a2b_z85(self.type2test(b"B y,/;J_n\\c")), b"test") + + def test_z85_invalid(self): + # Test Z85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_z85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"0123456789abcdefghijklmnopqrstuvwxyz" \ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_z85(a) + res += b + self.assertEqual(res, self.rawdata) + + def test_z85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_z85(self.type2test(data), **kwargs) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)z85 overflow", data) + + # Test Z85 with out-of-range encoded value + assertOverflow(b"%") + assertOverflow(b"%n") + assertOverflow(b"%nS") + assertOverflow(b"%nSc") + assertOverflow(b"%nSc1") + assertOverflow(b"%nSc0$") + assertOverflow(b"%nSc0%nSc0%nSD0") + + def test_z85_pad(self): + # Test Z85 with encode padding + rawdata = b"n1n3Tee\n ch@rAc\te\r$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_z85(self.type2test(b), pad=True) + b_pad = binascii.a2b_z85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + def test_z85_strict_mode(self): + # Test Z85 with strict mode on + def assertNonZ85Data(data, 
expected): + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, r"(?i)bad z85 character"): + binascii.a2b_z85(data, strict_mode=True) + default_res = binascii.a2b_z85(data) + non_strict_res = binascii.a2b_z85(data, strict_mode=False) + self.assertEqual(default_res, non_strict_res) + self.assertEqual(non_strict_res, expected) + + assertNonZ85Data(b"\xda", b"") + assertNonZ85Data(b"00\0\0", b"\0") + assertNonZ85Data(b"z !/", b"ok") + assertNonZ85Data(b"By/JnB0hYQ\n", b"tests!!~") + + def test_z85_newline(self): + # Test Z85 newline parameter + b = self.type2test(b"t3s\t ") + self.assertEqual(binascii.b2a_z85(b), b"BtE$Aao\n") + self.assertEqual(binascii.b2a_z85(b, newline=True), b"BtE$Aao\n") + self.assertEqual(binascii.b2a_z85(b, newline=False), b"BtE$Aao") + def test_uu(self): MAX_UU = 45 for backtick in (True, False): diff --git a/Misc/ACKS b/Misc/ACKS index bb6b6bde822a4e..9a53355c001894 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1714,6 +1714,7 @@ Jendrik Seipp Michael Selik Yury Selivanov Fred Sells +James Seo Jiwon Seo IƱigo Serna Joakim Sernbrant diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-28-15-55-53.gh-issue-101178.26jYPs.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-28-15-55-53.gh-issue-101178.26jYPs.rst new file mode 100644 index 00000000000000..1859538896932d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-28-15-55-53.gh-issue-101178.26jYPs.rst @@ -0,0 +1,2 @@ +Add Ascii85, Base85, and Z85 support to :mod:`binascii` and improve the +performance of the base-85 converters in :mod:`base64`. 
diff --git a/Modules/binascii.c b/Modules/binascii.c index 13e4bc5be03ebd..5324569cef7f0f 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -105,6 +105,84 @@ static const unsigned char table_b2a_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const unsigned char table_a2b_base85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,62,-1,63, 64,65,66,-1, 67,68,69,70, -1,71,-1,-1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,72, 73,74,75,76, + 77,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, + 25,26,27,28, 29,30,31,32, 33,34,35,-1, -1,-1,78,79, + 80,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50, + 51,52,53,54, 55,56,57,58, 59,60,61,81, 82,83,84,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_a2b_base85_a85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, + 15,16,17,18, 19,20,21,22, 23,24,25,26, 27,28,29,30, + 31,32,33,34, 35,36,37,38, 39,40,41,42, 43,44,45,46, + 47,48,49,50, 51,52,53,54, 55,56,57,58, 59,60,61,62, + 63,64,65,66, 67,68,69,70, 71,72,73,74, 75,76,77,78, + 79,80,81,82, 83,84,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, 
-1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_a2b_base85_z85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,68,-1,84, 83,82,72,-1, 75,76,70,65, -1,63,62,69, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,64,-1, 73,66,74,71, + 81,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50, + 51,52,53,54, 55,56,57,58, 59,60,61,77, -1,78,67,-1, + -1,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, + 25,26,27,28, 29,30,31,32, 33,34,35,79, -1,80,-1,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_b2a_base85[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; + +static const unsigned char table_b2a_base85_a85[] = + "!\"#$%&\'()*+,-./0123456789:;<=>?@" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu"; + +static const unsigned char table_b2a_base85_z85[] = + "0123456789abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/\x2a?&<>()[]{}@%$#"; /* clinic doesn't like '/' followed by '*' */ + +#define BASE85_A85_PREFIX '<' +#define BASE85_A85_AFFIX '~' +#define BASE85_A85_SUFFIX '>' +#define BASE85_A85_Z 0x00000000 +#define BASE85_A85_Y 0x20202020 + static const unsigned short crctab_hqx[256] = { 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, @@ -189,6 +267,28 @@ ascii_buffer_converter(PyObject *arg, Py_buffer *buf) return 
Py_CLEANUP_SUPPORTED; } +static Py_ssize_t +wraplines(unsigned char *data, Py_ssize_t size, size_t width) +{ + if ((size_t)size <= width) { + return size; + } + unsigned char *src = data + size; + Py_ssize_t newlines = (size - 1) / width; + Py_ssize_t line_len = size - newlines * width; + size += newlines; + unsigned char *dst = data + size; + + while ((src -= line_len) != data) { + dst -= line_len; + memmove(dst, src, line_len); + *--dst = '\n'; + line_len = width; + } + assert(dst == data + width); + return size; +} + #include "clinic/binascii.c.h" /*[clinic input] @@ -592,6 +692,511 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) return PyBytesWriter_FinishWithPointer(writer, ascii_data); } +/*[clinic input] +binascii.a2b_ascii85 + + data: ascii_buffer + / + * + fold_spaces: bool = False + Allow 'y' as a short form encoding four spaces. + wrap: bool = False + Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. + ignore: Py_buffer(c_default="NULL", py_default="b''") = None + An optional bytes-like object with input characters to be ignored. + +Decode Ascii85 data. +[clinic start generated code]*/ + +static PyObject * +binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, Py_buffer *ignore) +/*[clinic end generated code: output=6ab30f2a26d301a1 input=11c60c016d4f334b]*/ +{ + const unsigned char *ascii_data = data->buf; + Py_ssize_t ascii_len = data->len; + binascii_state *state = NULL; + + assert(ascii_len >= 0); + + /* Consume Ascii85 prefix and suffix if present. 
*/ + if (wrap) { + if (ascii_len < 2 + || ascii_data[ascii_len - 2] != BASE85_A85_AFFIX + || ascii_data[ascii_len - 1] != BASE85_A85_SUFFIX) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Ascii85 encoded byte sequences must end with b'~>'"); + } + return NULL; + } + ascii_len -= 2; + if (ascii_len >= 2 + && ascii_data[0] == BASE85_A85_PREFIX + && ascii_data[1] == BASE85_A85_AFFIX) { + ascii_data += 2; + ascii_len -= 2; + } + } + + /* Allocate output buffer. */ + Py_ssize_t bin_len = ascii_len; + unsigned char this_ch = 0; + for (Py_ssize_t i = 0; i < ascii_len; i++) { + this_ch = ascii_data[i]; + if (this_ch == 'y' || this_ch == 'z') { + bin_len += 4; + } + } + bin_len = 4 * ((bin_len + 4) / 5); + + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { + return NULL; + } + unsigned char *bin_data = PyBytesWriter_GetData(writer); + if (bin_data == NULL) { + return NULL; + } + + /* Build ignore map. */ + unsigned char ignore_map[256] = {0}; + if (ignore->obj != NULL) { + const unsigned char *ignore_data = ignore->buf; + Py_ssize_t ignore_len = ignore->len; + for (Py_ssize_t i = 0; i < ignore_len; i++) { + this_ch = ignore_data[i]; + ignore_map[this_ch] = -1; + } + } + + uint32_t leftchar = 0; + int group_pos = 0; + for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { + /* Shift (in radix-85) data or padding into our buffer. */ + unsigned char this_digit; + if (ascii_len > 0) { + this_ch = *ascii_data; + this_digit = table_a2b_base85_a85[this_ch]; + } + else { + /* Pad with largest radix-85 digit when decoding. 
*/ + this_digit = 84; + } + if (this_digit < 85) { + if (leftchar > UINT32_MAX / 85 + || (leftchar *= 85) > UINT32_MAX - this_digit) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, "Ascii85 overflow"); + } + goto error; + } + leftchar += this_digit; + group_pos++; + } + else if ((this_ch == 'y' && fold_spaces) || this_ch == 'z') { + if (group_pos != 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "'%c' inside Ascii85 5-tuple", this_ch); + } + goto error; + } + leftchar = this_ch == 'y' ? BASE85_A85_Y : BASE85_A85_Z; + group_pos = 5; + } + else if (!ignore_map[this_ch]) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "Non-Ascii85 digit found: %c", this_ch); + } + goto error; + } + + /* Wait until buffer is full. */ + if (group_pos != 5) { + continue; + } + + /* Write current chunk. */ + Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; + for (Py_ssize_t i = 0; i < chunk_len; i++) { + *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; + } + + group_pos = 0; + leftchar = 0; + } + + return PyBytesWriter_FinishWithPointer(writer, bin_data); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + +/*[clinic input] +binascii.b2a_ascii85 + + data: Py_buffer + / + * + fold_spaces: bool = False + Emit 'y' as a short form encoding four spaces. + wrap: bool = False + Wrap result in '<~' and '~>' as in Adobe Ascii85. + width: unsigned_int(bitwise=True) = 0 + Split result into lines of provided width. + pad: bool = False + Pad input to a multiple of 4 before encoding. + +Ascii85-encode data. 
+[clinic start generated code]*/ + +static PyObject * +binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, unsigned int width, int pad) +/*[clinic end generated code: output=78426392ad3fc75b input=d5122dbab4dbb9f2]*/ +{ + const unsigned char *bin_data = data->buf; + Py_ssize_t bin_len = data->len; + + assert(bin_len >= 0); + + if (wrap && width == 1) { + width = 2; + } + + /* Allocate output buffer. + XXX: Do a pre-pass above some threshold estimate (cf. 'yz')? + */ + Py_ssize_t out_len = 5 * ((bin_len + 3) / 4); + if (wrap) { + out_len += 4; + } + if (!pad && (bin_len % 4)) { + out_len -= 4 - (bin_len % 4); + } + if (width && out_len) { + out_len += (out_len - 1) / width; + } + + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { + return NULL; + } + unsigned char *ascii_data = PyBytesWriter_GetData(writer); + + if (wrap) { + *ascii_data++ = BASE85_A85_PREFIX; + *ascii_data++ = BASE85_A85_AFFIX; + } + + /* Encode all full-length chunks. */ + for (; bin_len >= 4; bin_len -= 4, bin_data += 4) { + uint32_t leftchar = ((uint32_t)bin_data[0] << 24) | (bin_data[1] << 16) | + (bin_data[2] << 8) | bin_data[3]; /* widen first: a byte >= 0x80 shifted by 24 overflows int */ + if (leftchar == BASE85_A85_Z) { + *ascii_data++ = 'z'; + } + else if (fold_spaces && leftchar == BASE85_A85_Y) { + *ascii_data++ = 'y'; + } + else { + ascii_data[4] = table_b2a_base85_a85[leftchar % 85]; + leftchar /= 85; + ascii_data[3] = table_b2a_base85_a85[leftchar % 85]; + leftchar /= 85; + ascii_data[2] = table_b2a_base85_a85[leftchar % 85]; + leftchar /= 85; + ascii_data[1] = table_b2a_base85_a85[leftchar % 85]; + leftchar /= 85; + ascii_data[0] = table_b2a_base85_a85[leftchar]; + + ascii_data += 5; + } + } + + /* Encode partial-length final chunk. */ + if (bin_len > 0) { + uint32_t leftchar = 0; + for (Py_ssize_t i = 0; i < 4; i++) { + leftchar <<= 8; /* Pad with zero when encoding.
*/ + if (i < bin_len) { + leftchar |= *bin_data++; + } + } + if (pad && leftchar == BASE85_A85_Z) { + *ascii_data++ = 'z'; + } + else { + Py_ssize_t group_len = pad ? 5 : bin_len + 1; + for (Py_ssize_t i = 4; i >= 0; i--) { + if (i < group_len) { + ascii_data[i] = table_b2a_base85_a85[leftchar % 85]; + } + leftchar /= 85; + } + ascii_data += group_len; + } + } + + if (wrap) { + *ascii_data++ = BASE85_A85_AFFIX; + *ascii_data++ = BASE85_A85_SUFFIX; + } + + if (width && out_len) { + unsigned char *start = PyBytesWriter_GetData(writer); + ascii_data = start + wraplines(start, ascii_data - start, width); + if (wrap && ascii_data[-2] == '\n') { + assert(ascii_data[-1] == BASE85_A85_SUFFIX); + assert(ascii_data[-3] == BASE85_A85_AFFIX); + ascii_data[-3] = '\n'; + ascii_data[-2] = BASE85_A85_AFFIX; + } + } + + return PyBytesWriter_FinishWithPointer(writer, ascii_data); +} + +static PyObject * +internal_a2b_base85(PyObject *module, Py_buffer *data, int strict_mode, + const unsigned char table_a2b[], const char *name) +{ + const unsigned char *ascii_data = data->buf; + Py_ssize_t ascii_len = data->len; + binascii_state *state = NULL; + + assert(ascii_len >= 0); + + /* Allocate output buffer. */ + Py_ssize_t bin_len = 4 * ((ascii_len + 4) / 5); + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { + return NULL; + } + unsigned char *bin_data = PyBytesWriter_GetData(writer); + + uint32_t leftchar = 0; + int group_pos = 0; + for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { + /* Shift (in radix-85) data or padding into our buffer. */ + unsigned char this_digit; + if (ascii_len > 0) { + unsigned char this_ch = *ascii_data; + this_digit = table_a2b[this_ch]; + } + else { + /* Pad with largest radix-85 digit when decoding. 
*/ + this_digit = 84; + } + if (this_digit < 85) { + if (leftchar > UINT32_MAX / 85 + || (leftchar *= 85) > UINT32_MAX - this_digit) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "%s overflow in hunk starting at byte %d", + name, (data->len - ascii_len) / 5 * 5); + } + goto error; + } + leftchar += this_digit; + group_pos++; + } + else if (strict_mode) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, "bad %s character at position %d", + name, data->len - ascii_len); + } + goto error; + } + + /* Wait until buffer is full. */ + if (group_pos != 5) { + continue; + } + + /* Write current chunk. */ + Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; + for (Py_ssize_t i = 0; i < chunk_len; i++) { + *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; + } + + group_pos = 0; + leftchar = 0; + } + + return PyBytesWriter_FinishWithPointer(writer, bin_data); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + +static PyObject * +internal_b2a_base85(PyObject *module, Py_buffer *data, int pad, int newline, + const unsigned char table_b2a[]) +{ + const unsigned char *bin_data = data->buf; + Py_ssize_t bin_len = data->len; + + assert(bin_len >= 0); + + /* Allocate output buffer. */ + Py_ssize_t out_len = 5 * ((bin_len + 3) / 4); + if (!pad && (bin_len % 4)) { + out_len -= 4 - (bin_len % 4); + } + if (newline) { + out_len++; + } + + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { + return NULL; + } + unsigned char *ascii_data = PyBytesWriter_GetData(writer); + + /* Encode all full-length chunks. 
*/ + for (; bin_len >= 4; bin_len -= 4, bin_data += 4) { + uint32_t leftchar = ((uint32_t)bin_data[0] << 24) | (bin_data[1] << 16) | + (bin_data[2] << 8) | bin_data[3]; /* widen first: a byte >= 0x80 shifted by 24 overflows int */ + + ascii_data[4] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[3] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[2] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[1] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[0] = table_b2a[leftchar]; + + ascii_data += 5; + } + + /* Encode partial-length final chunk. */ + if (bin_len > 0) { + uint32_t leftchar = 0; + for (Py_ssize_t i = 0; i < 4; i++) { + leftchar <<= 8; /* Pad with zero when encoding. */ + if (i < bin_len) { + leftchar |= *bin_data++; + } + } + Py_ssize_t group_len = pad ? 5 : bin_len + 1; + for (Py_ssize_t i = 4; i >= 0; i--) { + if (i < group_len) { + ascii_data[i] = table_b2a[leftchar % 85]; + } + leftchar /= 85; + } + ascii_data += group_len; + } + + if (newline) { + *ascii_data++ = '\n'; + } + + return PyBytesWriter_FinishWithPointer(writer, ascii_data); +} + +/*[clinic input] +binascii.a2b_base85 + + data: ascii_buffer + / + * + strict_mode: bool = False + When set to True, bytes that are not in the Base85 alphabet + are not allowed. + +Decode a line of Base85 data. +[clinic start generated code]*/ + +static PyObject * +binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode) +/*[clinic end generated code: output=337b9418636f30f4 input=d19293f194c8cb78]*/ +{ + return internal_a2b_base85(module, data, strict_mode, + table_a2b_base85, "Base85"); +} + +/*[clinic input] +binascii.b2a_base85 + + data: Py_buffer + / + * + pad: bool = False + Pad input to a multiple of 4 before encoding. + newline: bool = True + Append a newline to the result. + +Base85-code line of data.
+[clinic start generated code]*/ + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, + int newline) +/*[clinic end generated code: output=56936eb231e15dc0 input=3899d4f5c3a589a0]*/ +{ + return internal_b2a_base85(module, data, pad, newline, table_b2a_base85); +} + +/*[clinic input] +binascii.a2b_z85 + + data: ascii_buffer + / + * + strict_mode: bool = False + When set to True, bytes that are not in the Z85 alphabet + are not allowed. + +Decode a line of Z85 data. +[clinic start generated code]*/ + +static PyObject * +binascii_a2b_z85_impl(PyObject *module, Py_buffer *data, int strict_mode) +/*[clinic end generated code: output=a2083e8f05d38960 input=a0d5afbf2aebee4d]*/ +{ + return internal_a2b_base85(module, data, strict_mode, + table_a2b_base85_z85, "Z85"); +} + +/*[clinic input] +binascii.b2a_z85 + + data: Py_buffer + / + * + pad: bool = False + Pad input to a multiple of 4 before encoding. + newline: bool = True + Append a newline to the result. + +Z85-code line of data. 
+[clinic start generated code]*/ + +static PyObject * +binascii_b2a_z85_impl(PyObject *module, Py_buffer *data, int pad, + int newline) +/*[clinic end generated code: output=a61636b3f618fc1d input=f71c473209eb8f41]*/ +{ + return internal_b2a_base85(module, data, pad, newline, + table_b2a_base85_z85); +} /*[clinic input] binascii.crc_hqx @@ -1250,6 +1855,12 @@ static struct PyMethodDef binascii_module_methods[] = { BINASCII_B2A_UU_METHODDEF BINASCII_A2B_BASE64_METHODDEF BINASCII_B2A_BASE64_METHODDEF + BINASCII_B2A_ASCII85_METHODDEF + BINASCII_A2B_ASCII85_METHODDEF + BINASCII_A2B_BASE85_METHODDEF + BINASCII_B2A_BASE85_METHODDEF + BINASCII_A2B_Z85_METHODDEF + BINASCII_B2A_Z85_METHODDEF BINASCII_A2B_HEX_METHODDEF BINASCII_B2A_HEX_METHODDEF BINASCII_HEXLIFY_METHODDEF diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index ce29e0d11a45cd..aeeb97e6516e0c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -267,6 +267,578 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P return return_value; } +PyDoc_STRVAR(binascii_a2b_ascii85__doc__, +"a2b_ascii85($module, data, /, *, fold_spaces=False, wrap=False,\n" +" ignore=b\'\')\n" +"--\n" +"\n" +"Decode Ascii85 data.\n" +"\n" +" fold_spaces\n" +" Allow \'y\' as a short form encoding four spaces.\n" +" wrap\n" +" Expect data to be wrapped in \'<~\' and \'~>\' as in Adobe Ascii85.\n" +" ignore\n" +" An optional bytes-like object with input characters to be ignored."); + +#define BINASCII_A2B_ASCII85_METHODDEF \ + {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, + +static PyObject * +binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, Py_buffer *ignore); + +static PyObject * +binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && 
!defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(fold_spaces), &_Py_ID(wrap), &_Py_ID(ignore), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "fold_spaces", "wrap", "ignore", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_ascii85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int fold_spaces = 0; + int wrap = 0; + Py_buffer ignore = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + fold_spaces = PyObject_IsTrue(args[1]); + if (fold_spaces < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + wrap = PyObject_IsTrue(args[2]); + if (wrap < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[3], &ignore, PyBUF_SIMPLE) != 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_ascii85_impl(module, &data, fold_spaces, wrap, &ignore); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + /* Cleanup for ignore */ + if (ignore.obj) { + PyBuffer_Release(&ignore); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_ascii85__doc__, +"b2a_ascii85($module, data, /, *, 
fold_spaces=False, wrap=False,\n" +" width=0, pad=False)\n" +"--\n" +"\n" +"Ascii85-encode data.\n" +"\n" +" fold_spaces\n" +" Emit \'y\' as a short form encoding four spaces.\n" +" wrap\n" +" Wrap result in \'<~\' and \'~>\' as in Adobe Ascii85.\n" +" width\n" +" Split result into lines of provided width.\n" +" pad\n" +" Pad input to a multiple of 4 before encoding."); + +#define BINASCII_B2A_ASCII85_METHODDEF \ + {"b2a_ascii85", _PyCFunction_CAST(binascii_b2a_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_ascii85__doc__}, + +static PyObject * +binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, unsigned int width, int pad); + +static PyObject * +binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(fold_spaces), &_Py_ID(wrap), &_Py_ID(width), &_Py_ID(pad), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "fold_spaces", "wrap", "width", "pad", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_ascii85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int fold_spaces = 0; + int wrap = 0; + unsigned int width = 0; + int pad = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + fold_spaces = PyObject_IsTrue(args[1]); + if (fold_spaces < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + wrap = PyObject_IsTrue(args[2]); + if (wrap < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[3]) { + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[3], &width, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + pad = PyObject_IsTrue(args[4]); + if (pad < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_ascii85_impl(module, &data, fold_spaces, wrap, width, pad); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_a2b_base85__doc__, +"a2b_base85($module, data, /, *, strict_mode=False)\n" +"--\n" +"\n" +"Decode a line of Base85 data.\n" +"\n" +" strict_mode\n" +" When set to True, bytes that are not in the Base85 alphabet\n" +" are not allowed."); + +#define BINASCII_A2B_BASE85_METHODDEF \ + {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, + +static PyObject * +binascii_a2b_base85_impl(PyObject *module, 
Py_buffer *data, int strict_mode); + +static PyObject * +binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(strict_mode), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "strict_mode", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int strict_mode = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + strict_mode = PyObject_IsTrue(args[1]); + if (strict_mode < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_base85_impl(module, &data, strict_mode); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_base85__doc__, +"b2a_base85($module, data, /, *, pad=False, newline=True)\n" +"--\n" +"\n" +"Base85-code line of data.\n" +"\n" +" pad\n" +" Pad input to a multiple of 4 before encoding.\n" +" newline\n" +" Append a newline to the result."); + +#define BINASCII_B2A_BASE85_METHODDEF \ + {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), 
METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__}, + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, + int newline); + +static PyObject * +binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pad), &_Py_ID(newline), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "pad", "newline", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int pad = 0; + int newline = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + pad = PyObject_IsTrue(args[1]); + if (pad < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + newline = PyObject_IsTrue(args[2]); + if (newline < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_base85_impl(module, &data, pad, newline); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_a2b_z85__doc__, +"a2b_z85($module, data, /, *, strict_mode=False)\n" +"--\n" +"\n" +"Decode a line of Z85 data.\n" +"\n" +" strict_mode\n" +" When set to True, bytes that are not in the Z85 alphabet\n" +" are not allowed."); + +#define BINASCII_A2B_Z85_METHODDEF \ + {"a2b_z85", _PyCFunction_CAST(binascii_a2b_z85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_z85__doc__}, + +static PyObject * +binascii_a2b_z85_impl(PyObject *module, Py_buffer *data, int strict_mode); + +static PyObject * +binascii_a2b_z85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(strict_mode), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const 
char * const _keywords[] = {"", "strict_mode", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_z85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int strict_mode = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + strict_mode = PyObject_IsTrue(args[1]); + if (strict_mode < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_z85_impl(module, &data, strict_mode); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_z85__doc__, +"b2a_z85($module, data, /, *, pad=False, newline=True)\n" +"--\n" +"\n" +"Z85-code line of data.\n" +"\n" +" pad\n" +" Pad input to a multiple of 4 before encoding.\n" +" newline\n" +" Append a newline to the result."); + +#define BINASCII_B2A_Z85_METHODDEF \ + {"b2a_z85", _PyCFunction_CAST(binascii_b2a_z85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_z85__doc__}, + +static PyObject * +binascii_b2a_z85_impl(PyObject *module, Py_buffer *data, int pad, + int newline); + +static PyObject * +binascii_b2a_z85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pad), &_Py_ID(newline), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE 
(&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "pad", "newline", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_z85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int pad = 0; + int newline = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + pad = PyObject_IsTrue(args[1]); + if (pad < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + newline = PyObject_IsTrue(args[2]); + if (newline < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_z85_impl(module, &data, pad, newline); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + PyDoc_STRVAR(binascii_crc_hqx__doc__, "crc_hqx($module, data, crc, /)\n" "--\n" @@ -812,4 +1384,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=fba6a71e0d7d092f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d5a4d7558811b890 input=a9049054013a1b77]*/