#!/usr/bin/env python3
#-*- coding: utf-8 -*-

import unittest
from aztec_code_generator import (
    reed_solomon, find_optimal_sequence, optimal_sequence_to_bits, get_data_codewords, encoding_to_eci,
    configs,
    Mode, Latch, Shift, Misc,
    AztecCode,
)

import codecs
from tempfile import NamedTemporaryFile

# cairosvg is an optional dependency: it is only used to rasterize SVG output
# in the SVG readability test, which is skipped when cairosvg is None.
# BytesIO is only needed together with cairosvg, so it is imported here too.
try:
    import cairosvg
    from io import BytesIO
except ImportError:
    cairosvg = None

# pyrxing is an optional dependency: it is used to decode the generated
# barcodes; the readability tests are skipped when pyrxing is None.
try:
    import pyrxing
except ImportError:
    pyrxing = None

def b(*l):
    """ Build an expected-sequence list from a compact mix of items.

    Each positional argument is converted as follows:
      - a `str` of length exactly 1 becomes its integer code point (`ord`)
      - any other `str` (including the empty string) becomes `bytes` via `str.encode()`
      - anything that is not a `str` is passed through unchanged
    """
    converted = []
    for item in l:
        if not isinstance(item, str):
            # Non-string items are kept as they are
            converted.append(item)
        elif len(item) == 1:
            converted.append(ord(item))
        else:
            converted.append(item.encode())
    return converted

class Test(unittest.TestCase):
    """
    Test aztec_code_generator module

    The readability tests need the optional pyrxing module (and cairosvg for SVG output)
    and are skipped when those modules cannot be imported.
    """

    def test_config_size_correctness(self):
        """ Verify the correctness of the layers and codewords in the symbol size configs """
        for (size, compact), config in configs.items():
            # One sub-test per config, so that every inconsistent entry gets reported
            with self.subTest(size=size, compact=compact):
                # Start with total matrix size
                bits_avail = size * size

                # Subtract bits occupied by the core (bullseye + orientation marks)
                core_size = 11 if compact else 15
                bits_avail -= core_size * core_size

                if not compact:
                    # Number of horizontal and vertical lines in reference grid
                    n_ref_lines = 1 + 2*(size // 32)

                    # Subtract bits occupied by the horizontal lines,
                    # without double-counting the core bits
                    bits_avail -= n_ref_lines*size - core_size

                    # Subtract bits occupied by the vertical lines,
                    # without double-counting either the core bits or
                    # those already counted in the horizontal lines
                    bits_avail -= n_ref_lines*size - core_size - (n_ref_lines * n_ref_lines - 1)

                # Verify correctness of layers and bits available
                expected_bits_avail = ((88 if compact else 112) + 16 * config.layers) * config.layers
                self.assertEqual(expected_bits_avail, bits_avail,
                                 f"{expected_bits_avail} bits should fit in {config.layers} layers of {size}x{size} "
                                 f"{'compact' if compact else 'full'} symbol, but we calculated {bits_avail}")

                # Verify correctness of codewords in config
                cw_avail = bits_avail // config.cw_bits
                self.assertEqual(cw_avail, config.codewords,
                                 f"{cw_avail} codewords should fit in {size}x{size} "
                                 f"{'compact' if compact else 'full'} symbol, but config has {config.codewords}")

    def test_reed_solomon(self):
        """ Test reed_solomon function """
        cw = []
        reed_solomon(cw, 0, 0, 0, 0)
        self.assertEqual(cw, [])
        cw = [0, 0] + [0, 0]
        reed_solomon(cw, 2, 2, 16, 19)
        self.assertEqual(cw, [0, 0, 0, 0])
        cw = [9, 50, 1, 41, 47, 2, 39, 37, 1, 27] + [0, 0, 0, 0, 0, 0, 0]
        reed_solomon(cw, 10, 7, 64, 67)
        self.assertEqual(cw, [9, 50, 1, 41, 47, 2, 39, 37, 1, 27, 38, 50, 8, 16, 10, 20, 40])
        cw = [0, 9] + [0, 0, 0, 0, 0]
        reed_solomon(cw, 2, 5, 16, 19)
        self.assertEqual(cw, [0, 9, 12, 2, 3, 1, 9])

    def test_find_optimal_sequence_ascii_strings(self):
        """ Test find_optimal_sequence function for ASCII strings """
        self.assertEqual(find_optimal_sequence(''), b())
        self.assertEqual(find_optimal_sequence('ABC'), b('A', 'B', 'C'))
        self.assertEqual(find_optimal_sequence('abc'), b(Latch.LOWER, 'a', 'b', 'c'))
        self.assertEqual(find_optimal_sequence('Wikipedia, the free encyclopedia'), b(
            'W', Latch.LOWER, 'i', 'k', 'i', 'p', 'e', 'd', 'i', 'a', Shift.PUNCT, ', ', 't', 'h', 'e',
            ' ', 'f', 'r', 'e', 'e', ' ', 'e', 'n', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'd', 'i', 'a'))
        self.assertEqual(find_optimal_sequence('Code 2D!'), b(
            'C', Latch.LOWER, 'o', 'd', 'e', Latch.DIGIT, ' ', '2', Shift.UPPER, 'D', Shift.PUNCT, '!'))
        self.assertEqual(find_optimal_sequence('!#$%&?'), b(Latch.MIXED, Latch.PUNCT, '!', '#', '$', '%', '&', '?'))

        # Two sequences are accepted for this input
        self.assertIn(find_optimal_sequence('. : '), (
            b(Shift.PUNCT, '. ', Shift.PUNCT, ': '),
            b(Latch.MIXED, Latch.PUNCT, '. ', ': ') ))
        self.assertEqual(find_optimal_sequence('\r\n\r\n\r\n'), b(Latch.MIXED, Latch.PUNCT, '\r\n', '\r\n', '\r\n'))
        self.assertEqual(find_optimal_sequence('test 1!test 2!'), b(
            Latch.LOWER, 't', 'e', 's', 't', Latch.DIGIT, ' ', '1', Shift.PUNCT, '!', Latch.UPPER,
            Latch.LOWER, 't', 'e', 's', 't', Latch.DIGIT, ' ', '2', Shift.PUNCT, '!'))
        self.assertEqual(find_optimal_sequence('Abc-123X!Abc-123X!'), b(
            'A', Latch.LOWER, 'b', 'c', Latch.DIGIT, Shift.PUNCT, '-', '1', '2', '3', Latch.UPPER, 'X', Shift.PUNCT, '!',
            'A', Latch.LOWER, 'b', 'c', Latch.DIGIT, Shift.PUNCT, '-', '1', '2', '3', Shift.UPPER, 'X', Shift.PUNCT, '!'))
        self.assertEqual(find_optimal_sequence('ABCabc1a2b3e'), b(
            'A', 'B', 'C', Latch.LOWER, 'a', 'b', 'c', Shift.BINARY, 5, '1', 'a', '2', 'b', '3', 'e'))
        self.assertEqual(find_optimal_sequence('ABCabc1a2b3eBC'), b(
            'A', 'B', 'C', Latch.LOWER, 'a', 'b', 'c', Shift.BINARY, 6, '1', 'a', '2', 'b', '3', 'e', Latch.DIGIT, Latch.UPPER, 'B', 'C'))
        self.assertEqual(find_optimal_sequence('abcABC'), b(
            Latch.LOWER, 'a', 'b', 'c', Latch.DIGIT, Latch.UPPER, 'A', 'B', 'C'))
        self.assertEqual(find_optimal_sequence('0a|5Tf.l'), b(
            Shift.BINARY, 5, '0', 'a', '|', '5', 'T', Latch.LOWER, 'f', Shift.PUNCT, '.', 'l'))
        self.assertEqual(find_optimal_sequence('*V1\x0c {Pa'), b(
            Shift.PUNCT, '*', 'V', Shift.BINARY, 5, '1', '\x0c', ' ', '{', 'P', Latch.LOWER, 'a'))
        self.assertEqual(find_optimal_sequence('~Fxlb"I4'), b(
            Shift.BINARY, 7, '~', 'F', 'x', 'l', 'b', '"', 'I', Latch.DIGIT, '4'))
        self.assertEqual(find_optimal_sequence('\\+=R?1'), b(
            Latch.MIXED, '\\', Latch.PUNCT, '+', '=', Latch.UPPER, 'R', Latch.DIGIT, Shift.PUNCT, '?', '1'))
        self.assertEqual(find_optimal_sequence('0123456789:;<=>'), b(
            Latch.DIGIT, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', Latch.UPPER, Latch.MIXED, Latch.PUNCT, ':', ';', '<', '=', '>'))

    def test_encodings_canonical(self):
        """ Verify that every key of encoding_to_eci is the canonical codec name per codecs.lookup """
        for encoding in encoding_to_eci:
            with self.subTest(encoding=encoding):
                self.assertEqual(encoding, codecs.lookup(encoding).name)

    def _optimal_eci_sequence(self, charset):
        """ Return the sequence prefix expected when `charset` is selected via ECI

        The prefix is: Shift.PUNCT, Misc.FLG, the length of the ECI number written
        as a decimal string, and the ECI number itself (looked up in encoding_to_eci).
        """
        eci = encoding_to_eci[charset]
        ecis = str(eci)
        return [ Shift.PUNCT, Misc.FLG, len(ecis), eci ]

    def test_find_optimal_sequence_non_ASCII_strings(self):
        """ Test find_optimal_sequence function for non-ASCII strings"""

        # Implicit iso8859-1 without ECI:
        self.assertEqual(find_optimal_sequence('Français'), b(
            'F', Latch.LOWER, 'r', 'a', 'n', Shift.BINARY, 1, 0xe7, 'a', 'i', 's'))

        # ECI: explicit iso8859-1, cp1252 (Windows-1252), and utf-8
        self.assertEqual(find_optimal_sequence('Français', 'iso8859-1'), self._optimal_eci_sequence('iso8859-1') + b(
            'F', Latch.LOWER, 'r', 'a', 'n', Shift.BINARY, 1, 0xe7, 'a', 'i', 's'))
        self.assertEqual(find_optimal_sequence('€800', 'cp1252'), self._optimal_eci_sequence('cp1252') + b(
            Shift.BINARY, 1, 0x80, Latch.DIGIT, '8', '0', '0'))
        self.assertEqual(find_optimal_sequence('Français', 'utf-8'), self._optimal_eci_sequence('utf-8') + b(
            'F', Latch.LOWER, 'r', 'a', 'n', Shift.BINARY, 2, 0xc3, 0xa7, 'a', 'i', 's'))

    def test_find_optimal_sequence_bytes(self):
        """ Test find_optimal_sequence function for byte strings """

        self.assertEqual(find_optimal_sequence(b'a' + b'\xff' * 31 + b'A'), b(
            Shift.BINARY, 0, 1, 'a') + [0xff] * 31 + b('A'))
        self.assertEqual(find_optimal_sequence(b'abc' + b'\xff' * 32 + b'A'), b(
            Latch.LOWER, 'a', 'b', 'c', Shift.BINARY, 0, 1) + [0xff] * 32 + b(Latch.DIGIT, Latch.UPPER, 'A'))
        self.assertEqual(find_optimal_sequence(b'abc' + b'\xff' * 31 + b'@\\\\'), b(
            Latch.LOWER, 'a', 'b', 'c', Shift.BINARY, 31) + [0xff] * 31 + b(Latch.MIXED, '@', '\\', '\\'))
        self.assertEqual(find_optimal_sequence(b'!#$%&?\xff'), b(
            Latch.MIXED, Latch.PUNCT, '!', '#', '$', '%', '&', '?', Latch.UPPER, Shift.BINARY, 1, '\xff'))
        self.assertEqual(find_optimal_sequence(b'!#$%&\xff'), b(Shift.BINARY, 6, '!', '#', '$', '%', '&', '\xff'))
        self.assertEqual(find_optimal_sequence(b'@\xff'), b(Shift.BINARY, 2, '@', '\xff'))
        self.assertEqual(find_optimal_sequence(b'. @\xff'), b(Shift.PUNCT, '. ', Shift.BINARY, 2, '@', '\xff'))

    def test_optimal_sequence_to_bits(self):
        """ Test optimal_sequence_to_bits function """
        self.assertEqual(optimal_sequence_to_bits(b()), '')
        self.assertEqual(optimal_sequence_to_bits(b(Shift.PUNCT)), '00000')
        self.assertEqual(optimal_sequence_to_bits(b('A')), '00010')
        self.assertEqual(optimal_sequence_to_bits(b(Shift.BINARY, 1, '\xff')), '111110000111111111')
        self.assertEqual(optimal_sequence_to_bits(b(Shift.BINARY, 0, 1) + [0xff] * 32), '111110000000000000001' + '11111111'*32)
        self.assertEqual(optimal_sequence_to_bits(b(Shift.PUNCT, Misc.FLG, 0, 'A')), '000000000000000010')
        self.assertEqual(optimal_sequence_to_bits(b(Shift.PUNCT, Misc.FLG, 1, 3, 'A')), '0000000000001' + '0101' + '00010') # FLG(1) '3'
        self.assertEqual(optimal_sequence_to_bits(b(Shift.PUNCT, Misc.FLG, 6, 3, 'A')), '0000000000110' + '0010'*5 + '0101' + '00010') # FLG(6) '000003'

    def test_get_data_codewords(self):
        """ Test get_data_codewords function """
        self.assertEqual(get_data_codewords('000010', 6), [0b000010])
        self.assertEqual(get_data_codewords('111100', 6), [0b111100])
        self.assertEqual(get_data_codewords('111110', 6), [0b111110, 0b011111])
        self.assertEqual(get_data_codewords('000000', 6), [0b000001, 0b011111])
        self.assertEqual(get_data_codewords('111111', 6), [0b111110, 0b111110])
        self.assertEqual(get_data_codewords('111101111101', 6), [0b111101, 0b111101])

    def _assert_decodes_to(self, path, data):
        """ Read the barcode image at `path` with pyrxing and check that it decodes to `data`

        When `data` is bytes, the decoded text is encoded as iso8859-1 before comparing,
        so that bytes are compared with bytes; otherwise the decoded text is compared directly.
        """
        result = pyrxing.read_barcode(path)
        # Use a unittest assertion rather than a bare `assert`, which is stripped under `python -O`
        self.assertIsNotNone(result, f"pyrxing.read_barcode returned None for {path}")
        decoded = result.text.encode('iso8859-1') if isinstance(data, bytes) else result.text
        self.assertEqual(data, decoded)

    def _encode_and_decode(self, data, *args, **kwargs):
        """ Encode `data` as an AztecCode, save it to a temporary '.png' file, and check it decodes back

        Extra positional and keyword arguments are passed through to AztecCode.
        """
        with NamedTemporaryFile(suffix='.png') as f:
            code = AztecCode(data, *args, **kwargs)
            code.save(f, module_size=5)
            # Make sure the image is fully written before it is read back by file name
            f.flush()

            self._assert_decodes_to(f.name, data)

    def _encode_and_decode_svg(self, data, *args, **kwargs):
        """ Encode `data` as an AztecCode in SVG format, rasterize it with cairosvg, and check it decodes back

        Extra positional and keyword arguments are passed through to AztecCode.
        """
        with NamedTemporaryFile(suffix='.png') as f:
            svgf = BytesIO()
            code = AztecCode(data, *args, **kwargs)
            code.save(svgf, module_size=5, format='SVG')
            # The SVG is converted to PNG so that it is decoded from a raster image file
            f.write(cairosvg.svg2png(bytestring=svgf.getvalue()))
            f.flush()

            self._assert_decodes_to(f.name, data)

    @unittest.skipUnless(pyrxing, reason='Python module pyrxing cannot be imported; cannot test decoding.')
    def test_barcode_readability(self):
        """ Test that generated barcodes can be decoded, at various error correction levels """
        self._encode_and_decode('Wikipedia, the free encyclopedia', ec_percent=0)
        self._encode_and_decode('Wow. Much error. Very correction. Amaze', ec_percent=95)
        self._encode_and_decode('¿Cuánto cuesta?')

    @unittest.skipUnless(pyrxing, reason='Python module pyrxing cannot be imported; cannot test decoding.')
    @unittest.skipUnless(cairosvg, reason='Python module cairosvg cannot be imported; cannot test SVG decoding.')
    def test_barcode_readability_svg(self):
        """ Test that a barcode saved in SVG format can be decoded """
        self._encode_and_decode_svg('An Aztec barcode symbol in SVG format', ec_percent=5)

    @unittest.skipUnless(pyrxing, reason='Python module pyrxing cannot be imported; cannot test decoding.')
    def test_barcode_readability_eci(self):
        """ Test that barcodes generated with an explicit encoding can be decoded """
        self._encode_and_decode('The price is €4', encoding='utf-8')
        self._encode_and_decode('אין לי מושג', encoding='iso8859-8')

    #####
    # Tests for previously-found bugs that have now been fixed
    #####

    def test_find_optimal_sequence_CRLF_bug(self):
        r""" Demonstrate a now-fixed bug in find_optimal_sequence (https://github.com/dlenski/aztec_code_generator/pull/4)

        This is a much more minimal example of https://github.com/delimitry/aztec_code_generator/issues/7

        The string '\t<\r\n':
          SHOULD be sequenced as:          Latch.MIXED '\t' Latch.PUNCT '<' '\r\n'
          but was incorrectly sequenced as: Latch.MIXED '\t' Shift.PUNCT '<' '\r\n'

        ... which is impossible since no encoding of the 2 byte sequence b'\r\n' exists in MIXED mode. """

        self.assertEqual(find_optimal_sequence(b'\t<\r\n'), b(
            Latch.MIXED, '\t', Latch.PUNCT, '<', '\r\n'
        ))
        self.assertEqual(find_optimal_sequence(b'\t<\r\n\x01\x01'), b(
            Latch.MIXED, '\t', Shift.PUNCT, '<', '\r', '\n', '\x01', '\x01'
        ))
        self.assertEqual(find_optimal_sequence(b'\t<\r\nAA'), b(
            Latch.MIXED, '\t', Latch.PUNCT, '<', '\r\n', Latch.UPPER, 'A', 'A'
        ))

    def test_encoding_failure_long_sequence_FF(self):
        """ Demonstrate a now-fixed bug in find_suitable_matrix_size

        Per https://github.com/dlenski/aztec_code_generator/issues/7#issuecomment-4193498761,
        "when encoding 212 bytes 0xFF with `ec_percent=10` ... encoding is impossible"
        """
        # The test passes as long as constructing the code does not raise
        AztecCode(b'\xff'*212, ec_percent=10)

    def test_encoding_failure_long_sequence_00(self):
        """ Demonstrate a now-fixed bug in find_suitable_matrix_size

        Per https://github.com/dlenski/aztec_code_generator/issues/7#issuecomment-4193498761,
        this also happens when the input "contains long sequences of 0x00"
        """
        # The test passes as long as constructing the code does not raise
        AztecCode(b'\0'*212, ec_percent=10)

# When executed as a script, run the tests in this module with verbose output
if __name__ == '__main__':
    unittest.main(verbosity=2)