Skip to content
This repository was archived by the owner on Jan 22, 2026. It is now read-only.

Commit 8d3813c

Browse files
committed
Drop invalid content fields
1 parent 543ada4 commit 8d3813c

2 files changed

Lines changed: 34 additions & 21 deletions

File tree

moztelemetry/heka/message_parser.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,11 @@ def _parse_heka_record(record):
4848
try:
4949
string = zlib.decompress(field.value_bytes[0], 16+zlib.MAX_WBITS)
5050
except zlib.error:
51-
string = field.value_bytes[0].decode('utf-8')
51+
try:
52+
string = field.value_bytes[0].decode('utf-8')
53+
except UnicodeDecodeError:
54+
# There is no associated payload
55+
break
5256
payload = {"content": string}
5357
break
5458

tests/heka/test_message_parser.py

Lines changed: 29 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -218,23 +218,32 @@ def test_landfill_utf8_content():
218218
assert json.dumps(parsed) == json.dumps(expected)
219219

220220

221-
def test_landfill_invalid_content_raises_exception():
222-
223-
with pytest.raises(UnicodeDecodeError):
224-
message = Message(
225-
timestamp=1,
226-
type="t",
227-
hostname="h",
228-
payload=None,
229-
fields=[
230-
Field(
231-
name="content",
232-
value_string=None,
233-
# impossible unicode byte sequence
234-
# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
235-
value_bytes=['\xfe\xfe\xff\xff'],
236-
value_type=1
237-
)
238-
]
239-
)
240-
message_parser._parse_heka_record(Record(message))
221+
def test_landfill_invalid_content_is_empty():
222+
message = Message(
223+
timestamp=1,
224+
type="t",
225+
hostname="h",
226+
payload=None,
227+
fields=[
228+
Field(
229+
name="content",
230+
value_string=None,
231+
# impossible unicode byte sequence
232+
# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
233+
value_bytes=['\xfe\xfe\xff\xff'],
234+
value_type=1
235+
)
236+
]
237+
)
238+
239+
expected = {
240+
"meta": {
241+
"Timestamp": 1,
242+
"Type": "t",
243+
"Hostname": "h",
244+
}
245+
}
246+
247+
parsed = message_parser._parse_heka_record(Record(message))
248+
249+
assert json.dumps(parsed) == json.dumps(expected)

0 commit comments

Comments
 (0)