From ca7e929fb886dd480723b58d0a41761ed14955ad Mon Sep 17 00:00:00 2001 From: Akshat Nehra Date: Sat, 13 Jun 2026 01:04:41 +0000 Subject: [PATCH] MDEV-39933 JSON_NORMALIZE returns wrong result for invalid JSON with embedded NUL json_normalize() uses strlen() to determine the length of the charset-converted string, but strlen() stops at the first NUL byte. When the input contains embedded NUL bytes after charset conversion (e.g., from latin1 to utf8mb4), the converted string is silently truncated to the valid JSON prefix, causing JSON_NORMALIZE to return a normalized result instead of NULL. Fix: use the return value of my_convert() as the actual converted length instead of strlen(). my_convert() returns the number of bytes written, which correctly accounts for embedded NUL bytes. All new code of the whole pull request, including one or several files that are either new files or modified ones, are contributed under the BSD-new license. I am contributing on behalf of my employer Amazon Web Services, Inc. 
--- mysql-test/main/mdev_39933.result | 43 +++++++++++++++++++++++++++++++ mysql-test/main/mdev_39933.test | 33 ++++++++++++++++++++++++ strings/json_normalize.c | 5 ++-- 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 mysql-test/main/mdev_39933.result create mode 100644 mysql-test/main/mdev_39933.test diff --git a/mysql-test/main/mdev_39933.result b/mysql-test/main/mdev_39933.result new file mode 100644 index 0000000000000..8f49ee48e4668 --- /dev/null +++ b/mysql-test/main/mdev_39933.result @@ -0,0 +1,43 @@ +# +# MDEV-39933: Incorrect result of JSON_NORMALIZE on invalid json data +# +# JSON with trailing literal junk (no NUL) - should return NULL +SELECT JSON_NORMALIZE('{"a":1}0junk'); +JSON_NORMALIZE('{"a":1}0junk') +NULL +# JSON_VALID correctly rejects JSON with embedded NUL byte + trailing junk +SELECT JSON_VALID(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1)); +JSON_VALID(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1)) +0 +# JSON_NORMALIZE should also return NULL for invalid JSON with embedded NUL +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1)); +JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1)) +NULL +# Additional case: just a NUL after valid JSON +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0)) USING latin1)); +JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0)) USING latin1)) +NULL +# Valid JSON through conversion should still work +SELECT JSON_NORMALIZE(CONVERT('{"a":1}' USING latin1)); +JSON_NORMALIZE(CONVERT('{"a":1}' USING latin1)) +{"a":1.0E0} +# Multi-byte source charset (utf16) +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'x') USING utf16)); +JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'x') USING utf16)) +NULL +# NUL embedded inside a JSON string value +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":"b', CHAR(0), 'c"}') USING latin1)); +JSON_NORMALIZE(CONVERT(CONCAT('{"a":"b', CHAR(0), 'c"}') USING latin1)) +NULL +# JSON array with 
trailing NUL +SELECT JSON_NORMALIZE(CONVERT(CONCAT('[1,2,3]', CHAR(0), 'x') USING latin1)); +JSON_NORMALIZE(CONVERT(CONCAT('[1,2,3]', CHAR(0), 'x') USING latin1)) +NULL +# Nested object - valid through conversion +SELECT JSON_NORMALIZE(CONVERT('{"a":{"b":1}}' USING latin1)); +JSON_NORMALIZE(CONVERT('{"a":{"b":1}}' USING latin1)) +{"a":{"b":1.0E0}} +# Empty string +SELECT JSON_NORMALIZE(CONVERT('' USING latin1)); +JSON_NORMALIZE(CONVERT('' USING latin1)) +NULL diff --git a/mysql-test/main/mdev_39933.test b/mysql-test/main/mdev_39933.test new file mode 100644 index 0000000000000..ce9b4d7da9abc --- /dev/null +++ b/mysql-test/main/mdev_39933.test @@ -0,0 +1,33 @@ +--echo # +--echo # MDEV-39933: Incorrect result of JSON_NORMALIZE on invalid json data +--echo # + +--echo # JSON with trailing literal junk (no NUL) - should return NULL +SELECT JSON_NORMALIZE('{"a":1}0junk'); + +--echo # JSON_VALID correctly rejects JSON with embedded NUL byte + trailing junk +SELECT JSON_VALID(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1)); + +--echo # JSON_NORMALIZE should also return NULL for invalid JSON with embedded NUL +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1)); + +--echo # Additional case: just a NUL after valid JSON +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0)) USING latin1)); + +--echo # Valid JSON through conversion should still work +SELECT JSON_NORMALIZE(CONVERT('{"a":1}' USING latin1)); + +--echo # Multi-byte source charset (utf16) +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'x') USING utf16)); + +--echo # NUL embedded inside a JSON string value +SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":"b', CHAR(0), 'c"}') USING latin1)); + +--echo # JSON array with trailing NUL +SELECT JSON_NORMALIZE(CONVERT(CONCAT('[1,2,3]', CHAR(0), 'x') USING latin1)); + +--echo # Nested object - valid through conversion +SELECT JSON_NORMALIZE(CONVERT('{"a":{"b":1}}' USING latin1)); + +--echo # Empty string +SELECT 
JSON_NORMALIZE(CONVERT('' USING latin1)); diff --git a/strings/json_normalize.c b/strings/json_normalize.c index 81575f07069bc..257f0e41fe499 100644 --- a/strings/json_normalize.c +++ b/strings/json_normalize.c @@ -1030,15 +1030,14 @@ json_normalize(DYNAMIC_STRING *result, if (!s_utf8) return 1; memset(s_utf8, 0x00, in_size); - my_convert(s_utf8, (uint32)in_size, &my_charset_utf8mb4_bin, - s, (uint32)size, cs, &convert_err); + in_size= my_convert(s_utf8, (uint32)in_size, &my_charset_utf8mb4_bin, + s, (uint32)size, cs, &convert_err); if (convert_err) { my_free(s_utf8); return 1; } in= s_utf8; - in_size= strlen(s_utf8); }