From 30d1d5647ccdf31606d8706ecf8c4b4829415da1 Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Thu, 27 Nov 2025 23:22:08 -0300 Subject: [PATCH 1/8] feat: add simdjson subproject chore: stop ignoring c and cpp files They are generated in the build directory. fix: statically link simdjson --- .gitignore | 13 ++++--------- pandas/_libs/meson.build | 6 ++++++ pyproject.toml | 1 + subprojects/simdjson.wrap | 13 +++++++++++++ 4 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 subprojects/simdjson.wrap diff --git a/.gitignore b/.gitignore index a4a21293ab1ee..a5d4b2c404436 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,10 @@ monkeytype.sqlite3 # meson editable install folder .mesonpy +# Meson # +######### +/subprojects/* +!/subprojects/*.wrap # OS generated files # ###################### @@ -103,15 +107,6 @@ pandas/io/*.dat pandas/io/*.json scikits -# Generated Sources # -##################### -!skts.c -*.c -*.cpp -!pandas/_libs/src/**/*.c -!pandas/_libs/src/**/*.h -!pandas/_libs/include/**/*.h - # Unit / Performance Testing # ############################## asv_bench/env/ diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 33fc65e5034d0..302c65305f226 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -1,3 +1,9 @@ +simdjson_proj = subproject( + 'simdjson', + default_options: ['default_library=static'], +) +simdjson_dep = simdjson_proj.get_variable('simdjson_dep') + _algos_take_helper = custom_target( 'algos_take_helper_pxi', output: 'algos_take_helper.pxi', diff --git a/pyproject.toml b/pyproject.toml index 12a5633f136cc..c859343a94516 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,6 +142,7 @@ parentdir_prefix = "pandas-" [tool.meson-python.args] setup = ['--vsenv'] # For Windows +install = ['--skip-subprojects'] [tool.cibuildwheel] skip = ["*_i686", "*_ppc64le", "*_s390x"] diff --git a/subprojects/simdjson.wrap b/subprojects/simdjson.wrap new file mode 100644 index 0000000000000..dcfc8100c6c7a 
--- /dev/null +++ b/subprojects/simdjson.wrap @@ -0,0 +1,13 @@ +[wrap-file] +directory = simdjson-4.2.2 +source_url = https://github.com/simdjson/simdjson/archive/refs/tags/v4.2.2.tar.gz +source_filename = simdjson-4.2.2.tar.gz +source_hash = 3efae22cb41f83299fe0b2e8a187af543d3dda93abbb910586f897df670f9eaa +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/simdjson_4.2.2-1/simdjson-4.2.2.tar.gz +patch_filename = simdjson_4.2.2-1_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/simdjson_4.2.2-1/get_patch +patch_hash = ff4008b3547e71510cb49b159458ec5b7eee129832b9d9168eafea7b3c51ac34 +wrapdb_version = 4.2.2-1 + +[provide] +dependency_names = simdjson From fd89ab5065451dc903ebb6e04c30a111c5612fcd Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Fri, 28 Nov 2025 02:52:44 -0300 Subject: [PATCH 2/8] feat: create simdjson binding --- pandas/_libs/json.pyi | 7 - pandas/_libs/meson.build | 4 +- pandas/_libs/simdjson.pyi | 8 + pandas/_libs/src/parser/json.cpp | 198 +++ .../src/vendored/ujson/lib/ultrajsondec.c | 1221 ----------------- .../src/vendored/ujson/python/JSONtoObj.c | 171 --- .../_libs/src/vendored/ujson/python/ujson.c | 4 - pandas/io/json/__init__.py | 4 +- pandas/io/json/_json.py | 14 +- pandas/io/json/_table_schema.py | 4 +- .../json/test_json_table_schema_ext_dtype.py | 2 +- pandas/tests/io/json/test_ujson.py | 202 +-- 12 files changed, 329 insertions(+), 1510 deletions(-) create mode 100644 pandas/_libs/simdjson.pyi create mode 100644 pandas/_libs/src/parser/json.cpp delete mode 100644 pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c delete mode 100644 pandas/_libs/src/vendored/ujson/python/JSONtoObj.c diff --git a/pandas/_libs/json.pyi b/pandas/_libs/json.pyi index 349320d69d707..cd4af7ce2cefc 100644 --- a/pandas/_libs/json.pyi +++ b/pandas/_libs/json.pyi @@ -14,10 +14,3 @@ def ujson_dumps( default_handler: None | Callable[[Any], str | float | bool | list | dict | None] = ..., ) -> str: ... 
-def ujson_loads( - s: str, - precise_float: bool = ..., - numpy: bool = ..., - dtype: None = ..., - labelled: bool = ..., -) -> Any: ... diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 302c65305f226..ef7736a96ea62 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -128,11 +128,10 @@ libs_sources = { 'sources': [ 'src/vendored/ujson/python/ujson.c', 'src/vendored/ujson/python/objToJSON.c', - 'src/vendored/ujson/python/JSONtoObj.c', 'src/vendored/ujson/lib/ultrajsonenc.c', - 'src/vendored/ujson/lib/ultrajsondec.c', ], }, + 'simdjson': {'sources': ['src/parser/json.cpp'], 'deps': simdjson_dep}, 'ops': {'sources': ['ops.pyx']}, 'ops_dispatch': {'sources': ['ops_dispatch.pyx']}, 'properties': {'sources': ['properties.pyx']}, @@ -195,6 +194,7 @@ sources_to_install = [ 'properties.pyi', 'reshape.pyi', 'sas.pyi', + 'simdjson.pyi', 'sparse.pyi', 'testing.pyi', 'tslib.pyi', diff --git a/pandas/_libs/simdjson.pyi b/pandas/_libs/simdjson.pyi new file mode 100644 index 0000000000000..f47ebba1442b0 --- /dev/null +++ b/pandas/_libs/simdjson.pyi @@ -0,0 +1,8 @@ +from typing import ( + Any, +) + +def simdjson_loads( + s: str | bytes, + precise_float: bool = ..., +) -> Any: ... 
diff --git a/pandas/_libs/src/parser/json.cpp b/pandas/_libs/src/parser/json.cpp
new file mode 100644
index 0000000000000..a7ca4baa94738
--- /dev/null
+++ b/pandas/_libs/src/parser/json.cpp
@@ -0,0 +1,276 @@
+#define PY_SSIZE_T_CLEAN
+
+#include "Python.h"
+#include "simdjson.h"
+
+#include <memory>
+#include <new>
+#include <string>
+#include <string_view>
+
+namespace pandas {
+namespace json {
+using namespace simdjson;
+
+// Parser shared by every call so that its internal buffers are reused.
+// NOTE(review): a single shared parser is not safe for concurrent or
+// re-entrant use (e.g. free-threaded builds) -- confirm before relying on it.
+ondemand::parser parser;
+
+// Deleter that drops one strong reference. Holding new objects in a
+// std::unique_ptr releases them on every exit path, including when a
+// simdjson_error unwinds through the builders below.
+struct PyObjectDeleter {
+  void operator()(PyObject *obj) const noexcept { Py_DECREF(obj); }
+};
+using PyObjectPtr = std::unique_ptr<PyObject, PyObjectDeleter>;
+
+static PyObject *build_python_object(ondemand::value element);
+
+// Convert a JSON object into a new dict. Returns a new reference, or NULL with
+// a Python exception set. Throws simdjson_error on malformed input.
+static PyObject *object_to_dict(ondemand::object element) {
+  PyObjectPtr dict(PyDict_New());
+  if (!dict) {
+    return NULL;
+  }
+
+  for (auto field : element) {
+    std::string_view key = field.unescaped_key();
+    PyObjectPtr value_py(build_python_object(field.value()));
+    if (!value_py) {
+      return NULL;
+    }
+
+    PyObjectPtr key_py(PyUnicode_FromStringAndSize(key.data(), key.size()));
+    if (!key_py) {
+      return NULL;
+    }
+
+    // PyDict_SetItem takes its own references to the key and the value.
+    if (PyDict_SetItem(dict.get(), key_py.get(), value_py.get()) != 0) {
+      return NULL;
+    }
+  }
+
+  return dict.release();
+}
+
+// Convert a JSON array into a new list. Returns a new reference, or NULL with
+// a Python exception set. Throws simdjson_error on malformed input.
+static PyObject *array_to_list(ondemand::array element) {
+  PyObjectPtr list(PyList_New(0));
+  if (!list) {
+    return NULL;
+  }
+
+  for (auto child : element) {
+    PyObjectPtr item(build_python_object(child.value()));
+    if (!item) {
+      return NULL;
+    }
+
+    // PyList_Append takes its own reference to the item.
+    if (PyList_Append(list.get(), item.get()) != 0) {
+      return NULL;
+    }
+  }
+
+  return list.release();
+}
+
+// Build a Python int from the raw token of an integer too large for 64 bits.
+// The token is copied because PyLong_FromString needs a NUL terminator.
+static PyObject *big_int_to_pylong(ondemand::value element) {
+  std::string_view s = element.raw_json_token();
+  std::string null_terminated_s(s);
+  return PyLong_FromString(null_terminated_s.c_str(), NULL, 10);
+}
+
+// Convert a JSON number into a Python int or float (new reference).
+static PyObject *json_number_to_pyobject(ondemand::value element) {
+  ondemand::number num;
+  const simdjson::error_code err = element.get_number().get(num);
+  if (err == simdjson::BIGINT_ERROR) {
+    // get_number() may report integers wider than 64 bits as an error rather
+    // than as number_type::big_integer; both end up on the big-int path.
+    return big_int_to_pylong(element);
+  }
+  if (err) {
+    throw simdjson_error(err);
+  }
+
+  switch (num.get_number_type()) {
+  case ondemand::number_type::signed_integer:
+    return PyLong_FromLongLong(num.get_int64());
+  case ondemand::number_type::unsigned_integer:
+    return PyLong_FromUnsignedLongLong(num.get_uint64());
+  case ondemand::number_type::floating_point_number:
+    return PyFloat_FromDouble(num.get_double());
+  case ondemand::number_type::big_integer:
+    return big_int_to_pylong(element);
+  }
+
+  // Only reached for an unexpected enumerator; keeps every path returning.
+  PyErr_SetString(PyExc_ValueError, "Unsupported JSON number type");
+  return NULL;
+}
+
+// Convert a JSON string into a Python str (new reference).
+static PyObject *json_str_to_pyobject(ondemand::value element) {
+  std::string_view s = element.get_string(true);
+  return PyUnicode_FromStringAndSize(s.data(), s.size());
+}
+
+// Convert any JSON value into the matching Python object. Returns a new
+// reference, or NULL with a Python exception set. Throws simdjson_error on
+// malformed input.
+static PyObject *build_python_object(ondemand::value element) {
+  switch (element.type()) {
+  case ondemand::json_type::object:
+    return object_to_dict(element.get_object());
+  case ondemand::json_type::array:
+    return array_to_list(element.get_array());
+  case ondemand::json_type::boolean: {
+    const bool flag = element.get_bool();
+    // Callers release the result, so hand out a new reference.
+    return PyBool_FromLong(flag);
+  }
+  case ondemand::json_type::null: {
+    // NOTE(review): is_null() is used to validate the whole token, assuming
+    // type() alone does not -- confirm against the simdjson documentation.
+    const bool is_null = element.is_null();
+    if (!is_null) {
+      PyErr_SetString(PyExc_ValueError, "Expected null");
+      return NULL;
+    }
+    Py_RETURN_NONE;
+  }
+  case ondemand::json_type::string:
+    return json_str_to_pyobject(element);
+  case ondemand::json_type::number:
+    return json_number_to_pyobject(element);
+  case ondemand::json_type::unknown:
+    // TODO: improve error handling
+    PyErr_Format(PyExc_ValueError, "Some error occourred");
+    break;
+  }
+
+  return NULL;
+}
+
+} // namespace json
+} // namespace pandas
+
+extern "C" {
+
+// simdjson_loads(obj, precise_float=False): parse a JSON document passed as
+// str or bytes-like and return the matching Python object. Raises ValueError
+// when simdjson rejects the input.
+PyObject *json_loads(PyObject *Py_UNUSED(self), PyObject *args,
+                     PyObject *kwargs) {
+  static const char *kwlist[] = {"obj", "precise_float", NULL};
+  const char *buf = NULL;
+  Py_ssize_t len = 0;
+  // Unused. It's accepted for compatibility with the old parser. The "b"
+  // format unit stores into an unsigned char.
+  unsigned char precise_float = 0;
+  // Before Python 3.13 the keywords parameter is declared as char **, hence
+  // the cast.
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|b",
+                                   const_cast<char **>(kwlist), &buf, &len,
+                                   &precise_float)) {
+    return NULL;
+  }
+  (void)precise_float;
+
+  PyObject *ret = NULL;
+  try {
+    simdjson::padded_string padded_json(buf, len);
+    simdjson::ondemand::document doc =
+        pandas::json::parser.iterate(padded_json);
+    // The labels use simdjson::ondemand (not a specific implementation
+    // namespace) so that they have the same type as doc.type().
+    switch (doc.type()) {
+    case simdjson::ondemand::json_type::null: {
+      const bool is_null = doc.is_null();
+      if (!is_null) {
+        PyErr_SetString(PyExc_ValueError, "Expected null");
+        return NULL;
+      }
+      ret = Py_None;
+      Py_INCREF(ret);
+      break;
+    }
+    case simdjson::ondemand::json_type::boolean: {
+      const bool flag = doc.get_bool();
+      ret = PyBool_FromLong(flag);
+      break;
+    }
+    case simdjson::ondemand::json_type::number: {
+      simdjson::ondemand::number num = doc.get_number();
+      switch (num.get_number_type()) {
+      case simdjson::ondemand::number_type::signed_integer:
+        ret = PyLong_FromLongLong(num.get_int64());
+        break;
+      case simdjson::ondemand::number_type::unsigned_integer:
+        ret = PyLong_FromUnsignedLongLong(num.get_uint64());
+        break;
+      case simdjson::ondemand::number_type::floating_point_number:
+        ret = PyFloat_FromDouble(num.get_double());
+        break;
+      case simdjson::ondemand::number_type::big_integer:
+        PyErr_Format(PyExc_ValueError, "Overflow");
+        return NULL;
+      }
+      break;
+    }
+    case simdjson::ondemand::json_type::string: {
+      std::string_view s = doc.get_string();
+      ret = PyUnicode_FromStringAndSize(s.data(), s.size());
+      break;
+    }
+    default: {
+      simdjson::ondemand::value val = doc;
+      ret = pandas::json::build_python_object(val);
+      break;
+    }
+    }
+  } catch (const simdjson::simdjson_error &error) {
+    Py_XDECREF(ret);
+    // TODO: get location or token where error occurred
+    PyErr_Format(PyExc_ValueError, "JSON parsing error: %s", error.what());
+    return NULL;
+  } catch (const std::bad_alloc &) {
+    // C++ exceptions must not escape into the interpreter.
+    Py_XDECREF(ret);
+    return PyErr_NoMemory();
+  }
+
+  return ret;
+}
+
+static PyMethodDef json_methods[] = {
+    {"simdjson_loads", (PyCFunction)(void (*)(void))json_loads,
+     METH_VARARGS | METH_KEYWORDS, "Parse JSON string using simdjson"},
+    {NULL, NULL, 0, NULL} /* sentinel */
+};
+
+// Positional initialization: designated initializers are only standard C++
+// from C++20 on.
+static struct PyModuleDef json_module = {
+    PyModuleDef_HEAD_INIT,     /* m_base */
+    "pandas._libs.simdjson",   /* m_name */
+    "simdjson python binding", /* m_doc */
+    0,                         /* m_size */
+    json_methods,              /* m_methods */
+    NULL,                      /* m_slots */
+    NULL,                      /* m_traverse */
+    NULL,                      /* m_clear */
+    NULL,                      /* m_free */
+};
+
+PyMODINIT_FUNC PyInit_simdjson(void) { return PyModuleDef_Init(&json_module); }
+
+} // extern "C"
diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c
deleted file mode 100644
index bf389b4dce1d0..0000000000000
---
a/pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c +++ /dev/null @@ -1,1221 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -* Neither the name of the ESN Social Software AB nor the -names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE -LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights -reserved. 
- -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms -* Copyright (c) 1988-1993 The Regents of the University of California. -* Copyright (c) 1994 Sun Microsystems, Inc. -*/ - -// Licence at LICENSES/ULTRAJSON_LICENSE - -#include "pandas/vendored/ujson/lib/ultrajson.h" -#include -#include -#include -#include -#include -#include -#include - -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif -#ifndef NULL -#define NULL 0 -#endif - -struct DecoderState { - char *start; - char *end; - wchar_t *escStart; - wchar_t *escEnd; - int escHeap; - int lastType; - JSUINT32 objDepth; - void *prv; - JSONObjectDecoder *dec; -}; - -JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds); -typedef JSOBJ (*PFN_DECODER)(struct DecoderState *ds); - -static JSOBJ SetError(struct DecoderState *ds, int offset, - const char *message) { - ds->dec->errorOffset = ds->start + offset; - ds->dec->errorStr = (char *)message; - return NULL; -} - -double createDouble(double intNeg, double intValue, double frcValue, - int frcDecimalCount) { - static const double g_pow10[] = {1.0, - 0.1, - 0.01, - 0.001, - 0.0001, - 0.00001, - 0.000001, - 0.0000001, - 0.00000001, - 0.000000001, - 0.0000000001, - 0.00000000001, - 0.000000000001, - 0.0000000000001, - 0.00000000000001, - 0.000000000000001}; - return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; -} - -JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { - char *end; - double value; - errno = 0; - - value = strtod(ds->start, &end); - - if (errno == ERANGE) { - return SetError(ds, -1, "Range error when decoding numeric as double"); - } - - ds->start = end; - return ds->dec->newDouble(ds->prv, value); -} - -JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { - int intNeg = 1; - JSUINT64 intValue; - JSUINT64 prevIntValue; - int chr; - int decimalCount = 0; - double frcValue = 0.0; - double expNeg; - double expValue; - char *offset = ds->start; - - JSUINT64 
overflowLimit = LLONG_MAX; - - if (*(offset) == 'I') { - goto DECODE_INF; - } else if (*(offset) == 'N') { - goto DECODE_NAN; - } else if (*(offset) == '-') { - offset++; - intNeg = -1; - overflowLimit = LLONG_MIN; - if (*(offset) == 'I') { - goto DECODE_INF; - } - } - - // Scan integer part - intValue = 0; - - while (1) { - chr = (int)(unsigned char)*(offset); - - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - // PERF: Don't do 64-bit arithmetic here unless we have to - prevIntValue = intValue; - intValue = intValue * 10ULL + (JSLONG)(chr - 48); - - if (intNeg == 1 && prevIntValue > intValue) { - return SetError(ds, -1, "Value is too big!"); - } else if (intNeg == -1 && intValue > overflowLimit) { - return SetError(ds, -1, - overflowLimit == LLONG_MAX ? "Value is too big!" - : "Value is too small"); - } - - offset++; - break; - } - case '.': { - offset++; - goto DECODE_FRACTION; - break; - } - case 'e': - case 'E': { - offset++; - goto DECODE_EXPONENT; - break; - } - - default: { - goto BREAK_INT_LOOP; - break; - } - } - } - -BREAK_INT_LOOP: - - ds->lastType = JT_INT; - ds->start = offset; - - if (intNeg == 1 && (intValue & 0x8000000000000000ULL) != 0) - return ds->dec->newUnsignedLong(ds->prv, intValue); - else if ((intValue >> 31)) - return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); - else - return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); - -DECODE_FRACTION: - - if (ds->dec->preciseFloat) { - return decodePreciseFloat(ds); - } - - // Scan fraction part - frcValue = 0.0; - for (;;) { - chr = (int)(unsigned char)*(offset); - - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { - frcValue = frcValue * 10.0 + (double)(chr - 48); - decimalCount++; - } - offset++; - break; - } - case 'e': - case 'E': { - 
offset++; - goto DECODE_EXPONENT; - break; - } - default: { - goto BREAK_FRC_LOOP; - } - } - } - -BREAK_FRC_LOOP: - // FIXME: Check for arithmetic overflow here - ds->lastType = JT_DOUBLE; - ds->start = offset; - return ds->dec->newDouble( - ds->prv, - createDouble((double)intNeg, (double)intValue, frcValue, decimalCount)); - -DECODE_EXPONENT: - if (ds->dec->preciseFloat) { - return decodePreciseFloat(ds); - } - - expNeg = 1.0; - - if (*(offset) == '-') { - expNeg = -1.0; - offset++; - } else if (*(offset) == '+') { - expNeg = +1.0; - offset++; - } - - expValue = 0.0; - - for (;;) { - chr = (int)(unsigned char)*(offset); - - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - expValue = expValue * 10.0 + (double)(chr - 48); - offset++; - break; - } - default: { - goto BREAK_EXP_LOOP; - } - } - } - -DECODE_NAN: - offset++; - if (*(offset++) != 'a') - goto SET_NAN_ERROR; - if (*(offset++) != 'N') - goto SET_NAN_ERROR; - - ds->lastType = JT_NULL; - ds->start = offset; - return ds->dec->newNull(ds->prv); - -SET_NAN_ERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); - -DECODE_INF: - offset++; - if (*(offset++) != 'n') - goto SET_INF_ERROR; - if (*(offset++) != 'f') - goto SET_INF_ERROR; - if (*(offset++) != 'i') - goto SET_INF_ERROR; - if (*(offset++) != 'n') - goto SET_INF_ERROR; - if (*(offset++) != 'i') - goto SET_INF_ERROR; - if (*(offset++) != 't') - goto SET_INF_ERROR; - if (*(offset++) != 'y') - goto SET_INF_ERROR; - - ds->start = offset; - - if (intNeg == 1) { - ds->lastType = JT_POS_INF; - return ds->dec->newPosInf(ds->prv); - } else { - ds->lastType = JT_NEG_INF; - return ds->dec->newNegInf(ds->prv); - } - -SET_INF_ERROR: - if (intNeg == 1) { - const char *msg = "Unexpected character found when decoding 'Infinity'"; - return SetError(ds, -1, msg); - } else { - const char *msg = "Unexpected character found when decoding '-Infinity'"; - return 
SetError(ds, -1, msg); - } - -BREAK_EXP_LOOP: - // FIXME: Check for arithmetic overflow here - ds->lastType = JT_DOUBLE; - ds->start = offset; - return ds->dec->newDouble( - ds->prv, - createDouble((double)intNeg, (double)intValue, frcValue, decimalCount) * - pow(10.0, expValue * expNeg)); -} - -JSOBJ FASTCALL_MSVC decode_true(struct DecoderState *ds) { - char *offset = ds->start; - offset++; - - if (*(offset++) != 'r') - goto SETERROR; - if (*(offset++) != 'u') - goto SETERROR; - if (*(offset++) != 'e') - goto SETERROR; - - ds->lastType = JT_TRUE; - ds->start = offset; - return ds->dec->newTrue(ds->prv); - -SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'true'"); -} - -JSOBJ FASTCALL_MSVC decode_false(struct DecoderState *ds) { - char *offset = ds->start; - offset++; - - if (*(offset++) != 'a') - goto SETERROR; - if (*(offset++) != 'l') - goto SETERROR; - if (*(offset++) != 's') - goto SETERROR; - if (*(offset++) != 'e') - goto SETERROR; - - ds->lastType = JT_FALSE; - ds->start = offset; - return ds->dec->newFalse(ds->prv); - -SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'false'"); -} - -JSOBJ FASTCALL_MSVC decode_null(struct DecoderState *ds) { - char *offset = ds->start; - offset++; - - if (*(offset++) != 'u') - goto SETERROR; - if (*(offset++) != 'l') - goto SETERROR; - if (*(offset++) != 'l') - goto SETERROR; - - ds->lastType = JT_NULL; - ds->start = offset; - return ds->dec->newNull(ds->prv); - -SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'null'"); -} - -void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { - char *offset; - - for (offset = ds->start; (ds->end - offset) > 0; offset++) { - switch (*offset) { - case ' ': - case '\t': - case '\r': - case '\n': - break; - - default: - ds->start = offset; - return; - } - } - - if (offset == ds->end) { - ds->start = ds->end; - } -} - -enum DECODESTRINGSTATE { - DS_ISNULL = 0x32, - DS_ISQUOTE, - DS_ISESCAPE, - 
DS_UTFLENERROR, -}; - -static const JSUINT8 g_decoderLookup[256] = { - /* 0x00 */ DS_ISNULL, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x10 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x20 */ 1, - 1, - DS_ISQUOTE, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x30 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x40 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x50 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - DS_ISESCAPE, - 1, - 1, - 1, - /* 0x60 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x70 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x80 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x90 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xa0 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xb0 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xc0 */ 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - /* 0xd0 */ 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - /* 0xe0 */ 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - /* 0xf0 */ 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, -}; - -JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) { - JSUTF16 sur[2] = {0}; - int iSur = 0; - int index; - wchar_t *escOffset; - wchar_t *escStart; - size_t escLen = (ds->escEnd - ds->escStart); - JSUINT8 *inputOffset; - JSUINT8 oct; - JSUTF32 ucs; - ds->lastType = JT_INVALID; - ds->start++; - - if ((size_t)(ds->end - 
ds->start) > escLen) { - size_t newSize = (ds->end - ds->start); - - if (ds->escHeap) { - if (newSize > (SIZE_MAX / sizeof(wchar_t))) { - return SetError(ds, -1, "Could not reserve memory block"); - } - escStart = - (wchar_t *)ds->dec->realloc(ds->escStart, newSize * sizeof(wchar_t)); - if (!escStart) { - ds->dec->free(ds->escStart); - return SetError(ds, -1, "Could not reserve memory block"); - } - ds->escStart = escStart; - } else { - wchar_t *oldStart = ds->escStart; - if (newSize > (SIZE_MAX / sizeof(wchar_t))) { - return SetError(ds, -1, "Could not reserve memory block"); - } - ds->escStart = (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t)); - if (!ds->escStart) { - return SetError(ds, -1, "Could not reserve memory block"); - } - ds->escHeap = 1; - memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); - } - - ds->escEnd = ds->escStart + newSize; - } - - escOffset = ds->escStart; - inputOffset = (JSUINT8 *)ds->start; - - for (;;) { - switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { - case DS_ISNULL: { - return SetError(ds, -1, "Unmatched ''\"' when when decoding 'string'"); - } - case DS_ISQUOTE: { - ds->lastType = JT_UTF8; - inputOffset++; - ds->start += ((char *)inputOffset - (ds->start)); - return ds->dec->newString(ds->prv, ds->escStart, escOffset); - } - case DS_UTFLENERROR: { - return SetError(ds, -1, - "Invalid UTF-8 sequence length when decoding 'string'"); - } - case DS_ISESCAPE: - inputOffset++; - switch (*inputOffset) { - case '\\': - *(escOffset++) = L'\\'; - inputOffset++; - continue; - case '\"': - *(escOffset++) = L'\"'; - inputOffset++; - continue; - case '/': - *(escOffset++) = L'/'; - inputOffset++; - continue; - case 'b': - *(escOffset++) = L'\b'; - inputOffset++; - continue; - case 'f': - *(escOffset++) = L'\f'; - inputOffset++; - continue; - case 'n': - *(escOffset++) = L'\n'; - inputOffset++; - continue; - case 'r': - *(escOffset++) = L'\r'; - inputOffset++; - continue; - case 't': - *(escOffset++) = L'\t'; - inputOffset++; - 
continue; - - case 'u': { - int index; - inputOffset++; - - for (index = 0; index < 4; index++) { - switch (*inputOffset) { - case '\0': - return SetError(ds, -1, - "Unterminated unicode " - "escape sequence when " - "decoding 'string'"); - default: - return SetError(ds, -1, - "Unexpected character in " - "unicode escape sequence " - "when decoding 'string'"); - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - sur[iSur] = (sur[iSur] << 4) + (JSUTF16)(*inputOffset - '0'); - break; - - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16)(*inputOffset - 'a'); - break; - - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16)(*inputOffset - 'A'); - break; - } - - inputOffset++; - } - - if (iSur == 0) { - if ((sur[iSur] & 0xfc00) == 0xd800) { - // First of a surrogate pair, continue parsing - iSur++; - break; - } - (*escOffset++) = (wchar_t)sur[iSur]; - iSur = 0; - } else { - // Decode pair - if ((sur[1] & 0xfc00) != 0xdc00) { - return SetError(ds, -1, - "Unpaired high surrogate when " - "decoding 'string'"); - } -#if WCHAR_MAX == 0xffff - (*escOffset++) = (wchar_t)sur[0]; - (*escOffset++) = (wchar_t)sur[1]; -#else - (*escOffset++) = (wchar_t)0x10000 + - (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); -#endif - iSur = 0; - } - break; - } - - case '\0': - return SetError(ds, -1, - "Unterminated escape sequence when " - "decoding 'string'"); - default: - return SetError(ds, -1, - "Unrecognized escape sequence when " - "decoding 'string'"); - } - break; - - case 1: { - *(escOffset++) = (wchar_t)(*inputOffset++); - break; - } - - case 2: { - ucs = (*inputOffset++) & 0x1f; - ucs <<= 6; - if (((*inputOffset) & 0x80) != 0x80) { - return SetError(ds, -1, - "Invalid octet in UTF-8 sequence when " - "decoding 'string'"); - } - ucs |= (*inputOffset++) & 0x3f; - if (ucs < 0x80) - 
return SetError(ds, -1, - "Overlong 2 byte UTF-8 sequence detected " - "when decoding 'string'"); - *(escOffset++) = (wchar_t)ucs; - break; - } - - case 3: { - JSUTF32 ucs = 0; - ucs |= (*inputOffset++) & 0x0f; - - for (index = 0; index < 2; index++) { - ucs <<= 6; - oct = (*inputOffset++); - - if ((oct & 0x80) != 0x80) { - return SetError(ds, -1, - "Invalid octet in UTF-8 sequence when " - "decoding 'string'"); - } - - ucs |= oct & 0x3f; - } - - if (ucs < 0x800) - return SetError(ds, -1, - "Overlong 3 byte UTF-8 sequence detected " - "when encoding string"); - *(escOffset++) = (wchar_t)ucs; - break; - } - - case 4: { - JSUTF32 ucs = 0; - ucs |= (*inputOffset++) & 0x07; - - for (index = 0; index < 3; index++) { - ucs <<= 6; - oct = (*inputOffset++); - - if ((oct & 0x80) != 0x80) { - return SetError(ds, -1, - "Invalid octet in UTF-8 sequence when " - "decoding 'string'"); - } - - ucs |= oct & 0x3f; - } - - if (ucs < 0x10000) - return SetError(ds, -1, - "Overlong 4 byte UTF-8 sequence detected " - "when decoding 'string'"); - -#if WCHAR_MAX == 0xffff - if (ucs >= 0x10000) { - ucs -= 0x10000; - *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800; - *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00; - } else { - *(escOffset++) = (wchar_t)ucs; - } -#else - *(escOffset++) = (wchar_t)ucs; -#endif - break; - } - } - } -} - -JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { - JSOBJ itemValue; - JSOBJ newObj; - int len; - ds->objDepth++; - if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { - return SetError(ds, -1, "Reached object decoding depth limit"); - } - - newObj = ds->dec->newArray(ds->prv, ds->dec); - len = 0; - - ds->lastType = JT_INVALID; - ds->start++; - - for (;;) { - SkipWhitespace(ds); - - if ((*ds->start) == ']') { - ds->objDepth--; - if (len == 0) { - ds->start++; - return ds->dec->endArray(ds->prv, newObj); - } - - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError( - ds, -1, "Unexpected character found when decoding array value (1)"); - } - - 
itemValue = decode_any(ds); - - if (itemValue == NULL) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return NULL; - } - - if (!ds->dec->arrayAddItem(ds->prv, newObj, itemValue)) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return NULL; - } - - SkipWhitespace(ds); - - switch (*(ds->start++)) { - case ']': { - ds->objDepth--; - return ds->dec->endArray(ds->prv, newObj); - } - case ',': - break; - - default: - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError( - ds, -1, "Unexpected character found when decoding array value (2)"); - } - - len++; - } -} - -JSOBJ FASTCALL_MSVC decode_object(struct DecoderState *ds) { - JSOBJ itemName; - JSOBJ itemValue; - JSOBJ newObj; - - ds->objDepth++; - if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { - return SetError(ds, -1, "Reached object decoding depth limit"); - } - - newObj = ds->dec->newObject(ds->prv, ds->dec); - - ds->start++; - - for (;;) { - SkipWhitespace(ds); - - if ((*ds->start) == '}') { - ds->objDepth--; - ds->start++; - return ds->dec->endObject(ds->prv, newObj); - } - - ds->lastType = JT_INVALID; - itemName = decode_any(ds); - - if (itemName == NULL) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return NULL; - } - - if (ds->lastType != JT_UTF8) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - return SetError( - ds, -1, "Key name of object must be 'string' when decoding 'object'"); - } - - SkipWhitespace(ds); - - if (*(ds->start++) != ':') { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - return SetError(ds, -1, "No ':' found when decoding object value"); - } - - SkipWhitespace(ds); - - itemValue = decode_any(ds); - - if (itemValue == NULL) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - return NULL; - } - - if (!ds->dec->objectAddKey(ds->prv, newObj, itemName, itemValue)) { - 
ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - ds->dec->releaseObject(ds->prv, itemValue, ds->dec); - return NULL; - } - - SkipWhitespace(ds); - - switch (*(ds->start++)) { - case '}': { - ds->objDepth--; - return ds->dec->endObject(ds->prv, newObj); - } - case ',': - break; - - default: - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError(ds, -1, - "Unexpected character found when decoding object value"); - } - } -} - -JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { - for (;;) { - switch (*ds->start) { - case '\"': - return decode_string(ds); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case 'I': - case 'N': - case '-': - return decode_numeric(ds); - - case '[': - return decode_array(ds); - case '{': - return decode_object(ds); - case 't': - return decode_true(ds); - case 'f': - return decode_false(ds); - case 'n': - return decode_null(ds); - - case ' ': - case '\t': - case '\r': - case '\n': - // White space - ds->start++; - break; - - default: - return SetError(ds, -1, "Expected object or value"); - } - } -} - -JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, - size_t cbBuffer) { - /* - FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode - escaping doesn't run into the wall each time */ - char *locale; - struct DecoderState ds; - wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; - JSOBJ ret; - - ds.start = (char *)buffer; - ds.end = ds.start + cbBuffer; - - ds.escStart = escBuffer; - ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); - ds.escHeap = 0; - ds.prv = dec->prv; - ds.dec = dec; - ds.dec->errorStr = NULL; - ds.dec->errorOffset = NULL; - ds.objDepth = 0; - - ds.dec = dec; - - locale = setlocale(LC_NUMERIC, NULL); - if (!locale) { - return SetError(&ds, -1, "setlocale call failed"); - } - - if (strcmp(locale, "C")) 
{ - size_t len = strlen(locale) + 1; - char *saved_locale = malloc(len); - if (saved_locale == NULL) { - return SetError(&ds, -1, "Could not reserve memory block"); - } - memcpy(saved_locale, locale, len); - setlocale(LC_NUMERIC, "C"); - ret = decode_any(&ds); - setlocale(LC_NUMERIC, saved_locale); - free(saved_locale); - } else { - ret = decode_any(&ds); - } - - if (ds.escHeap) { - dec->free(ds.escStart); - } - - SkipWhitespace(&ds); - - if (ds.start != ds.end && ret) { - dec->releaseObject(ds.prv, ret, ds.dec); - return SetError(&ds, -1, "Trailing data"); - } - - return ret; -} diff --git a/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c b/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c deleted file mode 100644 index ef6f1104a1fb9..0000000000000 --- a/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c +++ /dev/null @@ -1,171 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the ESN Social Software AB nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights -reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms - * Copyright (c) 1988-1993 The Regents of the University of California. - * Copyright (c) 1994 Sun Microsystems, Inc. -*/ - -// Licence at LICENSES/ULTRAJSON_LICENSE - -#define PY_SSIZE_T_CLEAN -#include - -#include "pandas/vendored/ujson/lib/ultrajson.h" - -static int Object_objectAddKey(void *Py_UNUSED(prv), JSOBJ obj, JSOBJ name, - JSOBJ value) { - int ret = PyDict_SetItem(obj, name, value); - Py_DECREF((PyObject *)name); - Py_DECREF((PyObject *)value); - return ret == 0 ? 1 : 0; -} - -static int Object_arrayAddItem(void *Py_UNUSED(prv), JSOBJ obj, JSOBJ value) { - int ret = PyList_Append(obj, value); - Py_DECREF((PyObject *)value); - return ret == 0 ? 
1 : 0; -} - -static JSOBJ Object_newString(void *Py_UNUSED(prv), wchar_t *start, - wchar_t *end) { - return PyUnicode_FromWideChar(start, (end - start)); -} - -static JSOBJ Object_newTrue(void *Py_UNUSED(prv)) { Py_RETURN_TRUE; } - -static JSOBJ Object_newFalse(void *Py_UNUSED(prv)) { Py_RETURN_FALSE; } - -static JSOBJ Object_newNull(void *Py_UNUSED(prv)) { Py_RETURN_NONE; } - -static JSOBJ Object_newPosInf(void *Py_UNUSED(prv)) { - return PyFloat_FromDouble(Py_HUGE_VAL); -} - -static JSOBJ Object_newNegInf(void *Py_UNUSED(prv)) { - return PyFloat_FromDouble(-Py_HUGE_VAL); -} - -static JSOBJ Object_newObject(void *Py_UNUSED(prv), void *Py_UNUSED(decoder)) { - return PyDict_New(); -} - -static JSOBJ Object_endObject(void *Py_UNUSED(prv), JSOBJ obj) { return obj; } - -static JSOBJ Object_newArray(void *Py_UNUSED(prv), void *Py_UNUSED(decoder)) { - return PyList_New(0); -} - -static JSOBJ Object_endArray(void *Py_UNUSED(prv), JSOBJ obj) { return obj; } - -static JSOBJ Object_newInteger(void *Py_UNUSED(prv), JSINT32 value) { - return PyLong_FromLong(value); -} - -static JSOBJ Object_newLong(void *Py_UNUSED(prv), JSINT64 value) { - return PyLong_FromLongLong(value); -} - -static JSOBJ Object_newUnsignedLong(void *Py_UNUSED(prv), JSUINT64 value) { - return PyLong_FromUnsignedLongLong(value); -} - -static JSOBJ Object_newDouble(void *Py_UNUSED(prv), double value) { - return PyFloat_FromDouble(value); -} - -static void Object_releaseObject(void *Py_UNUSED(prv), JSOBJ obj, - void *Py_UNUSED(decoder)) { - Py_XDECREF(((PyObject *)obj)); -} - -PyObject *JSONToObj(PyObject *Py_UNUSED(self), PyObject *args, - PyObject *kwargs) { - JSONObjectDecoder dec = {.newString = Object_newString, - .objectAddKey = Object_objectAddKey, - .arrayAddItem = Object_arrayAddItem, - .newTrue = Object_newTrue, - .newFalse = Object_newFalse, - .newNull = Object_newNull, - .newPosInf = Object_newPosInf, - .newNegInf = Object_newNegInf, - .newObject = Object_newObject, - .endObject = Object_endObject, 
- .newArray = Object_newArray, - .endArray = Object_endArray, - .newInt = Object_newInteger, - .newLong = Object_newLong, - .newUnsignedLong = Object_newUnsignedLong, - .newDouble = Object_newDouble, - .releaseObject = Object_releaseObject, - .malloc = PyObject_Malloc, - .free = PyObject_Free, - .realloc = PyObject_Realloc, - .errorStr = NULL, - .errorOffset = NULL, - .preciseFloat = 0, - .prv = NULL}; - - char *kwlist[] = {"obj", "precise_float", NULL}; - char *buf; - Py_ssize_t len; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|b", kwlist, &buf, &len, - &dec.preciseFloat)) { - return NULL; - } - - PyObject *ret = JSON_DecodeObject(&dec, buf, len); - - if (PyErr_Occurred()) { - if (ret) { - Py_DECREF((PyObject *)ret); - } - return NULL; - } - - if (dec.errorStr) { - /* - FIXME: It's possible to give a much nicer error message here with actual - failing element in input etc*/ - - PyErr_Format(PyExc_ValueError, "%s", dec.errorStr); - - if (ret) { - Py_DECREF((PyObject *)ret); - } - - return NULL; - } - - return ret; -} diff --git a/pandas/_libs/src/vendored/ujson/python/ujson.c b/pandas/_libs/src/vendored/ujson/python/ujson.c index 2ee084b9304f4..2d08471d06cc0 100644 --- a/pandas/_libs/src/vendored/ujson/python/ujson.c +++ b/pandas/_libs/src/vendored/ujson/python/ujson.c @@ -60,10 +60,6 @@ static PyMethodDef ujsonMethods[] = { {"ujson_dumps", (PyCFunction)(void (*)(void))objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, - {"ujson_loads", (PyCFunction)(void (*)(void))JSONToObj, - METH_VARARGS | METH_KEYWORDS, - "Converts JSON as string to dict object structure. 
Use precise_float=True " - "to use high precision float decoder."}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py index 39f78e26d6041..dbff1d23d86b5 100644 --- a/pandas/io/json/__init__.py +++ b/pandas/io/json/__init__.py @@ -1,15 +1,15 @@ from pandas.io.json._json import ( read_json, + simdjson_loads, to_json, ujson_dumps, - ujson_loads, ) from pandas.io.json._table_schema import build_table_schema __all__ = [ "build_table_schema", "read_json", + "simdjson_loads", "to_json", "ujson_dumps", - "ujson_loads", ] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 985a192eb79f4..767379f96b97d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -24,8 +24,10 @@ from pandas._libs import lib from pandas._libs.json import ( ujson_dumps, - ujson_loads, ) + +# TODO: rename simdjson to json after done refactoring +from pandas._libs.simdjson import simdjson_loads from pandas._libs.tslibs import iNaT from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError @@ -1326,7 +1328,7 @@ class SeriesParser(Parser): _split_keys = ("name", "index", "data") def _parse(self) -> Series: - data = ujson_loads(self.json, precise_float=self.precise_float) + data = simdjson_loads(self.json, precise_float=self.precise_float) if self.orient == "split": decoded = {str(k): v for k, v in data.items()} @@ -1351,7 +1353,9 @@ def _parse(self) -> DataFrame: if orient == "split": decoded = { str(k): v - for k, v in ujson_loads(json, precise_float=self.precise_float).items() + for k, v in simdjson_loads( + json, precise_float=self.precise_float + ).items() } self.check_keys_split(decoded) orig_names = [ @@ -1365,7 +1369,7 @@ def _parse(self) -> DataFrame: return DataFrame(dtype=None, **decoded) elif orient == "index": return DataFrame.from_dict( - ujson_loads(json, precise_float=self.precise_float), + simdjson_loads(json, precise_float=self.precise_float), 
dtype=None, orient="index", ) @@ -1374,7 +1378,7 @@ def _parse(self) -> DataFrame: else: # includes orient == "columns" return DataFrame( - ujson_loads(json, precise_float=self.precise_float), dtype=None + simdjson_loads(json, precise_float=self.precise_float), dtype=None ) def _try_convert_types(self, obj: DataFrame) -> DataFrame: diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 1723be3de6e82..b408578433171 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -16,7 +16,7 @@ from pandas._config import option_context from pandas._libs import lib -from pandas._libs.json import ujson_loads +from pandas._libs.simdjson import simdjson_loads from pandas._libs.tslibs import timezones from pandas.util._exceptions import find_stack_level @@ -371,7 +371,7 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame: build_table_schema : Inverse function. pandas.read_json """ - table = ujson_loads(json, precise_float=precise_float) + table = simdjson_loads(json, precise_float=precise_float) col_order = [field["name"] for field in table["schema"]["fields"]] df = DataFrame(table["data"], columns=col_order)[col_order] diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index 2b775a43aa321..2aadcc0f41634 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -280,7 +280,7 @@ def test_json_ext_dtype_reading(self): "type":"integer", "extDtype":"Int64" } - ], + ] }, "data":[ { diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 9470841c87abb..5c932843c0577 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -11,6 +11,7 @@ import numpy as np import pytest +from pandas._libs import simdjson import pandas._libs.json as ujson from pandas.compat import IS64 @@ -69,7 +70,7 @@ class 
TestUltraJSONTests: def test_encode_decimal(self, value, double_precision): sut = decimal.Decimal(value) encoded = ujson.ujson_dumps(sut, double_precision=double_precision) - decoded = ujson.ujson_loads(encoded) + decoded = simdjson.simdjson_loads(encoded) assert decoded == value @pytest.mark.parametrize("ensure_ascii", [True, False]) @@ -87,7 +88,7 @@ def helper(expected_output, **encode_kwargs): assert output == expected_output assert string_input == json.loads(output) - assert string_input == ujson.ujson_loads(output) + assert string_input == simdjson.simdjson_loads(output) # Default behavior assumes encode_html_chars=False. helper(not_html_encoded) @@ -105,7 +106,7 @@ def test_double_long_numbers(self, long_number): sut = {"a": long_number} encoded = ujson.ujson_dumps(sut, double_precision=15) - decoded = ujson.ujson_loads(encoded) + decoded = simdjson.simdjson_loads(encoded) assert sut == decoded def test_encode_non_c_locale(self): @@ -115,36 +116,38 @@ def test_encode_non_c_locale(self): for new_locale in ("it_IT.UTF-8", "Italian_Italy"): if tm.can_set_locale(new_locale, lc_category): with tm.set_locale(new_locale, lc_category): - assert ujson.ujson_loads(ujson.ujson_dumps(4.78e60)) == 4.78e60 - assert ujson.ujson_loads("4.78", precise_float=True) == 4.78 + assert ( + simdjson.simdjson_loads(ujson.ujson_dumps(4.78e60)) == 4.78e60 + ) + assert simdjson.simdjson_loads("4.78", precise_float=True) == 4.78 break def test_decimal_decode_test_precise(self): sut = {"a": 4.56} encoded = ujson.ujson_dumps(sut) - decoded = ujson.ujson_loads(encoded, precise_float=True) + decoded = simdjson.simdjson_loads(encoded, precise_float=True) assert sut == decoded def test_encode_double_tiny_exponential(self): num = 1e-40 - assert num == ujson.ujson_loads(ujson.ujson_dumps(num)) + assert num == simdjson.simdjson_loads(ujson.ujson_dumps(num)) num = 1e-100 - assert num == ujson.ujson_loads(ujson.ujson_dumps(num)) + assert num == simdjson.simdjson_loads(ujson.ujson_dumps(num)) num = 
-1e-45 - assert num == ujson.ujson_loads(ujson.ujson_dumps(num)) + assert num == simdjson.simdjson_loads(ujson.ujson_dumps(num)) num = -1e-145 - assert np.allclose(num, ujson.ujson_loads(ujson.ujson_dumps(num))) + assert np.allclose(num, simdjson.simdjson_loads(ujson.ujson_dumps(num))) @pytest.mark.parametrize("unicode_key", ["key1", "بن"]) def test_encode_dict_with_unicode_keys(self, unicode_key): unicode_dict = {unicode_key: "value1"} - assert unicode_dict == ujson.ujson_loads(ujson.ujson_dumps(unicode_dict)) + assert unicode_dict == simdjson.simdjson_loads(ujson.ujson_dumps(unicode_dict)) @pytest.mark.parametrize("double_input", [math.pi, -math.pi]) def test_encode_double_conversion(self, double_input): output = ujson.ujson_dumps(double_input) assert round(double_input, 5) == round(json.loads(output), 5) - assert round(double_input, 5) == round(ujson.ujson_loads(output), 5) + assert round(double_input, 5) == round(simdjson.simdjson_loads(output), 5) def test_encode_with_decimal(self): decimal_input = 1.0 @@ -157,28 +160,28 @@ def test_encode_array_of_nested_arrays(self): output = ujson.ujson_dumps(nested_input) assert nested_input == json.loads(output) - assert nested_input == ujson.ujson_loads(output) + assert nested_input == simdjson.simdjson_loads(output) def test_encode_array_of_doubles(self): doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10 output = ujson.ujson_dumps(doubles_input) assert doubles_input == json.loads(output) - assert doubles_input == ujson.ujson_loads(output) + assert doubles_input == simdjson.simdjson_loads(output) def test_double_precision(self): double_input = 30.012345678901234 output = ujson.ujson_dumps(double_input, double_precision=15) assert double_input == json.loads(output) - assert double_input == ujson.ujson_loads(output) + assert double_input == simdjson.simdjson_loads(output) for double_precision in (3, 9): output = ujson.ujson_dumps(double_input, double_precision=double_precision) rounded_input = 
round(double_input, double_precision) assert rounded_input == json.loads(output) - assert rounded_input == ujson.ujson_loads(output) + assert rounded_input == simdjson.simdjson_loads(output) @pytest.mark.parametrize( "invalid_val", @@ -205,7 +208,7 @@ def test_encode_string_conversion2(self): output = ujson.ujson_dumps(string_input) assert string_input == json.loads(output) - assert string_input == ujson.ujson_loads(output) + assert string_input == simdjson.simdjson_loads(output) assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"' @pytest.mark.parametrize( @@ -214,7 +217,7 @@ def test_encode_string_conversion2(self): ) def test_encode_unicode_conversion(self, unicode_input): enc = ujson.ujson_dumps(unicode_input) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) assert enc == json.dumps(unicode_input) assert dec == json.loads(enc) @@ -222,7 +225,7 @@ def test_encode_unicode_conversion(self, unicode_input): def test_encode_control_escaping(self): escaped_input = "\x19" enc = ujson.ujson_dumps(escaped_input) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) assert escaped_input == dec assert enc == json.dumps(escaped_input) @@ -230,7 +233,7 @@ def test_encode_control_escaping(self): def test_encode_unicode_surrogate_pair(self): surrogate_input = "\xf0\x90\x8d\x86" enc = ujson.ujson_dumps(surrogate_input) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) assert enc == json.dumps(surrogate_input) assert dec == json.loads(enc) @@ -238,7 +241,7 @@ def test_encode_unicode_surrogate_pair(self): def test_encode_unicode_4bytes_utf8(self): four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL" enc = ujson.ujson_dumps(four_bytes_input) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) assert enc == json.dumps(four_bytes_input) assert dec == json.loads(enc) @@ -247,7 +250,7 @@ def test_encode_unicode_4bytes_utf8highest(self): four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL" enc = 
ujson.ujson_dumps(four_bytes_input) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) assert enc == json.dumps(four_bytes_input) assert dec == json.loads(enc) @@ -267,7 +270,7 @@ def test_encode_array_in_array(self): assert arr_in_arr_input == json.loads(output) assert output == json.dumps(arr_in_arr_input) - assert arr_in_arr_input == ujson.ujson_loads(output) + assert arr_in_arr_input == simdjson.simdjson_loads(output) @pytest.mark.parametrize( "num_input", @@ -281,28 +284,28 @@ def test_encode_num_conversion(self, num_input): output = ujson.ujson_dumps(num_input) assert num_input == json.loads(output) assert output == json.dumps(num_input) - assert num_input == ujson.ujson_loads(output) + assert num_input == simdjson.simdjson_loads(output) def test_encode_list_conversion(self): list_input = [1, 2, 3, 4] output = ujson.ujson_dumps(list_input) assert list_input == json.loads(output) - assert list_input == ujson.ujson_loads(output) + assert list_input == simdjson.simdjson_loads(output) def test_encode_dict_conversion(self): dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4} output = ujson.ujson_dumps(dict_input) assert dict_input == json.loads(output) - assert dict_input == ujson.ujson_loads(output) + assert dict_input == simdjson.simdjson_loads(output) @pytest.mark.parametrize("builtin_value", [None, True, False]) def test_encode_builtin_values_conversion(self, builtin_value): output = ujson.ujson_dumps(builtin_value) assert builtin_value == json.loads(output) assert output == json.dumps(builtin_value) - assert builtin_value == ujson.ujson_loads(output) + assert builtin_value == simdjson.simdjson_loads(output) def test_encode_datetime_conversion(self): datetime_input = datetime.datetime.fromtimestamp(time.time()) @@ -310,7 +313,7 @@ def test_encode_datetime_conversion(self): expected = calendar.timegm(datetime_input.utctimetuple()) assert int(expected) == json.loads(output) - assert int(expected) == ujson.ujson_loads(output) + assert int(expected) 
== simdjson.simdjson_loads(output) def test_encode_date_conversion(self): date_input = datetime.date.fromtimestamp(time.time()) @@ -320,7 +323,7 @@ def test_encode_date_conversion(self): expected = calendar.timegm(tup) assert int(expected) == json.loads(output) - assert int(expected) == ujson.ujson_loads(output) + assert int(expected) == simdjson.simdjson_loads(output) @pytest.mark.parametrize( "test", @@ -356,16 +359,16 @@ def test_datetime_units(self): val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val).as_unit("ns") - roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="s")) + roundtrip = simdjson.simdjson_loads(ujson.ujson_dumps(val, date_unit="s")) assert roundtrip == stamp._value // 10**9 - roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="ms")) + roundtrip = simdjson.simdjson_loads(ujson.ujson_dumps(val, date_unit="ms")) assert roundtrip == stamp._value // 10**6 - roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="us")) + roundtrip = simdjson.simdjson_loads(ujson.ujson_dumps(val, date_unit="us")) assert roundtrip == stamp._value // 10**3 - roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="ns")) + roundtrip = simdjson.simdjson_loads(ujson.ujson_dumps(val, date_unit="ns")) assert roundtrip == stamp._value msg = "Invalid value 'foo' for option 'date_unit'" @@ -376,7 +379,7 @@ def test_encode_to_utf8(self): unencoded = "\xe6\x97\xa5\xd1\x88" enc = ujson.ujson_dumps(unencoded, ensure_ascii=False) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) assert enc == json.dumps(unencoded, ensure_ascii=False) assert dec == json.loads(enc) @@ -384,8 +387,8 @@ def test_encode_to_utf8(self): def test_decode_from_unicode(self): unicode_input = '{"obj": 31337}' - dec1 = ujson.ujson_loads(unicode_input) - dec2 = ujson.ujson_loads(str(unicode_input)) + dec1 = simdjson.simdjson_loads(unicode_input) + dec2 = simdjson.simdjson_loads(str(unicode_input)) assert dec1 == dec2 @@ -409,7 
+412,7 @@ def test_decode_jibberish(self): jibberish = "fdsa sda v9sa fdsa" msg = "Unexpected character found when decoding 'false'" with pytest.raises(ValueError, match=msg): - ujson.ujson_loads(jibberish) + simdjson.simdjson_loads(jibberish) @pytest.mark.parametrize( "broken_json", @@ -423,12 +426,12 @@ def test_decode_jibberish(self): def test_decode_broken_json(self, broken_json): msg = "Expected object or value" with pytest.raises(ValueError, match=msg): - ujson.ujson_loads(broken_json) + simdjson.simdjson_loads(broken_json) @pytest.mark.parametrize("too_big_char", ["[", "{"]) def test_decode_depth_too_big(self, too_big_char): with pytest.raises(ValueError, match="Reached object decoding depth limit"): - ujson.ujson_loads(too_big_char * (1024 * 1024)) + simdjson.simdjson_loads(too_big_char * (1024 * 1024)) @pytest.mark.parametrize( "bad_string", @@ -446,7 +449,7 @@ def test_decode_bad_string(self, bad_string): "Unmatched ''\"' when when decoding 'string'" ) with pytest.raises(ValueError, match=msg): - ujson.ujson_loads(bad_string) + simdjson.simdjson_loads(bad_string) @pytest.mark.parametrize( "broken_json, err_msg", @@ -462,7 +465,7 @@ def test_decode_bad_string(self, bad_string): def test_decode_broken_json_leak(self, broken_json, err_msg): for _ in range(1000): with pytest.raises(ValueError, match=re.escape(err_msg)): - ujson.ujson_loads(broken_json) + simdjson.simdjson_loads(broken_json) @pytest.mark.parametrize( "invalid_dict", @@ -479,11 +482,11 @@ def test_decode_invalid_dict(self, invalid_dict): "Expected object or value" ) with pytest.raises(ValueError, match=msg): - ujson.ujson_loads(invalid_dict) + simdjson.simdjson_loads(invalid_dict) @pytest.mark.parametrize("numeric_int_as_str", ["31337", "-31337"]) def test_decode_numeric_int(self, numeric_int_as_str): - assert int(numeric_int_as_str) == ujson.ujson_loads(numeric_int_as_str) + assert int(numeric_int_as_str) == simdjson.simdjson_loads(numeric_int_as_str) def test_encode_null_character(self): 
wrapped_input = "31337 \x00 1337" @@ -491,19 +494,19 @@ def test_encode_null_character(self): assert wrapped_input == json.loads(output) assert output == json.dumps(wrapped_input) - assert wrapped_input == ujson.ujson_loads(output) + assert wrapped_input == simdjson.simdjson_loads(output) alone_input = "\x00" output = ujson.ujson_dumps(alone_input) assert alone_input == json.loads(output) assert output == json.dumps(alone_input) - assert alone_input == ujson.ujson_loads(output) + assert alone_input == simdjson.simdjson_loads(output) assert '" \\u0000\\r\\n "' == ujson.ujson_dumps(" \u0000\r\n ") def test_decode_null_character(self): wrapped_input = '"31337 \\u0000 31337"' - assert ujson.ujson_loads(wrapped_input) == json.loads(wrapped_input) + assert simdjson.simdjson_loads(wrapped_input) == json.loads(wrapped_input) def test_encode_list_long_conversion(self): long_input = [ @@ -517,7 +520,7 @@ def test_encode_list_long_conversion(self): output = ujson.ujson_dumps(long_input) assert long_input == json.loads(output) - assert long_input == ujson.ujson_loads(output) + assert long_input == simdjson.simdjson_loads(output) @pytest.mark.parametrize("long_input", [9223372036854775807, 18446744073709551615]) def test_encode_long_conversion(self, long_input): @@ -525,7 +528,7 @@ def test_encode_long_conversion(self, long_input): assert long_input == json.loads(output) assert output == json.dumps(long_input) - assert long_input == ujson.ujson_loads(output) + assert long_input == simdjson.simdjson_loads(output) @pytest.mark.parametrize("bigNum", [2**64, -(2**63) - 1]) def test_dumps_ints_larger_than_maxsize(self, bigNum): @@ -536,25 +539,25 @@ def test_dumps_ints_larger_than_maxsize(self, bigNum): ValueError, match="Value is too big|Value is too small", ): - assert ujson.ujson_loads(encoding) == bigNum + assert simdjson.simdjson_loads(encoding) == bigNum @pytest.mark.parametrize( "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"] ) def 
test_decode_numeric_int_exp(self, int_exp): - assert ujson.ujson_loads(int_exp) == json.loads(int_exp) + assert simdjson.simdjson_loads(int_exp) == json.loads(int_exp) def test_loads_non_str_bytes_raises(self): msg = "a bytes-like object is required, not 'NoneType'" with pytest.raises(TypeError, match=msg): - ujson.ujson_loads(None) + simdjson.simdjson_loads(None) @pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1]) def test_decode_number_with_32bit_sign_bit(self, val): # Test that numbers that fit within 32 bits but would have the # sign bit set (2**31 <= x < 2**32) are decoded properly. doc = f'{{"id": {val}}}' - assert ujson.ujson_loads(doc)["id"] == val + assert simdjson.simdjson_loads(doc)["id"] == val def test_encode_big_escape(self): # Make sure no Exception is raised. @@ -570,7 +573,7 @@ def test_decode_big_escape(self): quote = b'"' escape_input = quote + (base * 1024 * 1024 * 2) + quote - ujson.ujson_loads(escape_input) + simdjson.simdjson_loads(escape_input) def test_to_dict(self): d = {"key": 31337} @@ -582,7 +585,7 @@ def toDict(self): o = DictTest() output = ujson.ujson_dumps(o) - dec = ujson.ujson_loads(output) + dec = simdjson.simdjson_loads(output) assert dec == d def test_default_handler(self): @@ -619,7 +622,7 @@ def my_int_handler(_): return 42 assert ( - ujson.ujson_loads( + simdjson.simdjson_loads( ujson.ujson_dumps(_TestObject("foo"), default_handler=my_int_handler) ) == 42 @@ -628,14 +631,14 @@ def my_int_handler(_): def my_obj_handler(_): return datetime.datetime(2013, 2, 3) - assert ujson.ujson_loads( + assert simdjson.simdjson_loads( ujson.ujson_dumps(datetime.datetime(2013, 2, 3)) - ) == ujson.ujson_loads( + ) == simdjson.simdjson_loads( ujson.ujson_dumps(_TestObject("foo"), default_handler=my_obj_handler) ) obj_list = [_TestObject("foo"), _TestObject("bar")] - assert json.loads(json.dumps(obj_list, default=str)) == ujson.ujson_loads( + assert json.loads(json.dumps(obj_list, default=str)) == simdjson.simdjson_loads( 
ujson.ujson_dumps(obj_list, default_handler=str) ) @@ -652,7 +655,7 @@ def e(self): # JSON keys should be all non-callable non-underscore attributes, see GH-42768 test_object = _TestObject(a=1, b=2, _c=3, d=4) - assert ujson.ujson_loads(ujson.ujson_dumps(test_object)) == { + assert simdjson.simdjson_loads(ujson.ujson_dumps(test_object)) == { "a": 1, "b": 2, "d": 4, @@ -667,27 +670,30 @@ class TestNumpyJSONTests: @pytest.mark.parametrize("bool_input", [True, False]) def test_bool(self, bool_input): b = bool(bool_input) - assert ujson.ujson_loads(ujson.ujson_dumps(b)) == b + assert simdjson.simdjson_loads(ujson.ujson_dumps(b)) == b def test_bool_array(self): bool_array = np.array( [True, False, True, True, False, True, False, False], dtype=bool ) - output = np.array(ujson.ujson_loads(ujson.ujson_dumps(bool_array)), dtype=bool) + output = np.array( + simdjson.simdjson_loads(ujson.ujson_dumps(bool_array)), dtype=bool + ) tm.assert_numpy_array_equal(bool_array, output) def test_int(self, any_int_numpy_dtype): klass = np.dtype(any_int_numpy_dtype).type num = klass(1) - assert klass(ujson.ujson_loads(ujson.ujson_dumps(num))) == num + assert klass(simdjson.simdjson_loads(ujson.ujson_dumps(num))) == num def test_int_array(self, any_int_numpy_dtype): arr = np.arange(100, dtype=int) arr_input = arr.astype(any_int_numpy_dtype) arr_output = np.array( - ujson.ujson_loads(ujson.ujson_dumps(arr_input)), dtype=any_int_numpy_dtype + simdjson.simdjson_loads(ujson.ujson_dumps(arr_input)), + dtype=any_int_numpy_dtype, ) tm.assert_numpy_array_equal(arr_input, arr_output) @@ -704,20 +710,22 @@ def test_int_max(self, any_int_numpy_dtype): else: num = np.iinfo(any_int_numpy_dtype).max - assert klass(ujson.ujson_loads(ujson.ujson_dumps(num))) == num + assert klass(simdjson.simdjson_loads(ujson.ujson_dumps(num))) == num def test_float(self, float_numpy_dtype): klass = np.dtype(float_numpy_dtype).type num = klass(256.2013) - assert klass(ujson.ujson_loads(ujson.ujson_dumps(num))) == num + 
assert klass(simdjson.simdjson_loads(ujson.ujson_dumps(num))) == num def test_float_array(self, float_numpy_dtype): arr = np.arange(12.5, 185.72, 1.7322, dtype=float) float_input = arr.astype(float_numpy_dtype) float_output = np.array( - ujson.ujson_loads(ujson.ujson_dumps(float_input, double_precision=15)), + simdjson.simdjson_loads( + ujson.ujson_dumps(float_input, double_precision=15) + ), dtype=float_numpy_dtype, ) tm.assert_almost_equal(float_input, float_output) @@ -727,7 +735,8 @@ def test_float_max(self, float_numpy_dtype): num = klass(np.finfo(float_numpy_dtype).max / 10) tm.assert_almost_equal( - klass(ujson.ujson_loads(ujson.ujson_dumps(num, double_precision=15))), num + klass(simdjson.simdjson_loads(ujson.ujson_dumps(num, double_precision=15))), + num, ) def test_array_basic(self): @@ -735,7 +744,7 @@ def test_array_basic(self): arr = arr.reshape((2, 2, 2, 2, 3, 2)) tm.assert_numpy_array_equal( - np.array(ujson.ujson_loads(ujson.ujson_dumps(arr))), arr + np.array(simdjson.simdjson_loads(ujson.ujson_dumps(arr))), arr ) @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)]) @@ -744,7 +753,7 @@ def test_array_reshaped(self, shape): arr = arr.reshape(shape) tm.assert_numpy_array_equal( - np.array(ujson.ujson_loads(ujson.ujson_dumps(arr))), arr + np.array(simdjson.simdjson_loads(ujson.ujson_dumps(arr))), arr ) def test_array_list(self): @@ -760,7 +769,7 @@ def test_array_list(self): {"key": "val"}, ] arr = np.array(arr_list, dtype=object) - result = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)), dtype=object) + result = np.array(simdjson.simdjson_loads(ujson.ujson_dumps(arr)), dtype=object) tm.assert_numpy_array_equal(result, arr) def test_array_float(self): @@ -769,7 +778,7 @@ def test_array_float(self): arr = np.arange(100.202, 200.202, 1, dtype=dtype) arr = arr.reshape((5, 5, 4)) - arr_out = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)), dtype=dtype) + arr_out = np.array(simdjson.simdjson_loads(ujson.ujson_dumps(arr)), dtype=dtype) 
tm.assert_almost_equal(arr, arr_out) def test_0d_array(self): @@ -801,7 +810,7 @@ def test_dataframe(self, orient): encode_kwargs = {} if orient is None else {"orient": orient} assert (df.dtypes == dtype).all() - output = ujson.ujson_loads(ujson.ujson_dumps(df, **encode_kwargs)) + output = simdjson.simdjson_loads(ujson.ujson_dumps(df, **encode_kwargs)) assert (df.dtypes == dtype).all() # Ensure proper DataFrame initialization. @@ -832,10 +841,10 @@ def test_dataframe_nested(self, orient): kwargs = {} if orient is None else {"orient": orient} exp = { - "df1": ujson.ujson_loads(ujson.ujson_dumps(df, **kwargs)), - "df2": ujson.ujson_loads(ujson.ujson_dumps(df, **kwargs)), + "df1": simdjson.simdjson_loads(ujson.ujson_dumps(df, **kwargs)), + "df2": simdjson.simdjson_loads(ujson.ujson_dumps(df, **kwargs)), } - assert ujson.ujson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp + assert simdjson.simdjson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp def test_series(self, orient): dtype = np.int64 @@ -849,7 +858,7 @@ def test_series(self, orient): encode_kwargs = {} if orient is None else {"orient": orient} - output = ujson.ujson_loads(ujson.ujson_dumps(s, **encode_kwargs)) + output = simdjson.simdjson_loads(ujson.ujson_dumps(s, **encode_kwargs)) assert s.dtype == dtype if orient == "split": @@ -877,19 +886,19 @@ def test_series_nested(self, orient): kwargs = {} if orient is None else {"orient": orient} exp = { - "s1": ujson.ujson_loads(ujson.ujson_dumps(s, **kwargs)), - "s2": ujson.ujson_loads(ujson.ujson_dumps(s, **kwargs)), + "s1": simdjson.simdjson_loads(ujson.ujson_dumps(s, **kwargs)), + "s2": simdjson.simdjson_loads(ujson.ujson_dumps(s, **kwargs)), } - assert ujson.ujson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp + assert simdjson.simdjson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp def test_index(self): i = Index([23, 45, 18, 98, 43, 11], name="index") # Column indexed. 
- output = Index(ujson.ujson_loads(ujson.ujson_dumps(i)), name="index") + output = Index(simdjson.simdjson_loads(ujson.ujson_dumps(i)), name="index") tm.assert_index_equal(i, output) - dec = _clean_dict(ujson.ujson_loads(ujson.ujson_dumps(i, orient="split"))) + dec = _clean_dict(simdjson.simdjson_loads(ujson.ujson_dumps(i, orient="split"))) output = Index(**dec) tm.assert_index_equal(i, output) @@ -899,17 +908,18 @@ def test_index(self): assert i.name == output.name output = Index( - ujson.ujson_loads(ujson.ujson_dumps(i, orient="values")), name="index" + simdjson.simdjson_loads(ujson.ujson_dumps(i, orient="values")), name="index" ) tm.assert_index_equal(i, output) output = Index( - ujson.ujson_loads(ujson.ujson_dumps(i, orient="records")), name="index" + simdjson.simdjson_loads(ujson.ujson_dumps(i, orient="records")), + name="index", ) tm.assert_index_equal(i, output) output = Index( - ujson.ujson_loads(ujson.ujson_dumps(i, orient="index")), name="index" + simdjson.simdjson_loads(ujson.ujson_dumps(i, orient="index")), name="index" ) tm.assert_index_equal(i, output) @@ -922,11 +932,13 @@ def test_datetime_index(self): ) encoded = ujson.ujson_dumps(rng, date_unit=date_unit) - decoded = DatetimeIndex(np.array(ujson.ujson_loads(encoded))) + decoded = DatetimeIndex(np.array(simdjson.simdjson_loads(encoded))) tm.assert_index_equal(rng, decoded) ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - decoded = Series(ujson.ujson_loads(ujson.ujson_dumps(ts, date_unit=date_unit))) + decoded = Series( + simdjson.simdjson_loads(ujson.ujson_dumps(ts, date_unit=date_unit)) + ) idx_values = decoded.index.values.astype(np.int64) decoded.index = DatetimeIndex(idx_values) @@ -947,15 +959,15 @@ def test_decode_invalid_array(self, invalid_arr): "Unexpected character found when decoding array value" ) with pytest.raises(ValueError, match=msg): - ujson.ujson_loads(invalid_arr) + simdjson.simdjson_loads(invalid_arr) @pytest.mark.parametrize("arr", [[], [31337]]) 
def test_decode_array(self, arr): - assert arr == ujson.ujson_loads(str(arr)) + assert arr == simdjson.simdjson_loads(str(arr)) @pytest.mark.parametrize("extreme_num", [9223372036854775807, -9223372036854775808]) def test_decode_extreme_numbers(self, extreme_num): - assert extreme_num == ujson.ujson_loads(str(extreme_num)) + assert extreme_num == simdjson.simdjson_loads(str(extreme_num)) @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-(2**63) - 1}"]) def test_decode_too_extreme_numbers(self, too_extreme_num): @@ -963,14 +975,14 @@ def test_decode_too_extreme_numbers(self, too_extreme_num): ValueError, match="Value is too big|Value is too small", ): - ujson.ujson_loads(too_extreme_num) + simdjson.simdjson_loads(too_extreme_num) def test_decode_with_trailing_whitespaces(self): - assert {} == ujson.ujson_loads("{}\n\t ") + assert {} == simdjson.simdjson_loads("{}\n\t ") def test_decode_with_trailing_non_whitespaces(self): with pytest.raises(ValueError, match="Trailing data"): - ujson.ujson_loads("{}\n\t a") + simdjson.simdjson_loads("{}\n\t a") @pytest.mark.parametrize("value", [f"{2**64}", f"{-(2**63) - 1}"]) def test_decode_array_with_big_int(self, value): @@ -978,7 +990,7 @@ def test_decode_array_with_big_int(self, value): ValueError, match="Value is too big|Value is too small", ): - ujson.ujson_loads(value) + simdjson.simdjson_loads(value) @pytest.mark.parametrize( "float_number", @@ -998,7 +1010,7 @@ def test_decode_array_with_big_int(self, value): def test_decode_floating_point(self, sign, float_number): float_number *= sign tm.assert_almost_equal( - float_number, ujson.ujson_loads(str(float_number)), rtol=1e-15 + float_number, simdjson.simdjson_loads(str(float_number)), rtol=1e-15 ) def test_encode_big_set(self): @@ -1012,7 +1024,7 @@ def test_encode_empty_set(self): def test_encode_set(self): s = {1, 2, 3, 4, 5, 6, 7, 8, 9} enc = ujson.ujson_dumps(s) - dec = ujson.ujson_loads(enc) + dec = simdjson.simdjson_loads(enc) for v in dec: assert v in s 
From 2360026ea2a20511f4a21d5b71d0b9e1f0703201 Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Sun, 30 Nov 2025 15:54:29 -0300 Subject: [PATCH 3/8] fix: fix gcc warnings --- pandas/_libs/src/parser/json.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/parser/json.cpp b/pandas/_libs/src/parser/json.cpp index a7ca4baa94738..e67b471512f30 100644 --- a/pandas/_libs/src/parser/json.cpp +++ b/pandas/_libs/src/parser/json.cpp @@ -59,20 +59,22 @@ static PyObject *big_int_to_pylong(ondemand::value element) { static PyObject *json_number_to_pyobject(ondemand::value element) { ondemand::number num = element.get_number(); + PyObject *result; switch (num.get_number_type()) { case ondemand::number_type::signed_integer: - return PyLong_FromLongLong(num.get_int64()); + result = PyLong_FromLongLong(num.get_int64()); break; case ondemand::number_type::unsigned_integer: - return PyLong_FromUnsignedLongLong(num.get_uint64()); + result = PyLong_FromUnsignedLongLong(num.get_uint64()); break; case ondemand::number_type::floating_point_number: - return PyFloat_FromDouble(num.get_double()); + result = PyFloat_FromDouble(num.get_double()); break; case ondemand::number_type::big_integer: - return big_int_to_pylong(element); + result = big_int_to_pylong(element); break; } + return result; } static PyObject *json_str_to_pyobject(ondemand::value element) { @@ -124,7 +126,7 @@ PyObject *json_loads(PyObject *Py_UNUSED(self), PyObject *args, return NULL; } - PyObject *ret; + PyObject *ret = NULL; try { simdjson::padded_string padded_json(buf, len); simdjson::ondemand::document doc = From e2751a1ed2dbeffa6d1a284764350a731e95b5c6 Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Sun, 30 Nov 2025 15:58:33 -0300 Subject: [PATCH 4/8] fix: remove fallback --- pandas/_libs/src/parser/json.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/src/parser/json.cpp b/pandas/_libs/src/parser/json.cpp index 
e67b471512f30..ab4dea3a40f1b 100644 --- a/pandas/_libs/src/parser/json.cpp +++ b/pandas/_libs/src/parser/json.cpp @@ -132,13 +132,13 @@ PyObject *json_loads(PyObject *Py_UNUSED(self), PyObject *args, simdjson::ondemand::document doc = pandas::json::parser.iterate(padded_json); switch (doc.type()) { - case simdjson::fallback::ondemand::json_type::null: + case simdjson::ondemand::json_type::null: ret = Py_None; break; - case simdjson::fallback::ondemand::json_type::boolean: + case simdjson::ondemand::json_type::boolean: ret = doc.get_bool() ? Py_True : Py_False; break; - case simdjson::fallback::ondemand::json_type::number: { + case simdjson::ondemand::json_type::number: { simdjson::ondemand::number num = doc.get_number(); switch (num.get_number_type()) { case simdjson::ondemand::number_type::signed_integer: @@ -156,7 +156,7 @@ PyObject *json_loads(PyObject *Py_UNUSED(self), PyObject *args, } break; } - case simdjson::fallback::ondemand::json_type::string: { + case simdjson::ondemand::json_type::string: { std::string_view s = doc.get_string(); ret = PyUnicode_FromStringAndSize(s.data(), s.size()); break; From cbfacda07d6a63ec79b90bb60ed6205ed507eeac Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Sun, 30 Nov 2025 17:36:49 -0300 Subject: [PATCH 5/8] fix: follow meson docs for subproject --- pandas/_libs/meson.build | 5 +---- pyproject.toml | 5 ++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index ef7736a96ea62..d5b7da3e93d43 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -1,7 +1,4 @@ -simdjson_proj = subproject( - 'simdjson', - default_options: ['default_library=static'], -) +simdjson_proj = subproject('simdjson') simdjson_dep = simdjson_proj.get_variable('simdjson_dep') _algos_take_helper = custom_target( diff --git a/pyproject.toml b/pyproject.toml index c859343a94516..7241ff4c17fe1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -141,7 +141,10 @@ tag_prefix = "v" 
parentdir_prefix = "pandas-" [tool.meson-python.args] -setup = ['--vsenv'] # For Windows +setup = [ + '--vsenv', # For Windows + '--default-library=static' +] install = ['--skip-subprojects'] [tool.cibuildwheel] From cef7b6fbb005b255f4c497bd42602aadc6fdb07b Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Sun, 30 Nov 2025 17:54:47 -0300 Subject: [PATCH 6/8] fix: bump meson-python to respect `--skip-subprojects` --- .github/workflows/unit-tests.yml | 6 +++--- ci/deps/actions-311-minimum_versions.yaml | 2 +- ci/deps/actions-311.yaml | 2 +- ci/deps/actions-312.yaml | 2 +- ci/deps/actions-313-downstream_compat.yaml | 2 +- ci/deps/actions-313-numpydev.yaml | 2 +- ci/deps/actions-313-pyarrownightly.yaml | 2 +- ci/deps/actions-313.yaml | 2 +- environment.yml | 2 +- pyproject.toml | 2 +- requirements-dev.txt | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 8406d167a41ca..2a89a1d880d48 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -240,7 +240,7 @@ jobs: run: | /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev . ~/virtualenvs/pandas-dev/bin/activate - python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 + python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.14.0 python -m pip install numpy -Csetup-args="-Dallow-noblas=true" python -m pip install --no-cache-dir versioneer[toml] cython==3.0.10 python-dateutil pytest>=8.3.4 pytest-xdist>=3.6.1 hypothesis>=6.116.0 python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" @@ -278,7 +278,7 @@ jobs: run: | /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev . 
~/virtualenvs/pandas-dev/bin/activate - python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 + python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.14.0 meson[ninja]==1.2.1 python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=8.3.4 pytest-xdist>=3.6.1 hypothesis>=6.116.0 python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" python -m pip list --no-cache-dir @@ -348,7 +348,7 @@ jobs: - name: Build Environment run: | python --version - python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 + python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.14.0 python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.116.0 pytest>=8.3.4 pytest-xdist>=3.6.1 pytest-cov python -m pip install -ve . 
--no-build-isolation --no-index --no-deps -Csetup-args="--werror" diff --git a/ci/deps/actions-311-minimum_versions.yaml b/ci/deps/actions-311-minimum_versions.yaml index 70d1363bb2fed..1596af37edc21 100644 --- a/ci/deps/actions-311-minimum_versions.yaml +++ b/ci/deps/actions-311-minimum_versions.yaml @@ -10,7 +10,7 @@ dependencies: - versioneer - cython<4.0.0a0 - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index b33e631ee2eee..95be0e897e7ea 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -8,7 +8,7 @@ dependencies: - versioneer - cython<4.0.0a0 - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index 063b72d990988..5cada579810f1 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -8,7 +8,7 @@ dependencies: - versioneer - cython<4.0.0a0 - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/ci/deps/actions-313-downstream_compat.yaml b/ci/deps/actions-313-downstream_compat.yaml index ac12811bbdfbd..c7df77732f967 100644 --- a/ci/deps/actions-313-downstream_compat.yaml +++ b/ci/deps/actions-313-downstream_compat.yaml @@ -9,7 +9,7 @@ dependencies: - versioneer - cython<4.0.0a0 - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/ci/deps/actions-313-numpydev.yaml b/ci/deps/actions-313-numpydev.yaml index 7810ef3f3540b..93d2f1508e870 100644 --- a/ci/deps/actions-313-numpydev.yaml +++ b/ci/deps/actions-313-numpydev.yaml @@ -7,7 +7,7 @@ dependencies: # build dependencies - versioneer - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 - cython<4.0.0a0 # test dependencies diff --git a/ci/deps/actions-313-pyarrownightly.yaml b/ci/deps/actions-313-pyarrownightly.yaml index 
be7fade7f3900..00976b6ad53f4 100644 --- a/ci/deps/actions-313-pyarrownightly.yaml +++ b/ci/deps/actions-313-pyarrownightly.yaml @@ -8,7 +8,7 @@ dependencies: - versioneer - meson=1.2.1 - cython<4.0.0a0 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/ci/deps/actions-313.yaml b/ci/deps/actions-313.yaml index 414b11aacdfc2..6f104506e6e9c 100644 --- a/ci/deps/actions-313.yaml +++ b/ci/deps/actions-313.yaml @@ -8,7 +8,7 @@ dependencies: - versioneer - cython<4.0.0a0 - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/environment.yml b/environment.yml index 0d917be67e0c5..8db4dcbf66ab8 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,7 @@ dependencies: - versioneer - cython<4.0.0a0 - meson=1.2.1 - - meson-python=0.13.1 + - meson-python=0.14.0 # test dependencies - pytest>=8.3.4 diff --git a/pyproject.toml b/pyproject.toml index 7241ff4c17fe1..a7efeba34fc6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ # Minimum requirements for the build system to execute. # See https://github.com/scipy/scipy/pull/12940 for the AIX issue. 
requires = [ - "meson-python>=0.13.1", + "meson-python>=0.14.0", "meson>=1.2.1,<2", "wheel", "Cython<4.0.0a0", # Note: sync with setup.py, environment.yml and asv.conf.json diff --git a/requirements-dev.txt b/requirements-dev.txt index 87384ee738cf3..72e35e62d432e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,7 +5,7 @@ pip versioneer[toml] cython<4.0.0a0 meson[ninja]==1.2.1 -meson-python==0.13.1 +meson-python==0.14.0 pytest>=8.3.4 pytest-cov pytest-xdist>=3.6.1 From 1fac6f9da14670d71e9793ab0b04c398bf55efbe Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Sun, 30 Nov 2025 18:53:25 -0300 Subject: [PATCH 7/8] fix: cast kwargs to char ** for older python --- pandas/_libs/src/parser/json.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/parser/json.cpp b/pandas/_libs/src/parser/json.cpp index ab4dea3a40f1b..49a12369a67ea 100644 --- a/pandas/_libs/src/parser/json.cpp +++ b/pandas/_libs/src/parser/json.cpp @@ -121,8 +121,10 @@ PyObject *json_loads(PyObject *Py_UNUSED(self), PyObject *args, const char *buf; Py_ssize_t len; int *precise_float; // Unused. 
It's declared for compatibility with old parser - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|b", kwlist, &buf, &len, - &precise_float)) { + if (!PyArg_ParseTupleAndKeywords( + args, kwargs, "s#|b", + const_cast<char **>(kwlist), // cast for python<3.13 + &buf, &len, &precise_float)) { return NULL; } From 91528fcb267c0a9b14a84cd133c99749ac35c229 Mon Sep 17 00:00:00 2001 From: Alvaro-Kothe Date: Sun, 30 Nov 2025 19:29:46 -0300 Subject: [PATCH 8/8] fix: use c++20 standard because of windows --- meson.build | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 156dbb6c63e7c..ab1838b8a09c7 100644 --- a/meson.build +++ b/meson.build @@ -7,7 +7,12 @@ project( version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(), license: 'BSD-3', meson_version: '>=1.2.1', - default_options: ['buildtype=release', 'c_std=c11', 'warning_level=2'], + default_options: [ + 'buildtype=release', + 'c_std=c11', + 'cpp_std=c++20', + 'warning_level=2', + ], ) fs = import('fs')