diff --git a/.github/workflows/big_endian.yml b/.github/workflows/big_endian.yml index a54c9938..d6824d6e 100644 --- a/.github/workflows/big_endian.yml +++ b/.github/workflows/big_endian.yml @@ -22,7 +22,7 @@ permissions: jobs: big_endian_tests: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 continue-on-error: true strategy: fail-fast: false diff --git a/quaddtype/meson.build b/quaddtype/meson.build index 78a94867..6ad8841a 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -25,8 +25,9 @@ if sleef_dep.found() and sleef_dep.type_name() != 'internal' # SLEEF found system-wide - verify quad-precision support cpp = meson.get_compiler('cpp') sleefquad_lib = cpp.find_library('sleefquad', required: false) + tlfloat_lib = cpp.find_library('tlfloat', required: false) - if sleefquad_lib.found() + if sleefquad_lib.found() and tlfloat_lib.found() sleefquad_test_code = ''' #include @@ -40,7 +41,7 @@ if sleef_dep.found() and sleef_dep.type_name() != 'internal' # this should compile and link quad_works = cpp.links( sleefquad_test_code, - dependencies: [sleef_dep, sleefquad_lib], + dependencies: [sleef_dep, sleefquad_lib, tlfloat_lib], name: 'SLEEF quad-precision support' ) @@ -48,6 +49,9 @@ if sleef_dep.found() and sleef_dep.type_name() != 'internal' sleefquad_dep = declare_dependency( dependencies: [sleef_dep, sleefquad_lib] ) + tlfloat_dep = declare_dependency( + dependencies: [tlfloat_lib] + ) use_system_sleef = true else fallback_reason = 'System-wide SLEEF installation found but a test for quad precision support failed.' @@ -65,6 +69,7 @@ else sleef_subproj = subproject('sleef') sleef_dep = sleef_subproj.get_variable('sleef_dep') sleefquad_dep = sleef_subproj.get_variable('sleefquad_dep') + tlfloat_dep = sleef_subproj.get_variable('tlfloat_dep') warning(fallback_reason) message('Proceeding with vendored SLEEF subproject instead') endif @@ -84,7 +89,7 @@ message('Using NumPy version: @0@'.format(numpy_version)) npymath_path = incdir_numpy / '..' / 'lib' npymath_lib = c.find_library('npymath', dirs: npymath_path) -dependencies = [py_dep, qblas_dep, sleef_dep, sleefquad_dep, npymath_lib] +dependencies = [py_dep, qblas_dep, sleef_dep, sleefquad_dep, tlfloat_dep, npymath_lib] # Add OpenMP dependency (optional, for threading) openmp_dep = dependency('openmp', required: false, static: false) @@ -99,6 +104,11 @@ if not is_windows if cpp.has_argument('-fext-numeric-literals') add_project_arguments('-fext-numeric-literals', language: 'cpp') endif + + # Suppress warnings from system headers (sleefquad.h has many unused inline functions) + if cpp.has_argument('-Wno-unused-function') + add_project_arguments('-Wno-unused-function', language: ['c', 'cpp']) + endif endif # Thread-local storage detection (borrowed from NumPy) diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c index dac8fa0b..b9a896c8 100644 --- a/quaddtype/numpy_quaddtype/src/dragon4.c +++ b/quaddtype/numpy_quaddtype/src/dragon4.c @@ -26,6 +26,11 @@ Modifications are specific to support the SLEEF_QUAD #include "scalar.h" +// Undefine NPY_TLS if already defined (avoid redefinition warning) +#ifdef NPY_TLS + #undef NPY_TLS +#endif + #ifdef __cplusplus #define NPY_TLS thread_local #elif defined(HAVE_THREAD_LOCAL) @@ -2005,8 +2010,6 @@ PyObject * Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int pad_right) { - npy_double v; - if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) { QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; if (quad_obj->backend == BACKEND_SLEEF) { @@ -2028,8 +2031,6 @@ PyObject * Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int exp_digits) { - npy_double val; - if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) { QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; if (quad_obj->backend == BACKEND_SLEEF) { diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp index 8c902171..43a87963 100644 --- a/quaddtype/numpy_quaddtype/src/ops.hpp +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -13,13 +13,13 @@ typedef Sleef_quad (*unary_op_quad_def)(const Sleef_quad *); // Unary Quad operations with 2 outputs (for modf, frexp) typedef void (*unary_op_2out_quad_def)(const Sleef_quad *, Sleef_quad *, Sleef_quad *); -static Sleef_quad +[[maybe_unused]] static Sleef_quad quad_negative(const Sleef_quad *op) { return Sleef_negq1(*op); } -static Sleef_quad +[[maybe_unused]] static Sleef_quad quad_positive(const Sleef_quad *op) { return *op; @@ -929,7 +929,7 @@ static inline Sleef_quad quad_set_words64(int64_t hx, uint64_t lx) static inline Sleef_quad quad_nextafter(const Sleef_quad *x, const Sleef_quad *y) { - int64_t hx, hy, ix, iy; + int64_t hx, hy, ix; uint64_t lx, ly; quad_get_words64(&hx, &lx, *x); @@ -937,7 +937,7 @@ quad_nextafter(const Sleef_quad *x, const Sleef_quad *y) // extracting absolute value ix = hx & 0x7fffffffffffffffLL; - iy = hy & 0x7fffffffffffffffLL; + (void)ly; // unused but needed for quad_get_words64 // NaN if either is NaN if (Sleef_iunordq1(*x, *y)) { diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index eae25690..8c257dbc 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -191,12 +191,21 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) else if (PyUnicode_Check(value)) { const char *s = PyUnicode_AsUTF8(value); char *endptr = NULL; - int err = cstring_to_quad(s, backend, &self->value, &endptr, true); + int err = NumPyOS_ascii_strtoq(s, backend, &self->value, &endptr); if (err < 0) { PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision"); Py_DECREF(self); return NULL; } + // Skip trailing whitespace (matches Python's float() behavior) + while (ascii_isspace(*endptr)) { + endptr++; + } + if (*endptr != '\0') { + PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision"); + Py_DECREF(self); + return NULL; + } } else if (PyBytes_Check(value)) { const char *s = PyBytes_AsString(value); @@ -205,12 +214,21 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) return NULL; } char *endptr = NULL; - int err = cstring_to_quad(s, backend, &self->value, &endptr, true); + int err = NumPyOS_ascii_strtoq(s, backend, &self->value, &endptr); if (err < 0) { PyErr_SetString(PyExc_ValueError, "Unable to parse bytes to QuadPrecision"); Py_DECREF(self); return NULL; } + // Skip trailing whitespace (matches Python's float() behavior) + while (ascii_isspace(*endptr)) { + endptr++; + } + if (*endptr != '\0') { + PyErr_SetString(PyExc_ValueError, "Unable to parse bytes to QuadPrecision"); + Py_DECREF(self); + return NULL; + } } else if (PyLong_Check(value)) { return quad_from_py_int(value, backend, self); @@ -320,19 +338,6 @@ QuadPrecision_str(QuadPrecisionObject *self) return PyUnicode_FromString(buffer); } -static PyObject * -QuadPrecision_repr(QuadPrecisionObject *self) -{ - PyObject *str = QuadPrecision_str(self); - if (str == NULL) { - return NULL; - } - const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble"; - PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str); - Py_DECREF(str); - return res; -} - static PyObject * QuadPrecision_repr_dragon4(QuadPrecisionObject *self) { diff --git a/quaddtype/numpy_quaddtype/src/umath/umath.cpp b/quaddtype/numpy_quaddtype/src/umath/umath.cpp index 0c72d32e..a075c4b8 100644 --- a/quaddtype/numpy_quaddtype/src/umath/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath/umath.cpp @@ -25,60 +25,6 @@ extern "C" { #include "comparison_ops.h" #include "matmul.h" -// helper debugging function -static const char * -get_dtype_name(PyArray_DTypeMeta *dtype) -{ - if (dtype == &QuadPrecDType) { - return "QuadPrecDType"; - } - else if (dtype == &PyArray_BoolDType) { - return "BoolDType"; - } - else if (dtype == &PyArray_ByteDType) { - return "ByteDType"; - } - else if (dtype == &PyArray_UByteDType) { - return "UByteDType"; - } - else if (dtype == &PyArray_ShortDType) { - return "ShortDType"; - } - else if (dtype == &PyArray_UShortDType) { - return "UShortDType"; - } - else if (dtype == &PyArray_IntDType) { - return "IntDType"; - } - else if (dtype == &PyArray_UIntDType) { - return "UIntDType"; - } - else if (dtype == &PyArray_LongDType) { - return "LongDType"; - } - else if (dtype == &PyArray_ULongDType) { - return "ULongDType"; - } - else if (dtype == &PyArray_LongLongDType) { - return "LongLongDType"; - } - else if (dtype == &PyArray_ULongLongDType) { - return "ULongLongDType"; - } - else if (dtype == &PyArray_FloatDType) { - return "FloatDType"; - } - else if (dtype == &PyArray_DoubleDType) { - return "DoubleDType"; - } - else if (dtype == &PyArray_LongDoubleDType) { - return "LongDoubleDType"; - } - else { - return "UnknownDType"; - } -} - int init_quad_umath(void) { diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c index 6690605c..e45595d4 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.c +++ b/quaddtype/numpy_quaddtype/src/utilities.c @@ -134,12 +134,17 @@ NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_val } } - // Set NaN value (sign is ignored for NaN) + // Set NaN value with sign preserved if (backend == BACKEND_SLEEF) { - out_value->sleef_value = QUAD_PRECISION_NAN; + Sleef_quad nan_val = QUAD_PRECISION_NAN; + // Apply sign to NaN (negative NaN has sign bit set) + if (sign < 0) { + nan_val = Sleef_negq1(nan_val); + } + out_value->sleef_value = nan_val; } else { - out_value->longdouble_value = nanl(""); + out_value->longdouble_value = sign < 0 ? -nanl("") : nanl(""); } if (endptr) { @@ -157,15 +162,98 @@ int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_va char **endptr, bool require_full_parse) { if(backend == BACKEND_SLEEF) { - out_value->sleef_value = Sleef_strtoq(str, endptr); + // SLEEF 4.0's Sleef_strtoq doesn't properly set endptr to indicate + // where parsing stopped. It always sets endptr to the end of the string. + // We need to manually validate and track the parse position. + + const char *p = str; + + // Skip leading whitespace + while (ascii_isspace(*p)) { + p++; + } + + // Handle optional sign + if (*p == '+' || *p == '-') { + p++; + } + + // Must have at least one digit or decimal point followed by digit + int has_digits = 0; + + // Parse integer part + while (ascii_isdigit(*p)) { + has_digits = 1; + p++; + } + + // Parse decimal point and fractional part + if (*p == '.') { + p++; + while (ascii_isdigit(*p)) { + has_digits = 1; + p++; + } + } + + // Must have at least one digit somewhere + if (!has_digits) { + if (endptr) *endptr = (char *)str; + return -1; + } + + // Parse optional exponent + if (*p == 'e' || *p == 'E') { + const char *exp_start = p; + p++; + + // Optional sign in exponent + if (*p == '+' || *p == '-') { + p++; + } + + // Must have at least one digit in exponent + if (!ascii_isdigit(*p)) { + // Invalid exponent, backtrack + p = exp_start; + } else { + while (ascii_isdigit(*p)) { + p++; + } + } + } + + // Now p points to where valid parsing ends + // SLEEF 4.0's Sleef_strtoq has a bug where it doesn't properly stop at whitespace + // or other delimiters. We need to create a null-terminated substring. + size_t len = p - str; + char *temp = (char *)malloc(len + 1); + if (!temp) { + if (endptr) *endptr = (char *)str; + return -1; + } + memcpy(temp, str, len); + temp[len] = '\0'; + + // Call Sleef_strtoq with the bounded string + char *sleef_endptr; + out_value->sleef_value = Sleef_strtoq(temp, &sleef_endptr); + free(temp); + + // Set endptr to our calculated position + if (endptr) { + *endptr = (char *)p; + } + } else { out_value->longdouble_value = strtold(str, endptr); } - if(*endptr == str) + + if(endptr && *endptr == str) return -1; // parse error - nothing was parsed // If full parse is required - if(require_full_parse && **endptr != '\0') + if(require_full_parse && endptr && **endptr != '\0') return -1; // parse error - characters remain to be converted return 0; // success diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh index b6f5e043..b3d3b922 100755 --- a/quaddtype/reinstall.sh +++ b/quaddtype/reinstall.sh @@ -1,12 +1,11 @@ #!/bin/bash set -x -if [ -d "build/" ]; then - rm -r build - rm -rf dist/ - rm -rf subprojects/qblas - rm -rf subprojects/sleef -fi +rm -rf build/ +rm -rf dist/ +rm -rf subprojects/qblas +rm -rf subprojects/sleef +rm -rf .mesonpy-* python -m pip uninstall -y numpy_quaddtype python -m pip install . -vv --no-build-isolation 2>&1 | tee build_log.txt diff --git a/quaddtype/subprojects/packagefiles/sleef/meson.build b/quaddtype/subprojects/packagefiles/sleef/meson.build index 68d9384f..4e3713d6 100644 --- a/quaddtype/subprojects/packagefiles/sleef/meson.build +++ b/quaddtype/subprojects/packagefiles/sleef/meson.build @@ -1,4 +1,4 @@ -project('sleef', version: '3.8') +project('sleef') cmake = find_program('cmake') ninja = find_program('ninja', 'make', required: false) @@ -8,11 +8,21 @@ sleef_install_dir = 'sleef_install' # turning off parallel build in windows parallel_flag = ['--parallel'] +build_config_flag = [] if host_machine.system() == 'windows' parallel_flag = [] + # On Windows with MSVC (multi-config generator), must specify --config Release + # to match the main project's release build and avoid runtime library mismatches + build_config_flag = ['--config', 'Release'] endif # For building sleef with TSan, delete the sleef subproject and follow the README instructions to build sleef externally. +# Enable SIMD extensions that are OFF by default but required by qblas (will change in future) +sleef_simd_flags = [] +if host_machine.cpu_family() == 'x86_64' or host_machine.cpu_family() == 'x86' + sleef_simd_flags = ['-DSLEEF_ENABLE_SSE2=ON'] +endif + sleef_configure = run_command([ cmake, '-S', meson.current_source_dir(), @@ -24,14 +34,14 @@ sleef_configure = run_command([ '-DSLEEF_BUILD_INLINE_HEADERS=OFF', '-DCMAKE_POSITION_INDEPENDENT_CODE=ON', '-DCMAKE_INSTALL_PREFIX=' + meson.current_build_dir() / sleef_install_dir -], check: false, capture: true) +] + sleef_simd_flags, check: false, capture: true) if sleef_configure.returncode() != 0 error('SLEEF CMake configuration failed: ' + sleef_configure.stderr()) endif sleef_build_target = custom_target('sleef_build', - command: [cmake, '--build', meson.current_build_dir() / sleef_build_dir, '--target', 'install'] + parallel_flag, + command: [cmake, '--build', meson.current_build_dir() / sleef_build_dir, '--target', 'install'] + build_config_flag + parallel_flag, output: 'sleef_built.stamp', console: true, build_always_stale: true, @@ -59,7 +69,13 @@ sleef_dep = declare_dependency( ) sleefquad_dep = declare_dependency( - dependencies: [sleef_build_dep], + dependencies: [sleef_build_dep, sleef_dep], compile_args: compile_args_list, link_args: ['-L' + meson.current_build_dir() / sleef_install_dir / 'lib', '-L' + meson.current_build_dir() / sleef_install_dir / 'lib64', '-lsleefquad'] +) + +tlfloat_dep = declare_dependency( + dependencies: [sleef_build_dep], + compile_args: compile_args_list, + link_args: ['-L' + meson.current_build_dir() / sleef_install_dir / 'lib', '-L' + meson.current_build_dir() / sleef_install_dir / 'lib64', '-ltlfloat'] ) \ No newline at end of file diff --git a/quaddtype/subprojects/sleef.wrap b/quaddtype/subprojects/sleef.wrap index 8e87d722..bac3126e 100644 --- a/quaddtype/subprojects/sleef.wrap +++ b/quaddtype/subprojects/sleef.wrap @@ -1,9 +1,10 @@ [wrap-git] directory=sleef url=https://github.com/shibatch/sleef.git -revision=3.8 +revision=43a0252ba9331adc7fb10755021f802863678c38 patch_directory=sleef [provide] sleef = sleef_dep -sleefquad = sleefquad_dep \ No newline at end of file +sleefquad = sleefquad_dep +tlfloat = tlfloat_dep \ No newline at end of file diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 86f6f8fa..1585f6fd 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -404,14 +404,15 @@ def test_bytes_backend_consistency(self, backend, bytes_val): # Leading whitespace is OK (consumed by parser) (b" 1.0", "1.0"), (b" 3.14", "3.14"), + # Trailing whitespace is OK (matches Python's float() behavior) + (b"1.0 ", "1.0"), + (b"1.0 ", "1.0"), ]) def test_bytes_whitespace_valid(self, bytes_val, expected_str): """Test handling of valid whitespace in bytes input.""" assert QuadPrecision(bytes_val) == QuadPrecision(expected_str) @pytest.mark.parametrize("invalid_bytes", [ - b"1.0 ", # Trailing whitespace - b"1.0 ", # Multiple trailing spaces b"1 .0", # Internal whitespace b"1. 0", # Internal whitespace ]) @@ -5131,4 +5132,73 @@ def test_large_array_casting(dtype, size): # check roundtrip roundtrip = quad_arr.astype(dtype) - np.testing.assert_array_equal(arr, roundtrip) \ No newline at end of file + np.testing.assert_array_equal(arr, roundtrip) + + +class TestBinaryOpsEdgeCases: + """Edge case tests for binary operations - SLEEF 3.8 had inconsistency where (0 + x) != (x + 0). + + In SLEEF 3.8: + - (0 + 6.724206286224186953608055004397316e-4932) gave half the expected value + - (6.724206286224186953608055004397316e-4932 + 0) was correct + + SLEEF 4.0 fixes this inconsistency. + """ + + # The exact values from the bug + SMALL_VALUE = "6.724206286224186953608055004397316e-4932" + ZERO = 0.0 + + @pytest.mark.parametrize('op', [ + ("add", "fadd"), + ("subtract", "fsub"), + ("multiply", "fmul"), + ("divide", "fdiv"), + ("mod", "fmod"), + ("power", "power"), + ("hypot", "hypot"), + ("atan2", "atan2"), + ]) + def test_binary_op_consistency(self, op): + """Test that abs(x op y) == abs(y op x) - catches half-value bugs.""" + mp.prec = 113 + small = QuadPrecision(self.SMALL_VALUE) + zero = QuadPrecision(self.ZERO) + + mp_small = mp.mpf(self.SMALL_VALUE) + mp_zero = mp.mpf(self.ZERO) + + quad_op_str, mpmath_op_str = op + quad_op, mpmath_op = getattr(np, quad_op_str), getattr(mp, mpmath_op_str) + quad_result = quad_op(zero, small) + mpmath_result = mpmath_op(mp_zero, mp_small) + + # Use mp.nstr to get full precision (50 digits for quad precision) + mpmath_result = QuadPrecision(mp.nstr(mpmath_result, 50)) + + # Handle NaN cases + if np.isnan(mpmath_result): + assert np.isnan(quad_result), f"Expected NaN for {quad_op_str}, got {quad_result}" + return + + # Handle infinity cases + if np.isinf(mpmath_result): + assert np.isinf(quad_result), f"Expected inf for {quad_op_str}, got {quad_result}" + assert np.sign(mpmath_result) == np.sign(quad_result), f"Infinity sign mismatch for {quad_op_str}" + return + + # For finite non-zero results + np.testing.assert_allclose(quad_result, mpmath_result, rtol=1e-32, atol=1e-34, + err_msg=f"Value mismatch for {quad_op_str}, expected {mpmath_result}, got {quad_result}") + + + def test_add_regression_zero_plus_small(self): + """The exact SLEEF 3.8 bug: 0 + small_value gave half the expected.""" + x = QuadPrecision(self.SMALL_VALUE) + y = QuadPrecision(self.ZERO) + + result_yx = y + x # 0 + x (this was buggy in SLEEF 3.8) + result_xy = x + y # x + 0 (this was correct in SLEEF 3.8) + + assert result_yx == result_xy, f"0 + x = {result_yx}, but x + 0 = {result_xy}" + assert result_yx == x, f"0 + x = {result_yx}, expected {x}"