diff --git a/simde/arm/neon/cvt_n.h b/simde/arm/neon/cvt_n.h index 3574a3f6c..9e5d0cc17 100644 --- a/simde/arm/neon/cvt_n.h +++ b/simde/arm/neon/cvt_n.h @@ -22,6 +22,10 @@ * * Copyright: * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + * + * Note: pow(2, n) does not generate proper (exact) results with rounding + * modes other than round-to-nearest, so 2^n is built from exact integer shifts. + * See https://github.com/simd-everywhere/simde/issues/1260 */ #if !defined(SIMDE_ARM_NEON_CVT_N_H) @@ -40,7 +44,7 @@ simde_vcvth_n_u16_f16(simde_float16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { return simde_vcvth_u16_f16( simde_float16_from_float32( - simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)))); + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, (UINT32_C(1) << n)))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vcvth_n_u16_f16(a, n) vcvth_n_u16_f16(a, n) @@ -57,7 +61,7 @@ simde_vcvth_n_f16_s16(int16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { return simde_float16_from_float32( HEDLEY_STATIC_CAST(simde_float32_t, - HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n))); + HEDLEY_STATIC_CAST(simde_float64_t, a) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n)))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vcvth_n_f16_s16(a, n) vcvth_n_f16_s16(a, n) @@ -74,7 +78,7 @@ simde_vcvth_n_f16_u16(uint16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { return simde_float16_from_float32( HEDLEY_STATIC_CAST(simde_float32_t, - HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n))); + HEDLEY_STATIC_CAST(simde_float64_t, a) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n)))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vcvth_n_f16_u16(a, n) vcvth_n_f16_u16(a, n) @@ -89,7 +93,7 @@ SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vcvts_n_s32_f32(simde_float32_t a, 
const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))); + return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvts_n_s32_f32(a, n) vcvts_n_s32_f32(a, n) @@ -103,7 +107,7 @@ SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vcvts_n_u32_f32(simde_float32_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))); + return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvts_n_u32_f32(a, n) vcvts_n_u32_f32(a, n) @@ -118,7 +122,7 @@ simde_float32_t simde_vcvts_n_f32_s32(int32_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { return HEDLEY_STATIC_CAST(simde_float32_t, - HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)); + HEDLEY_STATIC_CAST(simde_float64_t, a) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvts_n_f32_s32(a, n) vcvts_n_f32_s32(a, n) @@ -133,7 +137,7 @@ simde_float32_t simde_vcvts_n_f32_u32(uint32_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { return HEDLEY_STATIC_CAST(simde_float32_t, - HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)); + HEDLEY_STATIC_CAST(simde_float64_t, a) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n))); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvts_n_f32_u32(a, n) vcvts_n_f32_u32(a, n) @@ -147,7 +151,7 @@ SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vcvtd_n_s64_f64(simde_float64_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - return simde_vcvtd_s64_f64(a * simde_math_pow(2, n)); + return simde_vcvtd_s64_f64(a * 
(HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvtd_n_s64_f64(a, n) vcvtd_n_s64_f64(a, n) @@ -161,7 +165,7 @@ SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vcvtd_n_u64_f64(simde_float64_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - return simde_vcvtd_u64_f64(a * simde_math_pow(2, n)); + return simde_vcvtd_u64_f64(a * (HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvtd_n_u64_f64(a, n) vcvtd_n_u64_f64(a, n) @@ -175,7 +179,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vcvtd_n_f64_s64(int64_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n); + return HEDLEY_STATIC_CAST(simde_float64_t, a) / (HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvtd_n_f64_s64(a, n) vcvtd_n_f64_s64(a, n) @@ -189,7 +193,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vcvtd_n_f64_u64(uint64_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n); + return HEDLEY_STATIC_CAST(simde_float64_t, a) / 
(HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vcvtd_n_f64_u64(a, n) vcvtd_n_f64_u64(a, n) @@ -208,7 +212,7 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))); + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))); } return simde_int32x2_from_private(r_); @@ -230,7 +234,7 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n)); + r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * (HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_int64x1_from_private(r_); @@ -254,7 +258,7 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n) for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( simde_float16_to_float32(a_.values[i]) * - HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)))); + HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)))); } return simde_uint16x4_from_private(r_); @@ -277,7 +281,7 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))); + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))); } return simde_uint32x2_from_private(r_); @@ -299,7 +303,7 @@ simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i 
= 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n)); + r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * (HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_uint64x1_from_private(r_); @@ -322,7 +326,7 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))); + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))); } return simde_int32x4_from_private(r_); @@ -344,7 +348,7 @@ simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n)); + r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * 
(HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_int64x2_from_private(r_); @@ -368,7 +372,7 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n) for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( simde_float16_to_float32(a_.values[i]) * - HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)))); + HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)))); } return simde_uint16x8_from_private(r_); @@ -391,7 +395,7 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))); + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))); } return simde_uint32x4_from_private(r_); @@ -414,7 +418,7 @@ simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n)); + r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * 
(HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_uint64x2_from_private(r_); @@ -437,7 +441,7 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n)))); } return simde_float16x4_from_private(r_); @@ -460,7 +464,7 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n)))); } return simde_float16x4_from_private(r_); @@ -483,7 +487,7 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n)))); } return simde_float16x8_from_private(r_); @@ -506,7 +510,7 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = 
simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / simde_math_pow(2, n)))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n)))); } return simde_float16x8_from_private(r_); @@ -529,7 +533,7 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n))); } return simde_float32x2_from_private(r_); @@ -551,7 +555,7 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n))); } return simde_float32x2_from_private(r_); @@ -573,7 +577,7 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / 
(HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_float64x1_from_private(r_); @@ -595,7 +599,7 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_float64x2_from_private(r_); @@ -617,7 +621,7 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_float64x1_from_private(r_); @@ -639,7 +643,7 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / 
(HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << (n - 1))) * 2.0)); } return simde_float64x2_from_private(r_); @@ -661,7 +665,7 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n))); } return simde_float32x4_from_private(r_); @@ -683,7 +687,7 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / HEDLEY_STATIC_CAST(simde_float64_t, (UINT64_C(1) << n))); } return simde_float32x4_from_private(r_);