From 97e7c64b6f763932a8b4d559e2854ab8797a3501 Mon Sep 17 00:00:00 2001
From: crStiv
Date: Thu, 30 Jan 2025 03:06:40 +0100
Subject: [PATCH] Update uint256_impl.hpp

---
Review notes (text between the "---" line and the diff is not part of the commit message):
 * result_0..result_8 are separate reference parameters, so the SIMD path packs each pair
   into a vector explicitly instead of addressing them as `&result_0 + i`.
 * The SIMD path is skipped during constant evaluation because the wasm intrinsics are not
   constexpr; wasm_madd therefore stays usable in constant expressions.
 * <wasm_simd128.h> is included only when SIMD128 is enabled, which is the only case that uses it.
 * The commit id above and the index hashes below are those of the originally submitted revision.

 .../numeric/uint256/uint256_impl.hpp | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp b/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp
index 29be0dfa01..b61fde8a36 100644
--- a/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp
+++ b/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp
@@ -2,6 +2,11 @@
 #include "../bitop/get_msb.hpp"
 #include "./uint256.hpp"
 #include "barretenberg/common/assert.hpp"
+
+#if defined(__wasm__) && defined(__wasm_simd128__)
+#include <wasm_simd128.h>
+#endif
+
 namespace bb::numeric {
 
 constexpr std::pair<uint64_t, uint64_t> uint256_t::mul_wide(const uint64_t a, const uint64_t b)
@@ -78,6 +83,10 @@ constexpr uint64_t uint256_t::mac_discard_hi(const uint64_t a,
 /**
  * @brief Multiply one limb by 9 limbs and add to resulting limbs
  *
+ * Each result_i accumulates left_limb * right_limbs[i], wrapping modulo 2^64. When compiled for
+ * WebAssembly with SIMD128 enabled, the runtime path processes the first eight limbs in pairs
+ * using 64x2 vector intrinsics; constant evaluation always takes the scalar path because those
+ * intrinsics are not constexpr.
  */
 constexpr void uint256_t::wasm_madd(const uint64_t& left_limb,
                                     const uint64_t* right_limbs,
@@ -91,6 +100,28 @@ constexpr void uint256_t::wasm_madd(const uint64_t& left_limb,
                                     uint64_t& result_7,
                                     uint64_t& result_8)
 {
+#if defined(__wasm__) && defined(__wasm_simd128__)
+    // The SIMD intrinsics are not constexpr, so only the runtime path may use them; constant
+    // evaluation falls through to the scalar statements below.
+    if (!__builtin_is_constant_evaluated()) {
+        // result_0..result_8 are separate references rather than one array, so each pair is packed
+        // into a vector explicitly instead of being addressed as `&result_0 + i`.
+        const auto madd_pair = [left_limb](const uint64_t* right, uint64_t& lo, uint64_t& hi) {
+            const v128_t left = wasm_i64x2_splat(static_cast<int64_t>(left_limb));
+            const v128_t acc = wasm_i64x2_make(static_cast<int64_t>(lo), static_cast<int64_t>(hi));
+            const v128_t sum = wasm_i64x2_add(acc, wasm_i64x2_mul(left, wasm_v128_load(right)));
+            lo = static_cast<uint64_t>(wasm_i64x2_extract_lane(sum, 0));
+            hi = static_cast<uint64_t>(wasm_i64x2_extract_lane(sum, 1));
+        };
+        madd_pair(right_limbs, result_0, result_1);
+        madd_pair(right_limbs + 2, result_2, result_3);
+        madd_pair(right_limbs + 4, result_4, result_5);
+        madd_pair(right_limbs + 6, result_6, result_7);
+        // Nine limbs do not divide into pairs; the last one is accumulated on its own.
+        result_8 += left_limb * right_limbs[8];
+        return;
+    }
+#endif
     result_0 += left_limb * right_limbs[0];
     result_1 += left_limb * right_limbs[1];
     result_2 += left_limb * right_limbs[2];
@@ -119,6 +150,7 @@ constexpr std::array<uint64_t, WASM_NUM_LIMBS> uint256_t::wasm_convert(const uin
              (data[3] >> 40) & 0x1fffffff };
 }
 #endif
+
 constexpr std::pair<uint256_t, uint256_t> uint256_t::divmod(const uint256_t& b) const
 {
     if (*this == 0 || b == 0) {