From a4ba73e7cf776490d2f99a527f4772dc707c14ed Mon Sep 17 00:00:00 2001 From: w1m024 Date: Wed, 13 Aug 2025 09:06:32 +0000 Subject: [PATCH 1/2] Add support for RISC-V Vector Extension - Add compile options and detection logic for RISC-V Vector Extension (RVV) - Implement RVV-optimized memory copy in the IncrementalCopy function - Add RISC-V toolchain configuration file Signed-off-by: w1m024 Co-authored-by: chenmiaoi Co-authored-by: gong-flying --- CMakeLists.txt | 45 +++++++++++++++++++++++++++++++++++++ cmake/config.h.in | 3 +++ cmake/toolchain-riscv.cmake | 13 +++++++++++ snappy.cc | 28 +++++++++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 cmake/toolchain-riscv.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 68686f7..8a51917 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,8 @@ option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." OFF) +option(SNAPPY_REQUIRE_RVV "Target processors with RVV support." OFF) + option(SNAPPY_INSTALL "Install Snappy's header and library" ON) include(TestBigEndian) @@ -63,6 +65,8 @@ check_include_file("sys/time.h" HAVE_SYS_TIME_H) check_include_file("sys/uio.h" HAVE_SYS_UIO_H) check_include_file("unistd.h" HAVE_UNISTD_H) check_include_file("windows.h" HAVE_WINDOWS_H) +check_include_file("sse2rvv.h" HAVE_SSE2RISCV_INSTRINSIC_H) +check_include_file("riscv_vector.h" HAVE_RISCV_INSTRINSIC_H) include(CheckLibraryExists) check_library_exists(z zlibVersion "" HAVE_LIBZ) @@ -73,6 +77,7 @@ CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2) CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2) +CHECK_CXX_COMPILER_FLAG("-march=rv64gcv" HAVE_CLANG_RVV) if(SNAPPY_REQUIRE_AVX2) if(HAVE_VISUAL_STUDIO_ARCH_AVX2) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") @@ -90,6 +95,10 @@ elseif (SNAPPY_REQUIRE_AVX) if(HAVE_CLANG_MAVX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") endif(HAVE_CLANG_MAVX) +elseif (SNAPPY_REQUIRE_RVV) + if(HAVE_CLANG_RVV) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv64gcv") + endif(HAVE_CLANG_RVV) endif(SNAPPY_REQUIRE_AVX2) include(CheckCXXSourceCompiles) @@ -115,6 +124,42 @@ int main() { return 0; }" SNAPPY_HAVE_SSSE3) +check_cxx_source_compiles(" +#include + +#define vreinterpretq_f64_m128i(x) \ + __riscv_vreinterpret_v_i64m1_i32m1(__riscv_vreinterpret_v_f64m1_i64m1(x)) +#define vreinterpretq_m128i_i64(x) __riscv_vreinterpret_v_i32m1_i64m1(x) +#define vreinterpretq_i64_m128i(x) __riscv_vreinterpret_v_i64m1_i32m1(x) +#define vreinterpretq_m128i_i8(x) __riscv_vreinterpret_v_i32m1_i8m1(x) +#define vreinterpretq_i8_m128i(x) __riscv_vreinterpret_v_i8m1_i32m1(x) + +int main() { + const vint32m1_t *src = 0; + vint32m1_t dest; + const vint32m1_t shuffle_mask = vreinterpretq_f64_m128i( + __riscv_vle64_v_f64m1((double const *)src, 2)); + + vint64m1_t addr = vreinterpretq_m128i_i64(*src); + vint64m1_t zeros = __riscv_vmv_v_x_i64m1(0, 2); + + vint32m1_t a = vreinterpretq_i64_m128i( + __riscv_vslideup_vx_i64m1_tu(addr, zeros, 1, 2)); + + vint8m1_t _a = vreinterpretq_m128i_i8(a); + vint8m1_t _b = vreinterpretq_m128i_i8(shuffle_mask); + vbool8_t mask_lt_zero = __riscv_vmslt_vx_i8m1_b8(_b, 0, 16); + vuint8m1_t idxs = + __riscv_vreinterpret_v_i8m1_u8m1(__riscv_vand_vx_i8m1(_b, 0xf, 16)); + vint8m1_t shuffle = __riscv_vrgather_vv_i8m1(_a, idxs, 16); + + const vint32m1_t pattern = vreinterpretq_i8_m128i( + __riscv_vmerge_vxm_i8m1(shuffle, 0, mask_lt_zero, 16)); + + dest = pattern; + return 0; +}" SNAPPY_HAVE_RVV) + check_cxx_source_compiles(" #include int main() { diff --git a/cmake/config.h.in b/cmake/config.h.in index 24f27ef..f8a2191 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -52,6 +52,9 @@ /* Define to 1 if you target processors with SSSE3+ and have . */ #cmakedefine01 SNAPPY_HAVE_SSSE3 +/* Define to 1 if you target processors with RVV and have . */ +#cmakedefine01 SNAPPY_HAVE_RVV + /* Define to 1 if you target processors with BMI2+ and have . */ #cmakedefine01 SNAPPY_HAVE_BMI2 diff --git a/cmake/toolchain-riscv.cmake b/cmake/toolchain-riscv.cmake new file mode 100644 index 0000000..4af8fb1 --- /dev/null +++ b/cmake/toolchain-riscv.cmake @@ -0,0 +1,13 @@ +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE=path/to/toolchain-riscv.cmake .. + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +# Specify the cross compiler +set(CMAKE_C_COMPILER "riscv64-unknown-linux-gnu-gcc") +set(CMAKE_CXX_COMPILER "riscv64-unknown-linux-gnu-g++") + +set(CMAKE_C_FLAGS "-march=rv64gcv -mabi=lp64d" CACHE INTERNAL "C compiler flags") +set(CMAKE_CXX_FLAGS "-march=rv64gcv -mabi=lp64d -static-libstdc++" CACHE INTERNAL "C++ compiler flags") + +include_directories(/usr/lib/gcc-cross/riscv64-linux-gnu/14/include) diff --git a/snappy.cc b/snappy.cc index ce1eef4..b56013b 100644 --- a/snappy.cc +++ b/snappy.cc @@ -62,6 +62,10 @@ #include #endif +#if SNAPPY_HAVE_RVV +#include +#endif + #if SNAPPY_HAVE_BMI2 // Please do not replace with . or with headers that assume more // advanced SSE versions without checking with all the OWNERS. @@ -252,6 +256,30 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; } return IncrementalCopySlow(src, op, op_limit); +#elif defined (SNAPPY_HAVE_RVV) + size_t bytes_to_copy = op_limit - op; + while (bytes_to_copy > 0) { + + size_t vl = __riscv_vsetvl_e8m1(bytes_to_copy); + + vuint8m1_t pattern_source = __riscv_vle8_v_u8m1( + reinterpret_cast(src), pattern_size); + vuint8m1_t indices_sequential = __riscv_vid_v_u8m1(vl); + vuint8m1_t indices_repeating = __riscv_vremu_vx_u8m1( + indices_sequential, pattern_size, vl); + + vuint8m1_t pattern_to_write = __riscv_vrgather_vv_u8m1( + pattern_source, indices_repeating, vl); + + __riscv_vse8_v_u8m1(reinterpret_cast(op), pattern_to_write, vl); + + op += vl; + bytes_to_copy -= vl; + } + + return op_limit; + + #else // !SNAPPY_HAVE_SSSE3 // If plenty of buffer space remains, expand the pattern to at least 8 // bytes. The way the following loop is written, we need 8 bytes of buffer From bb64817f3da1768f6d8be8e15b244f1b03e29551 Mon Sep 17 00:00:00 2001 From: w1m024 Date: Mon, 25 Aug 2025 15:51:35 +0800 Subject: [PATCH 2/2] Delete cmake/toolchain-riscv.cmake --- cmake/toolchain-riscv.cmake | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 cmake/toolchain-riscv.cmake diff --git a/cmake/toolchain-riscv.cmake b/cmake/toolchain-riscv.cmake deleted file mode 100644 index 4af8fb1..0000000 --- a/cmake/toolchain-riscv.cmake +++ /dev/null @@ -1,13 +0,0 @@ -# Usage: cmake -DCMAKE_TOOLCHAIN_FILE=path/to/toolchain-riscv.cmake .. - -set(CMAKE_SYSTEM_NAME Linux) -set(CMAKE_SYSTEM_PROCESSOR riscv64) - -# Specify the cross compiler -set(CMAKE_C_COMPILER "riscv64-unknown-linux-gnu-gcc") -set(CMAKE_CXX_COMPILER "riscv64-unknown-linux-gnu-g++") - -set(CMAKE_C_FLAGS "-march=rv64gcv -mabi=lp64d" CACHE INTERNAL "C compiler flags") -set(CMAKE_CXX_FLAGS "-march=rv64gcv -mabi=lp64d -static-libstdc++" CACHE INTERNAL "C++ compiler flags") - -include_directories(/usr/lib/gcc-cross/riscv64-linux-gnu/14/include)