Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF)

option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." OFF)

option(SNAPPY_REQUIRE_RVV "Target processors with RVV support." OFF)

option(SNAPPY_INSTALL "Install Snappy's header and library" ON)

include(TestBigEndian)
Expand All @@ -113,6 +115,8 @@ check_include_file("sys/time.h" HAVE_SYS_TIME_H)
check_include_file("sys/uio.h" HAVE_SYS_UIO_H)
check_include_file("unistd.h" HAVE_UNISTD_H)
check_include_file("windows.h" HAVE_WINDOWS_H)
check_include_file("sse2rvv.h" HAVE_SSE2RISCV_INSTRINSIC_H)
check_include_file("riscv_vector.h" HAVE_RISCV_INSTRINSIC_H)

include(CheckLibraryExists)
check_library_exists(z zlibVersion "" HAVE_LIBZ)
Expand All @@ -124,6 +128,7 @@ CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2)
CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2)
CHECK_CXX_COMPILER_FLAG("-march=rv64gcv" HAVE_CLANG_RVV)
if(SNAPPY_REQUIRE_AVX2)
if(HAVE_VISUAL_STUDIO_ARCH_AVX2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
Expand All @@ -141,6 +146,10 @@ elseif (SNAPPY_REQUIRE_AVX)
if(HAVE_CLANG_MAVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
endif(HAVE_CLANG_MAVX)
elseif (SNAPPY_REQUIRE_RVV)
if(HAVE_CLANG_RVV)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv64gcv")
endif(HAVE_CLANG_RVV)
endif(SNAPPY_REQUIRE_AVX2)

# Used by googletest.
Expand Down Expand Up @@ -185,6 +194,42 @@ int main() {
return 0;
}" SNAPPY_HAVE_SSSE3)

check_cxx_source_compiles("
#include <riscv_vector.h>

#define vreinterpretq_f64_m128i(x) \
__riscv_vreinterpret_v_i64m1_i32m1(__riscv_vreinterpret_v_f64m1_i64m1(x))
#define vreinterpretq_m128i_i64(x) __riscv_vreinterpret_v_i32m1_i64m1(x)
#define vreinterpretq_i64_m128i(x) __riscv_vreinterpret_v_i64m1_i32m1(x)
#define vreinterpretq_m128i_i8(x) __riscv_vreinterpret_v_i32m1_i8m1(x)
#define vreinterpretq_i8_m128i(x) __riscv_vreinterpret_v_i8m1_i32m1(x)

int main() {
const vint32m1_t *src = 0;
vint32m1_t dest;
const vint32m1_t shuffle_mask = vreinterpretq_f64_m128i(
__riscv_vle64_v_f64m1((double const *)src, 2));

vint64m1_t addr = vreinterpretq_m128i_i64(*src);
vint64m1_t zeros = __riscv_vmv_v_x_i64m1(0, 2);

vint32m1_t a = vreinterpretq_i64_m128i(
__riscv_vslideup_vx_i64m1_tu(addr, zeros, 1, 2));

vint8m1_t _a = vreinterpretq_m128i_i8(a);
vint8m1_t _b = vreinterpretq_m128i_i8(shuffle_mask);
vbool8_t mask_lt_zero = __riscv_vmslt_vx_i8m1_b8(_b, 0, 16);
vuint8m1_t idxs =
__riscv_vreinterpret_v_i8m1_u8m1(__riscv_vand_vx_i8m1(_b, 0xf, 16));
vint8m1_t shuffle = __riscv_vrgather_vv_i8m1(_a, idxs, 16);

const vint32m1_t pattern = vreinterpretq_i8_m128i(
__riscv_vmerge_vxm_i8m1(shuffle, 0, mask_lt_zero, 16));

dest = pattern;
return 0;
}" SNAPPY_HAVE_RVV)

check_cxx_source_compiles("
#include <immintrin.h>
int main() {
Expand Down
3 changes: 3 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
#cmakedefine01 SNAPPY_HAVE_SSSE3

/* Define to 1 if you target processors with RVV and have <riscv_vector.h>. */
#cmakedefine01 SNAPPY_HAVE_RVV

/* Define to 1 if you target processors with SSE4.2 and have <crc32intrin.h>. */
#cmakedefine01 SNAPPY_HAVE_X86_CRC32

Expand Down
26 changes: 25 additions & 1 deletion snappy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@
#endif
#endif // !defined(SNAPPY_HAVE_X86_CRC32)

#if SNAPPY_HAVE_RVV
#include <riscv_vector.h>
#endif

#if !defined(SNAPPY_HAVE_NEON_CRC32)
#if SNAPPY_HAVE_NEON && defined(__ARM_FEATURE_CRC32)
#define SNAPPY_HAVE_NEON_CRC32 1
Expand Down Expand Up @@ -537,7 +541,27 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
} while (SNAPPY_PREDICT_TRUE(op < op_end));
}
return IncrementalCopySlow(op - pattern_size, op, op_limit);
#else // !SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
#elif defined (SNAPPY_HAVE_RVV)
size_t bytes_to_copy = op_limit - op;
while (bytes_to_copy > 0) {
size_t vl = __riscv_vsetvl_e8m1(bytes_to_copy);
vuint8m1_t pattern_source = __riscv_vle8_v_u8m1(
reinterpret_cast<const uint8_t*>(src), pattern_size);
vuint8m1_t indices_sequential = __riscv_vid_v_u8m1(vl);
vuint8m1_t indices_repeating = __riscv_vremu_vx_u8m1(
indices_sequential, pattern_size, vl);

vuint8m1_t pattern_to_write = __riscv_vrgather_vv_u8m1(
pattern_source, indices_repeating, vl);

__riscv_vse8_v_u8m1(reinterpret_cast<uint8_t*>(op), pattern_to_write, vl);

op += vl;
bytes_to_copy -= vl;
}
return op_limit;

#else
// If plenty of buffer space remains, expand the pattern to at least 8
// bytes. The way the following loop is written, we need 8 bytes of buffer
// space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
Expand Down