fast-pack · carlosqwqqwq · Jun 18, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -16,6 +16,16 @@ option(SIMDCOMP_NATIVE
        "Tune for the building machine (-march=native); enables AVX2/AVX-512 on \
 capable x86 hosts" ON)
 
+string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SIMDCOMP_SYSTEM_PROCESSOR_LOWER)  # classify the target CPU case-insensitively (e.g. "AMD64")
+set(SIMDCOMP_TARGET_IS_X86 FALSE)
+set(SIMDCOMP_TARGET_IS_RISCV FALSE)
+if(SIMDCOMP_SYSTEM_PROCESSOR_LOWER MATCHES "^(x86_64|amd64|x86|i[3-6]86)$")  # plain "x86" is reported by 32-bit Windows hosts
+  set(SIMDCOMP_TARGET_IS_X86 TRUE)
+elseif(SIMDCOMP_SYSTEM_PROCESSOR_LOWER MATCHES "^riscv")  # prefix match: riscv32, riscv64, ...
+  set(SIMDCOMP_TARGET_IS_RISCV TRUE)
+  message(STATUS "RISC-V target detected; using scalar 128-bit compatibility shim")
+endif()
+
 # Default to an optimized build when the user did not pick one.
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
@@ -55,14 +65,20 @@ set_target_properties(simdcomp PROPERTIES
   SOVERSION ${PROJECT_VERSION_MAJOR}
   POSITION_INDEPENDENT_CODE ON)
 
+if(SIMDCOMP_TARGET_IS_RISCV)  # XLEN follows the pointer width so an RV32 build is not mislabeled as 64-bit
+  target_compile_definitions(simdcomp PRIVATE __riscv=1 "__riscv_xlen=$<IF:$<EQUAL:${CMAKE_SIZEOF_VOID_P},4>,32,64>")
+endif()
+
 # -march=native (when requested and supported by the compiler).
 include(CheckCCompilerFlag)
 set(SIMDCOMP_HAS_MARCH_NATIVE FALSE)
-if(SIMDCOMP_NATIVE)
+if(SIMDCOMP_NATIVE AND NOT SIMDCOMP_TARGET_IS_RISCV)  # still probe on ARM and other non-RISC-V targets, as the pre-patch check did
   check_c_compiler_flag("-march=native" SIMDCOMP_HAS_MARCH_NATIVE)
   if(SIMDCOMP_HAS_MARCH_NATIVE)
     target_compile_options(simdcomp PRIVATE -march=native)
   endif()
+elseif(SIMDCOMP_NATIVE AND SIMDCOMP_TARGET_IS_RISCV)
+  message(STATUS "Skipping -march=native for RISC-V target")
 endif()
 
 # Warnings, mirroring the previous Makefile, on GCC/Clang only.
@@ -77,6 +93,12 @@ function(simdcomp_apply_native target)
   endif()
 endfunction()
 
+function(simdcomp_apply_riscv_defs target)  # adds the RISC-V macros to `target`; no-op unless SIMDCOMP_TARGET_IS_RISCV
+  if(SIMDCOMP_TARGET_IS_RISCV)  # XLEN follows the pointer width so an RV32 build is not mislabeled as 64-bit
+    target_compile_definitions(${target} PRIVATE __riscv=1 "__riscv_xlen=$<IF:$<EQUAL:${CMAKE_SIZEOF_VOID_P},4>,32,64>")
+  endif()
+endfunction()
+
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
@@ -86,11 +108,13 @@ if(SIMDCOMP_BUILD_TESTS)
   add_executable(unit tests/unit.c)
   target_link_libraries(unit PRIVATE simdcomp)
   simdcomp_apply_native(unit)
+  simdcomp_apply_riscv_defs(unit)
   add_test(NAME unit COMMAND unit)
 
   add_executable(unit_chars tests/unit_chars.c)
   target_link_libraries(unit_chars PRIVATE simdcomp)
   simdcomp_apply_native(unit_chars)
+  simdcomp_apply_riscv_defs(unit_chars)
   add_test(NAME unit_chars COMMAND unit_chars)
 endif()
 
@@ -101,6 +125,7 @@ if(SIMDCOMP_BUILD_EXAMPLES)
   add_executable(example example/example.c)
   target_link_libraries(example PRIVATE simdcomp)
   simdcomp_apply_native(example)
+  simdcomp_apply_riscv_defs(example)
 endif()
 
 # ---------------------------------------------------------------------------
@@ -127,10 +152,12 @@ if(SIMDCOMP_BUILD_BENCHMARKS)
     CXX_STANDARD 17
     CXX_STANDARD_REQUIRED ON)
   simdcomp_apply_native(bitpackingbenchmark)
+  simdcomp_apply_riscv_defs(bitpackingbenchmark)
 
   add_executable(benchmark benchmarks/benchmark.c)
   target_link_libraries(benchmark PRIVATE simdcomp)
   simdcomp_apply_native(benchmark)
+  simdcomp_apply_riscv_defs(benchmark)
 endif()
 
 # ---------------------------------------------------------------------------

diff --git a/README.md b/README.md
@@ -13,7 +13,8 @@ This is significantly faster than generic codecs like gzip, LZO, Snappy or LZ4.
 On a Skylake Intel processor, it can decode integers at a rate 0.3 cycles per integer,
 which can easily translate into more than 8 decoded billions integers per second.
 
-It runs on both x86/x64 (SSE/AVX) and 64-bit ARM (NEON, e.g. Apple Silicon). See
+It runs on x86/x64 (SSE/AVX), 64-bit ARM (NEON, e.g. Apple Silicon), and
+RISC-V through a conservative scalar compatibility backend. See
 [Platforms](#platforms) below.
 
 This library is part of the [Awesome C](https://github.com/kozross/awesome-c) list of C resources.
@@ -39,6 +40,7 @@ Requirements
 
 - On x86/x64: your processor should support SSE4.1 (supported by most Intel and AMD processors released since 2008). The core bit-packing functions only require SSE2 (Pentium4 or better).
 - On ARM: an AArch64/ARM processor with NEON (e.g. Apple Silicon). The SSE intrinsics are mapped to NEON by our own self-contained shim (`include/neon128.h`); no third-party translation library is pulled in.
+- On RISC-V: the library builds through a small scalar 128-bit compatibility shim (`include/riscv128.h`). This preserves the existing API but does not provide RVV acceleration.
 - A C99 (or better) compiler, plus a C++17 compiler if you build the benchmarks.
 - CMake 3.14 or better.
 
@@ -47,7 +49,7 @@ For a plain C version that does not use SIMD instructions, see https://github.co
 Platforms
 ---------
 
-The library supports two SIMD backends behind the same API:
+The library supports three backends behind the same API:
 
 - **x86 / x64** — Intel/AMD SSE (with optional AVX2 and AVX-512 code paths,
   enabled automatically when you build with `-march=native` on a capable host).
@@ -57,10 +59,15 @@ The library supports two SIMD backends behind the same API:
   written directly against `<arm_neon.h>`; no third-party translation layer
   (such as sse2neon) is pulled in. The wider AVX2/AVX-512 paths are x86-only and
   are simply inactive on ARM.
-
-The public API is identical on both: it is selected automatically at compile
-time, so the same source (including the `__m128i`-based entry points) builds on
-either architecture.
+- **RISC-V** — the same 128-bit kernel sources build through a conservative
+  scalar compatibility shim in `include/riscv128.h`. This keeps the existing
+  `__m128i`-based API available on RISC-V without pulling in any x86 headers,
+  while leaving AVX2/AVX-512 inactive. It is a portability path, not an RVV
+  optimization backend.
+
+The public API is identical across these backends: the backend is selected
+automatically at compile time, so the same source (including the
+`__m128i`-based entry points) builds on each architecture.
 
 Usage
 -------

diff --git a/include/portability.h b/include/portability.h
@@ -78,6 +78,22 @@ typedef signed char int8_t;
     defined(_M_ARM64)
 /* ARM NEON: use our own SSE-on-NEON shim instead of the x86 intrinsics. */
 #include "neon128.h"
+#elif defined(__riscv)
+/* RISC-V: use a conservative scalar 128-bit shim; this is compatibility, not
+ * RVV acceleration. */
+#include "riscv128.h"
+#ifndef __SSE2__
+#define __SSE2__ 1
+#endif
+#ifndef __SSSE3__
+#define __SSSE3__ 1
+#endif
+#ifndef __SSE4_1__
+#define __SSE4_1__ 1
+#endif
+#ifndef __SSE4_2__
+#define __SSE4_2__ 1
+#endif
 #else
 #include <x86intrin.h>
 #endif