diff --git a/CMakeLists.txt b/CMakeLists.txt
index 353154f..e707d83 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,16 @@ option(SIMDCOMP_NATIVE
        "Tune for the building machine (-march=native); enables AVX2/AVX-512 on \
 capable x86 hosts" ON)
 
+string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SIMDCOMP_SYSTEM_PROCESSOR_LOWER)
+set(SIMDCOMP_TARGET_IS_X86 FALSE)  # both flags stay FALSE on other targets
+set(SIMDCOMP_TARGET_IS_RISCV FALSE)
+if(SIMDCOMP_SYSTEM_PROCESSOR_LOWER MATCHES "^(x86_64|amd64|x86|i[3-6]86)$")
+  set(SIMDCOMP_TARGET_IS_X86 TRUE)  # "x86" is what 32-bit Windows reports
+elseif(SIMDCOMP_SYSTEM_PROCESSOR_LOWER MATCHES "^riscv")
+  set(SIMDCOMP_TARGET_IS_RISCV TRUE)
+  message(STATUS "RISC-V target detected; using scalar 128-bit compatibility shim")
+endif()
+
 # Default to an optimized build when the user did not pick one.
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
@@ -55,14 +65,20 @@ set_target_properties(simdcomp PROPERTIES
   SOVERSION ${PROJECT_VERSION_MAJOR}
   POSITION_INDEPENDENT_CODE ON)
 
+# No RISC-V compile definitions are needed for simdcomp: the compiler already
+# predefines __riscv and __riscv_xlen for its real target, and forcing
+# __riscv_xlen=64 by hand would mislabel RV32 builds.
+
 # -march=native (when requested and supported by the compiler).
 include(CheckCCompilerFlag)
 set(SIMDCOMP_HAS_MARCH_NATIVE FALSE)
-if(SIMDCOMP_NATIVE)
+if(SIMDCOMP_NATIVE AND NOT SIMDCOMP_TARGET_IS_RISCV)  # x86 and ARM still probe
   check_c_compiler_flag("-march=native" SIMDCOMP_HAS_MARCH_NATIVE)
   if(SIMDCOMP_HAS_MARCH_NATIVE)
     target_compile_options(simdcomp PRIVATE -march=native)
   endif()
+elseif(SIMDCOMP_NATIVE AND SIMDCOMP_TARGET_IS_RISCV)
+  message(STATUS "Skipping -march=native for RISC-V target")
 endif()
 
 # Warnings, mirroring the previous Makefile, on GCC/Clang only.
@@ -77,6 +93,12 @@ function(simdcomp_apply_native target)
   endif()
 endfunction()
 
+function(simdcomp_apply_riscv_defs target)
+  # Deliberately adds nothing to ${target}: a RISC-V compiler predefines
+  # __riscv and __riscv_xlen for its real target, so forcing __riscv_xlen=64
+  # here would mislabel RV32 builds. Kept so existing callers stay valid.
+endfunction()
+
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
@@ -86,11 +108,13 @@ if(SIMDCOMP_BUILD_TESTS)
   add_executable(unit tests/unit.c)
   target_link_libraries(unit PRIVATE simdcomp)
   simdcomp_apply_native(unit)
+  simdcomp_apply_riscv_defs(unit)
   add_test(NAME unit COMMAND unit)
 
   add_executable(unit_chars tests/unit_chars.c)
   target_link_libraries(unit_chars PRIVATE simdcomp)
   simdcomp_apply_native(unit_chars)
+  simdcomp_apply_riscv_defs(unit_chars)
   add_test(NAME unit_chars COMMAND unit_chars)
 endif()
 
@@ -101,6 +125,7 @@ if(SIMDCOMP_BUILD_EXAMPLES)
   add_executable(example example/example.c)
   target_link_libraries(example PRIVATE simdcomp)
   simdcomp_apply_native(example)
+  simdcomp_apply_riscv_defs(example)
 endif()
 
 # ---------------------------------------------------------------------------
@@ -127,10 +152,12 @@ if(SIMDCOMP_BUILD_BENCHMARKS)
     CXX_STANDARD 17
     CXX_STANDARD_REQUIRED ON)
   simdcomp_apply_native(bitpackingbenchmark)
+  simdcomp_apply_riscv_defs(bitpackingbenchmark)
 
   add_executable(benchmark benchmarks/benchmark.c)
   target_link_libraries(benchmark PRIVATE simdcomp)
   simdcomp_apply_native(benchmark)
+  simdcomp_apply_riscv_defs(benchmark)
 endif()
 
 # ---------------------------------------------------------------------------
diff --git a/README.md b/README.md
index 8937da0..ecb56e0 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,8 @@ This is significantly faster than generic codecs like gzip, LZO, Snappy or LZ4.
 On a Skylake Intel processor, it can decode integers at a rate 0.3 cycles per integer,
 which can easily translate into more than 8 decoded billions integers per second.
 
-It runs on both x86/x64 (SSE/AVX) and 64-bit ARM (NEON, e.g. Apple Silicon). See
+It runs on x86/x64 (SSE/AVX), 64-bit ARM (NEON, e.g. Apple Silicon), and
+RISC-V through a conservative scalar compatibility backend. See
 [Platforms](#platforms) below.
 
 This library is part of the [Awesome C](https://github.com/kozross/awesome-c) list of C resources.
@@ -39,6 +40,7 @@ Requirements
 
 - On x86/x64: your processor should support SSE4.1 (supported by most Intel and AMD processors released since 2008). The core bit-packing functions only require SSE2 (Pentium4 or better).
 - On ARM: an AArch64/ARM processor with NEON (e.g. Apple Silicon). The SSE intrinsics are mapped to NEON by our own self-contained shim (`include/neon128.h`); no third-party translation library is pulled in.
+- On RISC-V: the library builds through a small scalar 128-bit compatibility shim (`include/riscv128.h`). This preserves the existing API but does not provide RVV acceleration.
 - A C99 (or better) compiler, plus a C++17 compiler if you build the benchmarks.
 - CMake 3.14 or better.
 
@@ -47,7 +49,7 @@ For a plain C version that does not use SIMD instructions, see https://github.co
 Platforms
 ---------
 
-The library supports two SIMD backends behind the same API:
+The library supports three backends behind the same API:
 
 - **x86 / x64** — Intel/AMD SSE (with optional AVX2 and AVX-512 code paths,
   enabled automatically when you build with `-march=native` on a capable host).
@@ -57,10 +59,15 @@ The library supports two SIMD backends behind the same API:
   written directly against `<arm_neon.h>`; no third-party translation layer
   (such as sse2neon) is pulled in. The wider AVX2/AVX-512 paths are x86-only and
   are simply inactive on ARM.
-
-The public API is identical on both: it is selected automatically at compile
-time, so the same source (including the `__m128i`-based entry points) builds on
-either architecture.
+- **RISC-V** — the same 128-bit kernel sources build through a conservative
+  scalar compatibility shim in `include/riscv128.h`. This keeps the existing
+  `__m128i`-based API available on RISC-V without pulling in any x86 headers,
+  while leaving AVX2/AVX-512 inactive. It is a portability path, not an RVV
+  optimization backend.
+
+The public API is identical across these backends: it is selected automatically
+at compile time, so the same source (including the `__m128i`-based entry
+points) builds on each architecture.
 
 Usage
 -------
diff --git a/include/portability.h b/include/portability.h
index 032bd56..b650aab 100644
--- a/include/portability.h
+++ b/include/portability.h
@@ -78,6 +78,22 @@ typedef signed char int8_t;
     defined(_M_ARM64)
 /* ARM NEON: use our own SSE-on-NEON shim instead of the x86 intrinsics. */
 #include "neon128.h"
+#elif defined(__riscv)
+/* RISC-V: scalar 128-bit shim (compatibility only, no RVV). NOTE(review): the
+ * __SSE*__ macros below use compiler-reserved names -- TODO confirm need. */
+#include "riscv128.h"
+#ifndef __SSE2__
+#define __SSE2__ 1
+#endif
+#ifndef __SSSE3__
+#define __SSSE3__ 1
+#endif
+#ifndef __SSE4_1__
+#define __SSE4_1__ 1
+#endif
+#ifndef __SSE4_2__
+#define __SSE4_2__ 1
+#endif
 #else
 #include <x86intrin.h>
 #endif
diff --git a/include/riscv128.h b/include/riscv128.h
new file mode 100644
index 0000000..d9d0fc4
--- /dev/null
+++ b/include/riscv128.h
@@ -0,0 +1,259 @@
+/**
+ * This code is released under a BSD License.
+ *
+ * riscv128.h -- a small, self-contained scalar implementation of the handful
+ * of 128-bit Intel SSE2/SSSE3/SSE4.1 intrinsics that simdcomp actually uses.
+ *
+ * This is intentionally conservative: it provides source compatibility for the
+ * existing 128-bit kernels on RISC-V, but it does not claim RVV acceleration.
+ * The wider AVX2/AVX-512 code paths remain x86-only.
+ */
+#ifndef SIMDCOMP_RISCV128_H_
+#define SIMDCOMP_RISCV128_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#if defined(__GNUC__) || defined(__clang__) /* GNU-style attributes exist */
+#define SIMDCOMP_RISCV_INLINE __inline__ __attribute__((always_inline))
+#else
+#define SIMDCOMP_RISCV_INLINE inline /* other compilers: plain inline */
+#endif
+
+typedef union {
+  uint32_t u32[4]; /* four unsigned 32-bit lanes */
+  int32_t s32[4];  /* the same lanes viewed as signed */
+  uint8_t u8[16];  /* byte view used by the byte-shift/shuffle helpers */
+  float f32[4];    /* float view of the same 16 bytes */
+} __m128i;
+
+typedef __m128i __m128; /* the shim uses one representation for both */
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_loadu_si128(const __m128i *p) {
+  __m128i value; /* filled bytewise, so p needs no particular alignment */
+  memcpy(&value, p, sizeof(__m128i));
+  return value;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_load_si128(const __m128i *p) {
+  return _mm_loadu_si128(p); /* aligned load == unaligned load here */
+}
+
+static SIMDCOMP_RISCV_INLINE void _mm_storeu_si128(__m128i *p, __m128i a) {
+  memcpy(p, &a, sizeof(__m128i)); /* bytewise copy: p may be unaligned */
+}
+
+static SIMDCOMP_RISCV_INLINE void _mm_store_si128(__m128i *p, __m128i a) {
+  memcpy(p, &a, sizeof(__m128i)); /* same bytes as the unaligned store */
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_setzero_si128(void) {
+  __m128i zero; /* the four u32 lanes cover all 16 bytes of the union */
+  zero.u32[0] = zero.u32[1] = zero.u32[2] = zero.u32[3] = 0U;
+  return zero;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_set1_epi32(int v) {
+  __m128i out; /* every 32-bit lane receives v */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    out.s32[lane] = v;
+  }
+  return out;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_setr_epi32(int e0, int e1, int e2,
+                                                    int e3) {
+  /* Arguments are stored in the order given: e0 goes to lane 0 and e3 to
+   * lane 3. */
+  const int32_t ordered[4] = {e0, e1, e2, e3};
+  __m128i out;
+  memcpy(out.s32, ordered, sizeof(ordered));
+  return out;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) {
+  /* Bitwise AND of all 128 bits, done one 32-bit lane at a time. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] &= b.u32[lane];
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) {
+  /* Bitwise OR of all 128 bits, done one 32-bit lane at a time. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] |= b.u32[lane];
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) {
+  /* Unsigned lanes: SSE wraps mod 2^32; signed overflow would be UB in C. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] += b.u32[lane];
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) {
+  /* Unsigned lanes: SSE wraps mod 2^32; signed overflow would be UB in C. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] -= b.u32[lane];
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_min_epu32(__m128i a, __m128i b) {
+  /* Per-lane minimum, comparing the lanes as unsigned 32-bit values. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] = (b.u32[lane] < a.u32[lane]) ? b.u32[lane] : a.u32[lane];
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_max_epu32(__m128i a, __m128i b) {
+  /* Per-lane maximum, comparing the lanes as unsigned 32-bit values. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] = (b.u32[lane] > a.u32[lane]) ? b.u32[lane] : a.u32[lane];
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_slli_epi32(__m128i a, int count) {
+  /* Counts outside 0..31 (negatives too) clear all lanes; no UB shift. */
+  int lane;
+  if ((unsigned)count >= 32U) {
+    return _mm_setzero_si128();
+  }
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] <<= count;
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_srli_epi32(__m128i a, int count) {
+  /* Counts outside 0..31 (negatives too) clear all lanes; no UB shift. */
+  int lane;
+  if ((unsigned)count >= 32U) {
+    return _mm_setzero_si128();
+  }
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] >>= count;
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b) {
+  /* Signed per-lane a < b; true lanes become all ones, false lanes zero. */
+  int lane;
+  for (lane = 0; lane < 4; ++lane) {
+    a.u32[lane] = (a.s32[lane] < b.s32[lane]) ? 0xFFFFFFFFU : 0U;
+  }
+  return a;
+}
+
+static SIMDCOMP_RISCV_INLINE int _mm_cvtsi128_si32(__m128i a) {
+  return a.s32[0]; /* lane 0, read through the signed view */
+}
+
+#define _mm_extract_epi32(a, imm) ((a).s32[(imm) & 3]) /* signed lane imm & 3 */
+
+#define _mm_shuffle_epi32(a, imm) /* NB: evaluates (a) four times */           \
+  _mm_setr_epi32((a).s32[(imm) & 3], (a).s32[((imm) >> 2) & 3],               \
+                 (a).s32[((imm) >> 4) & 3], (a).s32[((imm) >> 6) & 3])
+
+static SIMDCOMP_RISCV_INLINE __m128i simdcomp_riscv_alignr_epi8(__m128i a,
+                                                                __m128i b,
+                                                                int count) {
+  __m128i out; /* low 16 bytes of (a:b) shifted right by count bytes */
+  int i;
+  if ((unsigned)count >= 32U) { /* nothing left; also rejects negatives */
+    return _mm_setzero_si128();
+  }
+  for (i = 0; i < 16; ++i) {
+    int src = count + i; /* byte index into the 32-byte pair, 0..46 */
+    if (src < 16) {
+      out.u8[i] = b.u8[src];
+    } else { /* bytes past the pair are zero, not an out-of-bounds read */
+      out.u8[i] = (src < 32) ? a.u8[src - 16] : 0;
+    }
+  }
+  return out;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i simdcomp_riscv_srli_si128(__m128i a,
+                                                               int count) {
+  /* Zero-filling byte shift right; counts outside 0..15 give all zeros. */
+  __m128i out;
+  int i;
+  if ((unsigned)count >= 16U) { /* also keeps negative counts off a.u8 */
+    return _mm_setzero_si128();
+  }
+  for (i = 0; i < 16; ++i) {
+    out.u8[i] = (i + count < 16) ? a.u8[i + count] : 0;
+  }
+  return out;
+}
+
+static SIMDCOMP_RISCV_INLINE __m128i simdcomp_riscv_slli_si128(__m128i a,
+                                                               int count) {
+  /* Zero-filling byte shift left; counts outside 0..15 give all zeros. */
+  __m128i out;
+  int i;
+  if ((unsigned)count >= 16U) { /* also keeps negative counts off a.u8 */
+    return _mm_setzero_si128();
+  }
+  for (i = 0; i < 16; ++i) {
+    out.u8[i] = (i >= count) ? a.u8[i - count] : 0;
+  }
+  return out;
+}
+
+#define _mm_alignr_epi8(a, b, imm) /* forwards to the inline helper */         \
+  simdcomp_riscv_alignr_epi8((a), (b), (imm))
+
+#define _mm_srli_si128(a, imm) simdcomp_riscv_srli_si128((a), (imm))
+
+#define _mm_slli_si128(a, imm) simdcomp_riscv_slli_si128((a), (imm))
+
+static SIMDCOMP_RISCV_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i mask) {
+  /* Low nibble of each mask byte selects a byte of a; top bit set => zero. */
+  __m128i picked;
+  int pos;
+  for (pos = 0; pos < 16; ++pos) {
+    const uint8_t sel = mask.u8[pos];
+    picked.u8[pos] = 0;
+    if ((sel & 0x80U) == 0U) {
+      picked.u8[pos] = a.u8[sel & 0x0FU];
+    }
+  }
+  return picked;
+}
+
+#define _mm_castsi128_ps(a) (a) /* __m128 is a typedef of __m128i: no-op */
+
+static SIMDCOMP_RISCV_INLINE int _mm_movemask_ps(__m128 a) {
+  /* Collect the sign (top) bit of every 32-bit lane into the low four bits
+   * of the result: lane 0 feeds bit 0, lane 1 bit 1, and so on. The lanes
+   * are inspected through the integer view, so no float is ever loaded. */
+  int mask = 0;
+  int lane;
+
+  for (lane = 0; lane < 4; ++lane) {
+    const int sign = (int)(a.u32[lane] >> 31);
+    if (sign) {
+      mask |= 1 << lane;
+    }
+  }
+
+  return mask;
+}
+
+#endif /* SIMDCOMP_RISCV128_H_ */
diff --git a/include/simdbitpacking.h b/include/simdbitpacking.h
index 499be6d..e06a22a 100644
--- a/include/simdbitpacking.h
+++ b/include/simdbitpacking.h
@@ -6,9 +6,9 @@
 
 #include "portability.h"
 
-/* SSE2 is required (on ARM, neon128.h via portability.h provides the shim) */
+/* SSE2 is required (on ARM/RISC-V, portability.h provides the shim). */
 #if !(defined(__aarch64__) || defined(__arm__) || defined(__ARM_NEON) ||      \
-      defined(_M_ARM64))
+      defined(_M_ARM64) || defined(__riscv))
 #include <emmintrin.h>
 #endif
 /* for memset */
diff --git a/include/simdcomputil.h b/include/simdcomputil.h
index 810b739..7d5fd29 100644
--- a/include/simdcomputil.h
+++ b/include/simdcomputil.h
@@ -7,9 +7,9 @@
 
 #include "portability.h"
 
-/* SSE2 is required (on ARM, neon128.h via portability.h provides the shim) */
+/* SSE2 is required (on ARM/RISC-V, portability.h provides the shim). */
 #if !(defined(__aarch64__) || defined(__arm__) || defined(__ARM_NEON) ||      \
-      defined(_M_ARM64))
+      defined(_M_ARM64) || defined(__riscv))
 #include <emmintrin.h>
 #endif
 
diff --git a/include/simdfor.h b/include/simdfor.h
index 9e6fd06..5111f2f 100644
--- a/include/simdfor.h
+++ b/include/simdfor.h
@@ -6,9 +6,9 @@
 
 #include "portability.h"
 
-/* SSE2 is required (on ARM, neon128.h via portability.h provides the shim) */
+/* SSE2 is required (on ARM/RISC-V, portability.h provides the shim). */
 #if !(defined(__aarch64__) || defined(__arm__) || defined(__ARM_NEON) ||      \
-      defined(_M_ARM64))
+      defined(_M_ARM64) || defined(__riscv))
 #include <emmintrin.h>
 #endif
 
diff --git a/include/simdintegratedbitpacking.h b/include/simdintegratedbitpacking.h
index 545b742..abd65e2 100644
--- a/include/simdintegratedbitpacking.h
+++ b/include/simdintegratedbitpacking.h
@@ -7,9 +7,9 @@
 
 #include "portability.h"
 
-/* SSE2 is required (on ARM, neon128.h via portability.h provides the shim) */
+/* SSE2 is required (on ARM/RISC-V, portability.h provides the shim). */
 #if !(defined(__aarch64__) || defined(__arm__) || defined(__ARM_NEON) ||      \
-      defined(_M_ARM64))
+      defined(_M_ARM64) || defined(__riscv))
 #include <emmintrin.h>
 #endif
 
diff --git a/src/simdcomputil.c b/src/simdcomputil.c
index 87e45ca..50fbf3c 100644
--- a/src/simdcomputil.c
+++ b/src/simdcomputil.c
@@ -3,7 +3,7 @@
  */
 
 #include "simdcomputil.h"
-#ifdef __SSE4_1__
+#if defined(__SSE4_1__) && !defined(__riscv)
 #include <smmintrin.h>
 #endif
 #include <assert.h>
diff --git a/src/simdpackedsearch.c b/src/simdpackedsearch.c
index d650270..8ecfb86 100644
--- a/src/simdpackedsearch.c
+++ b/src/simdpackedsearch.c
@@ -2,10 +2,10 @@
  * This code is released under a BSD License.
  */
 #if defined(__SSE4_1__) || defined(__aarch64__) || defined(__arm__) ||        \
-    defined(__ARM_NEON) || defined(_M_ARM64)
+    defined(__ARM_NEON) || defined(_M_ARM64) || defined(__riscv)
 
 #include "simdintegratedbitpacking.h"
-#ifdef __SSE4_1__
+#if defined(__SSE4_1__) && !defined(__riscv)
 #include <smmintrin.h>
 #endif
 
diff --git a/src/simdpackedselect.c b/src/simdpackedselect.c
index 9e81002..75b7c75 100644
--- a/src/simdpackedselect.c
+++ b/src/simdpackedselect.c
@@ -2,9 +2,9 @@
  * This code is released under a BSD License.
  */
 #if defined(__SSE4_1__) || defined(__aarch64__) || defined(__arm__) ||        \
-    defined(__ARM_NEON) || defined(_M_ARM64)
+    defined(__ARM_NEON) || defined(_M_ARM64) || defined(__riscv)
 #include "simdintegratedbitpacking.h"
-#ifdef __SSE4_1__
+#if defined(__SSE4_1__) && !defined(__riscv)
 #include <smmintrin.h>
 #endif