Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ option(SIMDCOMP_NATIVE
"Tune for the building machine (-march=native); enables AVX2/AVX-512 on \
capable x86 hosts" ON)

# Classify the target CPU family from CMAKE_SYSTEM_PROCESSOR, compared
# case-insensitively. Sets SIMDCOMP_TARGET_IS_X86 and SIMDCOMP_TARGET_IS_RISCV;
# both stay FALSE for any other (or empty) processor name.
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SIMDCOMP_SYSTEM_PROCESSOR_LOWER)
set(SIMDCOMP_TARGET_IS_X86 FALSE)
set(SIMDCOMP_TARGET_IS_RISCV FALSE)
# "x86" is accepted as well: 32-bit Windows reports it (via
# PROCESSOR_ARCHITECTURE) instead of an i?86 name.
if("${SIMDCOMP_SYSTEM_PROCESSOR_LOWER}" MATCHES "^(x86_64|amd64|x86|i[3-6]86)$")
  set(SIMDCOMP_TARGET_IS_X86 TRUE)
elseif("${SIMDCOMP_SYSTEM_PROCESSOR_LOWER}" MATCHES "^riscv")
  set(SIMDCOMP_TARGET_IS_RISCV TRUE)
  message(STATUS "RISC-V target detected; using scalar 128-bit compatibility shim")
endif()

# Default to an optimized build when the user did not pick one.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
Expand Down Expand Up @@ -55,14 +65,20 @@ set_target_properties(simdcomp PROPERTIES
SOVERSION ${PROJECT_VERSION_MAJOR}
POSITION_INDEPENDENT_CODE ON)

# On RISC-V targets, pass the architecture macros to the library sources.
# XLEN follows the target pointer width instead of being fixed at 64, because
# the "^riscv" processor match also accepts riscv32. Falls back to 64 when
# CMAKE_SIZEOF_VOID_P is not available.
if(SIMDCOMP_TARGET_IS_RISCV)
  set(SIMDCOMP_RISCV_XLEN 64)
  if(CMAKE_SIZEOF_VOID_P)
    math(EXPR SIMDCOMP_RISCV_XLEN "${CMAKE_SIZEOF_VOID_P} * 8")
  endif()
  target_compile_definitions(simdcomp PRIVATE
    __riscv=1
    __riscv_xlen=${SIMDCOMP_RISCV_XLEN})
endif()

# -march=native (when requested and supported by the compiler).
include(CheckCCompilerFlag)
set(SIMDCOMP_HAS_MARCH_NATIVE FALSE)
if(SIMDCOMP_NATIVE)
if(SIMDCOMP_NATIVE AND SIMDCOMP_TARGET_IS_RISCV)
  # Native tuning is deliberately not attempted on RISC-V targets.
  message(STATUS "Skipping -march=native for RISC-V target")
elseif(SIMDCOMP_NATIVE)
  # Every other target (x86 and ARM alike) gets the flag when the compiler
  # accepts it.
  #
  # The probe writes to its own cache entry on purpose:
  # check_c_compiler_flag() skips the check when its result variable is
  # already defined, and SIMDCOMP_HAS_MARCH_NATIVE has been preset to FALSE,
  # so probing into that variable directly would never enable the flag.
  check_c_compiler_flag("-march=native" SIMDCOMP_COMPILER_ACCEPTS_MARCH_NATIVE)
  if(SIMDCOMP_COMPILER_ACCEPTS_MARCH_NATIVE)
    set(SIMDCOMP_HAS_MARCH_NATIVE TRUE)
    target_compile_options(simdcomp PRIVATE -march=native)
  endif()
endif()

# Warnings, mirroring the previous Makefile, on GCC/Clang only.
Expand All @@ -77,6 +93,12 @@ function(simdcomp_apply_native target)
endif()
endfunction()

# simdcomp_apply_riscv_defs(<target>)
#
# Adds the RISC-V architecture macros (__riscv, __riscv_xlen) as PRIVATE
# compile definitions of <target> when SIMDCOMP_TARGET_IS_RISCV is true, and
# does nothing otherwise.
#
# XLEN is derived from the target pointer width (CMAKE_SIZEOF_VOID_P) instead
# of being fixed at 64, so a 32-bit RISC-V target is not mislabeled. It falls
# back to 64 when the pointer width is not known.
#
# Example: simdcomp_apply_riscv_defs(unit)
function(simdcomp_apply_riscv_defs target)
  if(NOT SIMDCOMP_TARGET_IS_RISCV)
    return()
  endif()

  set(riscv_xlen 64)
  if(CMAKE_SIZEOF_VOID_P)
    math(EXPR riscv_xlen "${CMAKE_SIZEOF_VOID_P} * 8")
  endif()

  target_compile_definitions(${target} PRIVATE
    __riscv=1
    __riscv_xlen=${riscv_xlen})
endfunction()

# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
Expand All @@ -86,11 +108,13 @@ if(SIMDCOMP_BUILD_TESTS)
add_executable(unit tests/unit.c)
target_link_libraries(unit PRIVATE simdcomp)
simdcomp_apply_native(unit)
simdcomp_apply_riscv_defs(unit)
add_test(NAME unit COMMAND unit)

add_executable(unit_chars tests/unit_chars.c)
target_link_libraries(unit_chars PRIVATE simdcomp)
simdcomp_apply_native(unit_chars)
simdcomp_apply_riscv_defs(unit_chars)
add_test(NAME unit_chars COMMAND unit_chars)
endif()

Expand All @@ -101,6 +125,7 @@ if(SIMDCOMP_BUILD_EXAMPLES)
add_executable(example example/example.c)
target_link_libraries(example PRIVATE simdcomp)
simdcomp_apply_native(example)
simdcomp_apply_riscv_defs(example)
endif()

# ---------------------------------------------------------------------------
Expand All @@ -127,10 +152,12 @@ if(SIMDCOMP_BUILD_BENCHMARKS)
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON)
simdcomp_apply_native(bitpackingbenchmark)
simdcomp_apply_riscv_defs(bitpackingbenchmark)

add_executable(benchmark benchmarks/benchmark.c)
target_link_libraries(benchmark PRIVATE simdcomp)
simdcomp_apply_native(benchmark)
simdcomp_apply_riscv_defs(benchmark)
endif()

# ---------------------------------------------------------------------------
Expand Down
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ This is significantly faster than generic codecs like gzip, LZO, Snappy or LZ4.
On a Skylake Intel processor, it can decode integers at a rate of 0.3 cycles per integer,
which can easily translate into more than 8 billion decoded integers per second.

It runs on both x86/x64 (SSE/AVX) and 64-bit ARM (NEON, e.g. Apple Silicon). See
It runs on x86/x64 (SSE/AVX), 64-bit ARM (NEON, e.g. Apple Silicon), and
RISC-V through a conservative scalar compatibility backend. See
[Platforms](#platforms) below.

This library is part of the [Awesome C](https://github.com/kozross/awesome-c) list of C resources.
Expand All @@ -39,6 +40,7 @@ Requirements

- On x86/x64: your processor should support SSE4.1 (supported by most Intel and AMD processors released since 2008). The core bit-packing functions only require SSE2 (Pentium4 or better).
- On ARM: an AArch64/ARM processor with NEON (e.g. Apple Silicon). The SSE intrinsics are mapped to NEON by our own self-contained shim (`include/neon128.h`); no third-party translation library is pulled in.
- On RISC-V: the library builds through a small scalar 128-bit compatibility shim (`include/riscv128.h`). This preserves the existing API but does not provide RVV acceleration.
- A C99 (or better) compiler, plus a C++17 compiler if you build the benchmarks.
- CMake 3.14 or better.

Expand All @@ -47,7 +49,7 @@ For a plain C version that does not use SIMD instructions, see https://github.co
Platforms
---------

The library supports two SIMD backends behind the same API:
The library supports three backends behind the same API:

- **x86 / x64** — Intel/AMD SSE (with optional AVX2 and AVX-512 code paths,
enabled automatically when you build with `-march=native` on a capable host).
Expand All @@ -57,10 +59,15 @@ The library supports two SIMD backends behind the same API:
written directly against `<arm_neon.h>`; no third-party translation layer
(such as sse2neon) is pulled in. The wider AVX2/AVX-512 paths are x86-only and
are simply inactive on ARM.

The public API is identical on both: it is selected automatically at compile
time, so the same source (including the `__m128i`-based entry points) builds on
either architecture.
- **RISC-V** — the same 128-bit kernel sources build through a conservative
scalar compatibility shim in `include/riscv128.h`. This keeps the existing
`__m128i`-based API available on RISC-V without pulling in any x86 headers,
while leaving AVX2/AVX-512 inactive. It is a portability path, not an RVV
optimization backend.

The public API is identical across these backends: it is selected automatically
at compile time, so the same source (including the `__m128i`-based entry
points) builds on each architecture.

Usage
-------
Expand Down
16 changes: 16 additions & 0 deletions include/portability.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,22 @@ typedef signed char int8_t;
defined(_M_ARM64)
/* ARM NEON: use our own SSE-on-NEON shim instead of the x86 intrinsics. */
#include "neon128.h"
#elif defined(__riscv)
/* RISC-V: use a conservative scalar 128-bit shim; this is compatibility, not
 * RVV acceleration. */
#include "riscv128.h"
/* Define the x86 SSE feature-test macros (SSE2 through SSE4.2) unless they are
 * already defined.
 * NOTE(review): presumably this lets sources that are gated on these macros
 * take their 128-bit code paths on top of the shim -- confirm against the
 * kernel sources. Because the definitions live in a header, they are also
 * visible to every file that includes it. */
#ifndef __SSE2__
#define __SSE2__ 1
#endif
#ifndef __SSSE3__
#define __SSSE3__ 1
#endif
#ifndef __SSE4_1__
#define __SSE4_1__ 1
#endif
#ifndef __SSE4_2__
#define __SSE4_2__ 1
#endif
#else
#include <x86intrin.h>
#endif
Expand Down
Loading