Skip to content

Commit c12904b

Browse files
authored
AVX 512 support (#114)
1 parent a82c024 commit c12904b

File tree

17 files changed

+781
-40
lines changed

17 files changed

+781
-40
lines changed

.github/workflows/ci.yml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,21 @@ jobs:
138138
run: cargo +stable install rustfilt
139139
- name: Check x86_64 inlining
140140
run: |
141-
./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt
142-
./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt "--features public_imp"
143-
RUSTFLAGS="-C target-feature=+avx2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-avx2.txt
144141
RUSTFLAGS="-C target-feature=+avx2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-nostd-avx2.txt --no-default-features
145142
RUSTFLAGS="-C target-feature=+sse4.2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-nostd-sse42.txt --no-default-features
143+
- name: Check x86_64 inlining with avx2 autoselection
144+
run: |
145+
./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-old.txt
146+
./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-old.txt "--features public_imp"
147+
RUSTFLAGS="-C target-feature=+avx2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-avx2.txt
148+
if: ${{ matrix.toolchain == '1.38.0' }}
149+
- name: Check x86_64 inlining with avx512 autoselection
150+
run: |
151+
./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt
152+
./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std.txt "--features public_imp"
153+
RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-std-avx512.txt
154+
RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2" ./check-inlining.sh x86_64-unknown-linux-gnu expected-methods-x86-nostd-avx512.txt --no-default-features
155+
if: ${{ matrix.toolchain != '1.38.0' }}
146156
- uses: dtolnay/rust-toolchain@master
147157
with:
148158
toolchain: ${{ matrix.toolchain }}

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,6 @@ targets = ["aarch64-unknown-linux-gnu", "wasm32-unknown-unknown", "wasm32-wasip1
5353

5454
[dependencies]
5555
flexpect = "0.1.1"
56+
57+
[build-dependencies]
58+
rustversion = "1.0.22"

README.md

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ This library has been thoroughly tested with sample data as well as fuzzing and
1313
## Features
1414
* `basic` API for the fastest validation, optimized for valid UTF-8
1515
* `compat` API as a fully compatible replacement for `std::str::from_utf8()`
16+
* 🆕 AVX 512 support on modern x86/x86-64 CPUs since Rust 1.89
1617
* Supports AVX 2 and SSE 4.2 implementations on x86 and x86-64
1718
* ARM64 (aarch64) SIMD is supported since Rust 1.61
1819
* WASM (wasm32) SIMD is supported
1920
* 🆕 armv7 NEON support with the `armv7_neon` feature on nightly Rust
20-
* x86-64: Up to 23 times faster than the std library on valid non-ASCII, up to four times faster on ASCI
21+
* x86-64: Up to 23 times faster than the std library on valid non-ASCII, up to four times faster on ASCII
2122
* aarch64: Up to eleven times faster than the std library on valid non-ASCII, up to four times faster on ASCII (Apple Silicon)
2223
* Faster than the original simdjson implementation
2324
* Selects the fastest implementation at runtime based on CPU support (on x86)
@@ -71,14 +72,17 @@ This comes at a slight performance penalty compared to the `basic` API even if t
7172
## Implementation selection
7273

7374
### X86
74-
The fastest implementation is selected at runtime using the `std::is_x86_feature_detected!` macro, unless the CPU
75-
targeted by the compiler supports the fastest available implementation.
76-
So if you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine, the AVX 2 implementation is selected at
77-
compile-time and runtime selection is disabled.
75+
The fastest implementation is usually selected at runtime using the `std::is_x86_feature_detected!` macro. The AVX 512
76+
implementation requires Rust 1.89 or later and is only selected if the CPU supports the VBMI2 features to avoid throttling
77+
happening with CPUs before Intel's Ice Lake microarchitecture.
78+
79+
If you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine which supports AVX 512 with Rust 1.89 or later,
80+
the AVX 512 implementation is selected at compile-time and runtime selection is disabled.
7881

7982
For no-std support (compiled with `--no-default-features`) the implementation is always selected at compile time based on
8083
the targeted CPU. Use `RUSTFLAGS="-C target-feature=+avx2"` for the AVX 2 implementation or `RUSTFLAGS="-C target-feature=+sse4.2"`
81-
for the SSE 4.2 implementation.
84+
for the SSE 4.2 implementation. For AVX 512 use `RUSTFLAGS="-C target-feature=+avx512f,+avx512bw,+avx512vbmi,+avx512vbmi2"` with
85+
Rust 1.89 or later.
8286

8387
### ARM64
8488
The SIMD implementation is used automatically since Rust 1.61.

build.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
fn main() {
2+
println!("cargo::rustc-check-cfg=cfg(avx512_stable)");
3+
// `if rustversion::cfg!(...)` is not supported in older Rust versions
4+
if avx512_stable() {
5+
println!("cargo:rustc-cfg=avx512_stable");
6+
}
7+
}
8+
9+
#[rustversion::since(1.89)]
10+
fn avx512_stable() -> bool {
11+
true
12+
}
13+
14+
#[rustversion::before(1.89)]
15+
fn avx512_stable() -> bool {
16+
false
17+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
simdutf8::implementation::helpers::get_compat_error
2+
simdutf8::implementation::x86::validate_utf8_basic
3+
simdutf8::implementation::x86::validate_utf8_basic_avx512
4+
simdutf8::implementation::x86::validate_utf8_compat
5+
simdutf8::implementation::x86::validate_utf8_compat_avx512
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
simdutf8::implementation::helpers::get_compat_error
2+
simdutf8::implementation::x86::validate_utf8_basic
3+
simdutf8::implementation::x86::validate_utf8_basic_avx512
4+
simdutf8::implementation::x86::validate_utf8_compat
5+
simdutf8::implementation::x86::validate_utf8_compat_avx512
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
simdutf8::implementation::helpers::get_compat_error
2+
simdutf8::implementation::validate_utf8_basic_fallback
3+
simdutf8::implementation::validate_utf8_compat_fallback
4+
simdutf8::implementation::x86::avx2::validate_utf8_basic
5+
simdutf8::implementation::x86::avx2::validate_utf8_compat
6+
simdutf8::implementation::x86::sse42::validate_utf8_basic
7+
simdutf8::implementation::x86::sse42::validate_utf8_compat
8+
simdutf8::implementation::x86::validate_utf8_basic::get_fastest
9+
simdutf8::implementation::x86::validate_utf8_compat::get_fastest

inlining/expected-methods-x86-std.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ simdutf8::implementation::validate_utf8_basic_fallback
33
simdutf8::implementation::validate_utf8_compat_fallback
44
simdutf8::implementation::x86::avx2::validate_utf8_basic
55
simdutf8::implementation::x86::avx2::validate_utf8_compat
6+
simdutf8::implementation::x86::avx512::validate_utf8_basic
7+
simdutf8::implementation::x86::avx512::validate_utf8_compat
68
simdutf8::implementation::x86::sse42::validate_utf8_basic
79
simdutf8::implementation::x86::sse42::validate_utf8_compat
810
simdutf8::implementation::x86::validate_utf8_basic::get_fastest

src/basic.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,16 @@ pub mod imp {
197197
/// Includes the x86/x86-64 SIMD implementations.
198198
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
199199
pub mod x86 {
200+
/// Includes the validation implementation for AVX 512-compatible CPUs.
201+
///
202+
/// Using the provided functionality on CPUs which do not support AVX 512 is undefined
203+
/// behavior and will very likely cause a crash.
204+
#[cfg(avx512_stable)]
205+
pub mod avx512 {
206+
pub use crate::implementation::x86::avx512::validate_utf8_basic as validate_utf8;
207+
pub use crate::implementation::x86::avx512::ChunkedUtf8ValidatorImp;
208+
pub use crate::implementation::x86::avx512::Utf8ValidatorImp;
209+
}
200210
/// Includes the validation implementation for AVX 2-compatible CPUs.
201211
///
202212
/// Using the provided functionality on CPUs which do not support AVX 2 is undefined

src/compat.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ pub mod imp {
105105
/// Includes the x86/x86-64 SIMD implementations.
106106
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
107107
pub mod x86 {
108+
/// Includes the validation implementation for AVX 512-compatible CPUs.
109+
#[cfg(avx512_stable)]
110+
pub mod avx512 {
111+
pub use crate::implementation::x86::avx512::validate_utf8_compat as validate_utf8;
112+
}
108113
/// Includes the validation implementation for AVX 2-compatible CPUs.
109114
pub mod avx2 {
110115
pub use crate::implementation::x86::avx2::validate_utf8_compat as validate_utf8;

0 commit comments

Comments
 (0)