From 7cf772b787985f5528f257e78467cafff490c6c3 Mon Sep 17 00:00:00 2001 From: Yevhenii Reizner Date: Sat, 27 May 2023 23:38:42 +0300 Subject: [PATCH 1/2] Initial commit. --- .github/workflows/main.yml | 23 +- Cargo.toml | 7 +- path/src/f32x2_t.rs | 109 --- path/src/f32x4_t.rs | 80 --- path/src/lib.rs | 14 +- path/src/path_geometry.rs | 18 +- path/src/rect.rs | 16 +- src/lib.rs | 2 +- src/pipeline/highp.rs | 184 ++--- src/pipeline/lowp.rs | 235 ++++--- src/pipeline/mod.rs | 4 +- src/scan/hairline.rs | 55 +- src/shaders/linear_gradient.rs | 4 +- src/shaders/radial_gradient.rs | 3 +- src/wide/f32x16_t.rs | 138 ---- src/wide/f32x4_t.rs | 640 ------------------ src/wide/f32x8_t.rs | 403 ----------- src/wide/i32x4_t.rs | 281 -------- src/wide/i32x8_t.rs | 192 ------ src/wide/mod.rs | 72 -- src/wide/u16x16_t.rs | 250 ------- src/wide/u32x4_t.rs | 191 ------ src/wide/u32x8_t.rs | 127 ---- tests/images/canvas/draw-pixmap-opacity.png | Bin 1585 -> 8055 bytes .../three-stops-evenly-spaced-lq.png | Bin 1659 -> 37525 bytes .../gradients/two-stops-linear-pad-lq.png | Bin 1661 -> 32338 bytes .../gradients/two-stops-linear-reflect-lq.png | Bin 1672 -> 41386 bytes .../gradients/two-stops-linear-repeat-lq.png | Bin 1845 -> 42253 bytes .../two-stops-unevenly-spaced-lq.png | Bin 1616 -> 32426 bytes 29 files changed, 324 insertions(+), 2724 deletions(-) delete mode 100644 path/src/f32x2_t.rs delete mode 100644 path/src/f32x4_t.rs delete mode 100644 src/wide/f32x16_t.rs delete mode 100644 src/wide/f32x4_t.rs delete mode 100644 src/wide/f32x8_t.rs delete mode 100644 src/wide/i32x4_t.rs delete mode 100644 src/wide/i32x8_t.rs delete mode 100644 src/wide/mod.rs delete mode 100644 src/wide/u16x16_t.rs delete mode 100644 src/wide/u32x4_t.rs delete mode 100644 src/wide/u32x8_t.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b84bc29..90667b2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,8 +11,7 @@ jobs: strategy: matrix: rust: - - 1.57.0 - - stable + - nightly steps: - name: Checkout uses: actions/checkout@v2 @@ -23,8 +22,8 @@ jobs: toolchain: ${{ matrix.rust }} override: true - - name: Build with minimal features (no_std) - run: cargo build --verbose --no-default-features --features no-std-float + # - name: Build with minimal features (no_std) + # run: cargo build --verbose --no-default-features --features no-std-float - name: Run tests for tiny-skia-path working-directory: path @@ -62,7 +61,7 @@ jobs: - name: Install toolchain uses: actions-rs/toolchain@v1 with: - toolchain: stable + toolchain: nightly override: true target: wasm32-wasi @@ -71,8 +70,8 @@ jobs: curl https://wasmtime.dev/install.sh -sSf | bash echo "$HOME/.wasmtime/bin" >> $GITHUB_PATH - - name: Build with minimal features (no_std) - run: cargo build --target wasm32-wasi --verbose --no-default-features --features no-std-float + # - name: Build with minimal features (no_std) + # run: cargo build --target wasm32-wasi --verbose --no-default-features --features no-std-float - name: Run tests without SIMD run: cargo test --target wasm32-wasi --verbose --no-default-features --features png-format @@ -91,18 +90,18 @@ jobs: - name: Install toolchain uses: actions-rs/toolchain@v1 with: - toolchain: stable + toolchain: nightly override: true target: aarch64-unknown-linux-gnu - name: Install cross run: cargo install cross - - name: Build with minimal features (no_std) - run: cross build --target aarch64-unknown-linux-gnu --verbose --no-default-features --features no-std-float + # - name: Build with minimal features (no_std) + # run: cross build --target aarch64-unknown-linux-gnu --verbose --no-default-features --features no-std-float - - name: Run tests without SIMD - run: cross test --target aarch64-unknown-linux-gnu --verbose --no-default-features --features png-format + # - name: Run tests without SIMD + # run: cross test --target aarch64-unknown-linux-gnu --verbose --no-default-features --features png-format - name: Run tests with Neon run: cross test --target aarch64-unknown-linux-gnu diff --git a/Cargo.toml b/Cargo.toml index e80a97c..52de6e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,17 +24,12 @@ png = { version = "0.17", optional = true } tiny-skia-path = { version = "0.10.0", path = "path", default-features = false } [features] -default = ["std", "simd", "png-format"] +default = ["std", "png-format"] # Enables the use of the standard library. Deactivate this and activate the no-std-float # feature to compile for targets that don't have std. std = ["tiny-skia-path/std"] no-std-float = ["tiny-skia-path/no-std-float"] -# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128) -# and AArch64 (Neon). -# Has no effect on other targets. Present mainly for testing. -simd = [] - # Allows loading and saving `Pixmap` as PNG. png-format = ["std", "png"] diff --git a/path/src/f32x2_t.rs b/path/src/f32x2_t.rs deleted file mode 100644 index e471f42..0000000 --- a/path/src/f32x2_t.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#[cfg(all(not(feature = "std"), feature = "no-std-float"))] -use crate::NoStdFloat; - -// Right now, there are no visible benefits of using SIMD for f32x2. So we don't. -/// A pair of f32 numbers. -/// -/// Mainly for internal use. Do not rely on it! -#[allow(non_camel_case_types)] -#[derive(Copy, Clone, Default, PartialEq, Debug)] -pub struct f32x2(pub [f32; 2]); - -impl f32x2 { - /// Creates a new pair. - pub fn new(a: f32, b: f32) -> f32x2 { - f32x2([a, b]) - } - - /// Creates a new pair from a single value. - pub fn splat(x: f32) -> f32x2 { - f32x2([x, x]) - } - - /// Returns an absolute value. - pub fn abs(self) -> f32x2 { - f32x2([self.x().abs(), self.y().abs()]) - } - - /// Returns a minimum value. - pub fn min(self, other: f32x2) -> f32x2 { - f32x2([pmin(self.x(), other.x()), pmin(self.y(), other.y())]) - } - - /// Returns a maximum value. - pub fn max(self, other: f32x2) -> f32x2 { - f32x2([pmax(self.x(), other.x()), pmax(self.y(), other.y())]) - } - - /// Returns a maximum of both values. - pub fn max_component(self) -> f32 { - pmax(self.x(), self.y()) - } - - /// Returns the first value. - pub fn x(&self) -> f32 { - self.0[0] - } - - /// Returns the second value. - pub fn y(&self) -> f32 { - self.0[1] - } -} - -impl core::ops::Add for f32x2 { - type Output = f32x2; - - fn add(self, other: f32x2) -> f32x2 { - f32x2([self.x() + other.x(), self.y() + other.y()]) - } -} - -impl core::ops::Sub for f32x2 { - type Output = f32x2; - - fn sub(self, other: f32x2) -> f32x2 { - f32x2([self.x() - other.x(), self.y() - other.y()]) - } -} - -impl core::ops::Mul for f32x2 { - type Output = f32x2; - - fn mul(self, other: f32x2) -> f32x2 { - f32x2([self.x() * other.x(), self.y() * other.y()]) - } -} - -impl core::ops::Div for f32x2 { - type Output = f32x2; - - fn div(self, other: f32x2) -> f32x2 { - f32x2([self.x() / other.x(), self.y() / other.y()]) - } -} - -// A faster and more forgiving f32 min/max implementation. -// -// Unlike std one, we do not care about NaN. - -fn pmax(a: f32, b: f32) -> f32 { - if a < b { - b - } else { - a - } -} - -fn pmin(a: f32, b: f32) -> f32 { - if b < a { - b - } else { - a - } -} diff --git a/path/src/f32x4_t.rs b/path/src/f32x4_t.rs deleted file mode 100644 index e591b55..0000000 --- a/path/src/f32x4_t.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Right now, there are no visible benefits of using SIMD for f32x4. So we don't. -#[derive(Default, Clone, Copy, PartialEq, Debug)] -#[repr(C, align(16))] -pub struct f32x4(pub [f32; 4]); - -impl f32x4 { - pub fn max(self, rhs: Self) -> Self { - Self([ - self.0[0].max(rhs.0[0]), - self.0[1].max(rhs.0[1]), - self.0[2].max(rhs.0[2]), - self.0[3].max(rhs.0[3]), - ]) - } - - pub fn min(self, rhs: Self) -> Self { - Self([ - self.0[0].min(rhs.0[0]), - self.0[1].min(rhs.0[1]), - self.0[2].min(rhs.0[2]), - self.0[3].min(rhs.0[3]), - ]) - } -} - -impl core::ops::Add for f32x4 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - Self([ - self.0[0] + rhs.0[0], - self.0[1] + rhs.0[1], - self.0[2] + rhs.0[2], - self.0[3] + rhs.0[3], - ]) - } -} - -impl core::ops::AddAssign for f32x4 { - fn add_assign(&mut self, rhs: f32x4) { - *self = *self + rhs; - } -} - -impl core::ops::Sub for f32x4 { - type Output = Self; - - fn sub(self, rhs: Self) -> Self::Output { - Self([ - self.0[0] - rhs.0[0], - self.0[1] - rhs.0[1], - self.0[2] - rhs.0[2], - self.0[3] - rhs.0[3], - ]) - } -} - -impl core::ops::Mul for f32x4 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - Self([ - self.0[0] * rhs.0[0], - self.0[1] * rhs.0[1], - self.0[2] * rhs.0[2], - self.0[3] * rhs.0[3], - ]) - } -} - -impl core::ops::MulAssign for f32x4 { - fn mul_assign(&mut self, rhs: f32x4) { - *self = *self * rhs; - } -} diff --git a/path/src/lib.rs b/path/src/lib.rs index e80a70c..23eb068 100644 --- a/path/src/lib.rs +++ b/path/src/lib.rs @@ -12,6 +12,7 @@ //! //! Note that all types use single precision floats (`f32`), just like [Skia](https://skia.org/). +#![feature(portable_simd)] #![no_std] #![warn(missing_docs)] #![warn(missing_copy_implementations)] @@ -36,8 +37,6 @@ extern crate std; extern crate alloc; mod dash; -mod f32x2_t; -mod f32x4_t; mod floating_point; mod path; mod path_builder; @@ -49,7 +48,6 @@ mod stroker; mod transform; pub use dash::StrokeDash; -pub use f32x2_t::f32x2; pub use floating_point::*; pub use path::*; pub use path_builder::*; @@ -86,16 +84,6 @@ impl Point { Point { x, y } } - /// Creates a new `Point` from `f32x2`. - pub fn from_f32x2(r: f32x2) -> Self { - Point::from_xy(r.x(), r.y()) - } - - /// Converts a `Point` into a `f32x2`. - pub fn to_f32x2(&self) -> f32x2 { - f32x2::new(self.x, self.y) - } - /// Creates a point at 0x0 position. pub fn zero() -> Self { Point { x: 0.0, y: 0.0 } diff --git a/path/src/path_geometry.rs b/path/src/path_geometry.rs index d4c3746..ebd101c 100644 --- a/path/src/path_geometry.rs +++ b/path/src/path_geometry.rs @@ -10,9 +10,10 @@ #![allow(missing_docs)] +use core::simd::f32x2; + use crate::{Point, Transform}; -use crate::f32x2_t::f32x2; use crate::floating_point::FLOAT_PI; use crate::scalar::{Scalar, SCALAR_NEARLY_ZERO, SCALAR_ROOT_2_OVER_2}; @@ -22,6 +23,21 @@ use crate::path_builder::PathDirection; #[cfg(all(not(feature = "std"), feature = "no-std-float"))] use crate::NoStdFloat; +trait PointExt { + fn from_f32x2(r: f32x2) -> Self; + fn to_f32x2(&self) -> f32x2; +} + +impl PointExt for Point { + fn from_f32x2(r: f32x2) -> Self { + Point::from_xy(r.as_array()[0], r.as_array()[1]) + } + + fn to_f32x2(&self) -> f32x2 { + f32x2::from_array([self.x, self.y]) + } +} + // use for : eval(t) == A * t^2 + B * t + C #[derive(Clone, Copy, Default, Debug)] pub struct QuadCoeff { diff --git a/path/src/rect.rs b/path/src/rect.rs index d199f9d..06b7f1c 100644 --- a/path/src/rect.rs +++ b/path/src/rect.rs @@ -345,7 +345,7 @@ impl Rect { /// /// Returns None if count is zero or if Point array contains an infinity or NaN. pub fn from_points(points: &[Point]) -> Option { - use crate::f32x4_t::f32x4; + use core::simd::{f32x4, SimdFloat}; if points.is_empty() { return None; @@ -356,13 +356,13 @@ impl Rect { let mut max; if points.len() & 1 != 0 { let pt = points[0]; - min = f32x4([pt.x, pt.y, pt.x, pt.y]); + min = f32x4::from_array([pt.x, pt.y, pt.x, pt.y]); max = min; offset += 1; } else { let pt0 = points[0]; let pt1 = points[1]; - min = f32x4([pt0.x, pt0.y, pt1.x, pt1.y]); + min = f32x4::from_array([pt0.x, pt0.y, pt1.x, pt1.y]); max = min; offset += 2; } @@ -371,17 +371,17 @@ impl Rect { while offset != points.len() { let pt0 = points[offset + 0]; let pt1 = points[offset + 1]; - let xy = f32x4([pt0.x, pt0.y, pt1.x, pt1.y]); + let xy = f32x4::from_array([pt0.x, pt0.y, pt1.x, pt1.y]); accum *= xy; - min = min.min(xy); - max = max.max(xy); + min = min.simd_min(xy); + max = max.simd_max(xy); offset += 2; } let all_finite = accum * f32x4::default() == f32x4::default(); - let min: [f32; 4] = min.0; - let max: [f32; 4] = max.0; + let min: &[f32; 4] = min.as_array(); + let max: &[f32; 4] = max.as_array(); if all_finite { Rect::from_ltrb( min[0].min(min[2]), diff --git a/src/lib.rs b/src/lib.rs index 38de1cb..f11f8a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ and a user should manage the world transform, clipping mask and style manually. See the `examples/` directory for usage examples. */ +#![feature(portable_simd)] #![no_std] #![warn(missing_docs)] #![warn(missing_copy_implementations)] @@ -52,7 +53,6 @@ mod pipeline; mod pixmap; mod scan; mod shaders; -mod wide; mod painter; // Keep it under `pixmap` for a better order in the docs. diff --git a/src/pipeline/highp.rs b/src/pipeline/highp.rs index bcb113c..66a3ecb 100644 --- a/src/pipeline/highp.rs +++ b/src/pipeline/highp.rs @@ -15,11 +15,12 @@ For some reason, we are almost 2x slower. Maybe because Skia uses clang's vector and we're using a manual implementation. */ +use std::simd::{f32x8, i32x8, u32x8, StdFloat, SimdFloat, SimdPartialOrd, SimdPartialEq}; + use crate::{PremultipliedColorU8, SpreadMode, PixmapRef}; use crate::geom::ScreenIntRect; use crate::pixmap::SubPixmapMut; -use crate::wide::{f32x8, i32x8, u32x8}; pub const STAGE_WIDTH: usize = 8; @@ -125,6 +126,16 @@ pub fn fn_ptr(f: StageFn) -> *const () { f as *const () } +trait F32x8Ext { + fn normalize(self) -> Self; +} + +impl F32x8Ext for f32x8 { + fn normalize(self) -> Self { + self.simd_max(f32x8::default()).simd_min(f32x8::splat(1.0)) + } +} + #[inline(never)] pub fn start( functions: &[StageFn], @@ -209,19 +220,19 @@ fn move_destination_to_source(p: &mut Pipeline) { } fn clamp_0(p: &mut Pipeline) { - p.r = p.r.max(f32x8::default()); - p.g = p.g.max(f32x8::default()); - p.b = p.b.max(f32x8::default()); - p.a = p.a.max(f32x8::default()); + p.r = p.r.simd_max(f32x8::default()); + p.g = p.g.simd_max(f32x8::default()); + p.b = p.b.simd_max(f32x8::default()); + p.a = p.a.simd_max(f32x8::default()); p.next_stage(); } fn clamp_a(p: &mut Pipeline) { - p.r = p.r.min(f32x8::splat(1.0)); - p.g = p.g.min(f32x8::splat(1.0)); - p.b = p.b.min(f32x8::splat(1.0)); - p.a = p.a.min(f32x8::splat(1.0)); + p.r = p.r.simd_min(f32x8::splat(1.0)); + p.g = p.g.simd_min(f32x8::splat(1.0)); + p.b = p.b.simd_min(f32x8::splat(1.0)); + p.a = p.a.simd_min(f32x8::splat(1.0)); p.next_stage(); } @@ -301,10 +312,10 @@ fn gather_ix(pixmap: PixmapRef, mut x: f32x8, mut y: f32x8) -> u32x8 { // Exclusive -> inclusive. let w = ulp_sub(pixmap.width() as f32); let h = ulp_sub(pixmap.height() as f32); - x = x.max(f32x8::default()).min(f32x8::splat(w)); - y = y.max(f32x8::default()).min(f32x8::splat(h)); + x = x.simd_max(f32x8::default()).simd_min(f32x8::splat(w)); + y = y.simd_max(f32x8::default()).simd_min(f32x8::splat(h)); - (y.trunc_int() * i32x8::splat(pixmap.width() as i32) + x.trunc_int()).to_u32x8_bitcast() + (y.trunc().cast::() * i32x8::splat(pixmap.width() as i32) + x.trunc().cast::()).cast() } #[inline(always)] @@ -405,15 +416,15 @@ blend_fn!(source_in, |s, _, _, da| s * da); blend_fn!(destination_in, |_, d, sa, _| d * sa); blend_fn!(source_out, |s, _, _, da| s * inv(da)); blend_fn!(destination_out, |_, d, sa, _| d * inv(sa)); -blend_fn!(source_over, |s, d, sa, _| mad(d, inv(sa), s)); -blend_fn!(destination_over, |s, d, _, da| mad(s, inv(da), d)); +blend_fn!(source_over, |s, d: f32x8, sa, _| d.mul_add(inv(sa), s)); +blend_fn!(destination_over, |s: f32x8, d, _, da| s.mul_add(inv(da), d)); blend_fn!(modulate, |s, d, _, _| s * d); blend_fn!(multiply, |s, d, sa, da| s * inv(da) + d * inv(sa) + s * d); blend_fn!(screen, |s, d, _, _| s + d - s * d); blend_fn!(xor, |s, d, sa, da| s * inv(da) + d * inv(sa)); // Wants a type for some reason. -blend_fn!(plus, |s: f32x8, d: f32x8, _, _| (s + d).min(f32x8::splat(1.0))); +blend_fn!(plus, |s: f32x8, d: f32x8, _, _| (s + d).simd_min(f32x8::splat(1.0))); macro_rules! blend_fn2 { ($name:ident, $f:expr) => { @@ -422,54 +433,54 @@ macro_rules! blend_fn2 { p.r = $f(p.r, p.dr, p.a, p.da); p.g = $f(p.g, p.dg, p.a, p.da); p.b = $f(p.b, p.db, p.a, p.da); - p.a = mad(p.da, inv(p.a), p.a); + p.a = p.da.mul_add(inv(p.a), p.a); p.next_stage(); } }; } -blend_fn2!(darken, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).max(d * sa)); -blend_fn2!(lighten, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).min(d * sa)); -blend_fn2!(difference, |s: f32x8, d, sa, da: f32x8| s + d - two((s * da).min(d * sa))); +blend_fn2!(darken, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).simd_max(d * sa)); +blend_fn2!(lighten, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).simd_min(d * sa)); +blend_fn2!(difference, |s: f32x8, d, sa, da: f32x8| s + d - two((s * da).simd_min(d * sa))); blend_fn2!(exclusion, |s: f32x8, d, _, _| s + d - two(s * d)); blend_fn2!(color_burn, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8| - d.cmp_eq(da).blend( + d.simd_eq(da).select( d + s * inv(da), - s.cmp_eq(f32x8::default()).blend( + s.simd_eq(f32x8::default()).select( d * inv(sa), - sa * (da - da.min((da - d) * sa * s.recip_fast())) + s * inv(da) + d * inv(sa) + sa * (da - da.simd_min((da - d) * sa * s.recip())) + s * inv(da) + d * inv(sa) ) ) ); blend_fn2!(color_dodge, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8| - d.cmp_eq(f32x8::default()).blend( + d.simd_eq(f32x8::default()).select( s * inv(da), - s.cmp_eq(sa).blend( + s.simd_eq(sa).select( s + d * inv(sa), - sa * da.min((d * sa) * (sa - s).recip_fast()) + s * inv(da) + d * inv(sa) + sa * da.simd_min((d * sa) * (sa - s).recip()) + s * inv(da) + d * inv(sa) ) ) ); blend_fn2!(hard_light, |s: f32x8, d: f32x8, sa, da| - s * inv(da) + d * inv(sa) + two(s).cmp_le(sa).blend( + s * inv(da) + d * inv(sa) + two(s).simd_le(sa).select( two(s * d), sa * da - two((da - d) * (sa - s)) ) ); blend_fn2!(overlay, |s: f32x8, d: f32x8, sa, da| - s * inv(da) + d * inv(sa) + two(d).cmp_le(da).blend( + s * inv(da) + d * inv(sa) + two(d).simd_le(da).select( two(s * d), sa * da - two((da - d) * (sa - s)) ) ); blend_fn2!(soft_light, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8| { - let m = da.cmp_gt(f32x8::default()).blend(d / da, f32x8::default()); + let m = da.simd_gt(f32x8::default()).select(d / da, f32x8::default()); let s2 = two(s); let m4 = two(two(m)); @@ -481,9 +492,9 @@ blend_fn2!(soft_light, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8| { let dark_dst = (m4 * m4 + m4) * (m - f32x8::splat(1.0)) + f32x8::splat(7.0) * m; let lite_dst = m.sqrt() - m; let lite_src = d * sa + da * (s2 - sa) - * two(two(d)).cmp_le(da).blend(dark_dst, lite_dst); // 2 or 3? + * two(two(d)).simd_le(da).select(dark_dst, lite_dst); // 2 or 3? - s * inv(da) + d * inv(sa) + s2.cmp_le(sa).blend(dark_src, lite_src) // 1 or (2 or 3)? + s * inv(da) + d * inv(sa) + s2.simd_le(sa).select(dark_src, lite_src) // 1 or (2 or 3)? }); // We're basing our implementation of non-separable blend modes on @@ -600,7 +611,7 @@ fn luminosity_k( #[inline(always)] fn sat(r: f32x8, g: f32x8, b: f32x8) -> f32x8 { - r.max(g.max(b)) - r.min(g.min(b)) + r.simd_max(g.simd_max(b)) - r.simd_min(g.simd_min(b)) } #[inline(always)] @@ -610,13 +621,13 @@ fn lum(r: f32x8, g: f32x8, b: f32x8) -> f32x8 { #[inline(always)] fn set_sat(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, s: f32x8) { - let mn = r.min(g.min(*b)); - let mx = r.max(g.max(*b)); + let mn = r.simd_min(g.simd_min(*b)); + let mx = r.simd_max(g.simd_max(*b)); let sat = mx - mn; // Map min channel to 0, max channel to s, and scale the middle proportionally. - let scale = |c| sat.cmp_eq(f32x8::default()) - .blend(f32x8::default(), (c - mn) * s / sat); + let scale = |c| sat.simd_eq(f32x8::default()) + .select(f32x8::default(), (c - mn) * s / sat); *r = scale(*r); *g = scale(*g); @@ -633,14 +644,14 @@ fn set_lum(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, l: f32x8) { #[inline(always)] fn clip_color(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: f32x8) { - let mn = r.min(g.min(*b)); - let mx = r.max(g.max(*b)); + let mn = r.simd_min(g.simd_min(*b)); + let mx = r.simd_max(g.simd_max(*b)); let l = lum(*r, *g, *b); let clip = |mut c| { - c = mx.cmp_ge(f32x8::default()).blend(c, l + (c - l) * l / (l - mn)); - c = mx.cmp_gt(a).blend(l + (c - l) * (a - l) / (mx - l), c); - c = c.max(f32x8::default()); // Sometimes without this we may dip just a little negative. + c = mx.simd_ge(f32x8::default()).select(c, l + (c - l) * l / (l - mn)); + c = mx.simd_gt(a).select(l + (c - l) * (a - l) / (mx - l), c); + c = c.simd_max(f32x8::default()); // Sometimes without this we may dip just a little negative. c }; @@ -652,10 +663,10 @@ fn clip_color(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: f32x8) { pub fn source_over_rgba(p: &mut Pipeline) { let pixels = p.pixmap_dst.slice4_at_xy(p.dx, p.dy); load_8888(pixels, &mut p.dr, &mut p.dg, &mut p.db, &mut p.da); - p.r = mad(p.dr, inv(p.a), p.r); - p.g = mad(p.dg, inv(p.a), p.g); - p.b = mad(p.db, inv(p.a), p.b); - p.a = mad(p.da, inv(p.a), p.a); + p.r = p.dr.mul_add(inv(p.a), p.r); + p.g = p.dg.mul_add(inv(p.a), p.g); + p.b = p.db.mul_add(inv(p.a), p.b); + p.a = p.da.mul_add(inv(p.a), p.a); store_8888(&p.r, &p.g, &p.b, &p.a, pixels); p.next_stage(); @@ -664,10 +675,10 @@ pub fn source_over_rgba(p: &mut Pipeline) { pub fn source_over_rgba_tail(p: &mut Pipeline) { let pixels = p.pixmap_dst.slice_at_xy(p.dx, p.dy); load_8888_tail(p.tail, pixels, &mut p.dr, &mut p.dg, &mut p.db, &mut p.da); - p.r = mad(p.dr, inv(p.a), p.r); - p.g = mad(p.dg, inv(p.a), p.g); - p.b = mad(p.db, inv(p.a), p.b); - p.a = mad(p.da, inv(p.a), p.a); + p.r = p.dr.mul_add(inv(p.a), p.r); + p.g = p.dg.mul_add(inv(p.a), p.g); + p.b = p.db.mul_add(inv(p.a), p.b); + p.a = p.da.mul_add(inv(p.a), p.a); store_8888_tail(&p.r, &p.g, &p.b, &p.a, p.tail, pixels); p.next_stage(); @@ -676,8 +687,8 @@ pub fn source_over_rgba_tail(p: &mut Pipeline) { fn transform(p: &mut Pipeline) { let ts = &p.ctx.transform; - let tr = mad(p.r, f32x8::splat(ts.sx), mad(p.g, f32x8::splat(ts.kx), f32x8::splat(ts.tx))); - let tg = mad(p.r, f32x8::splat(ts.ky), mad(p.g, f32x8::splat(ts.sy), f32x8::splat(ts.ty))); + let tr = p.r.mul_add(f32x8::splat(ts.sx), p.g.mul_add(f32x8::splat(ts.kx), f32x8::splat(ts.tx))); + let tg = p.r.mul_add(f32x8::splat(ts.ky), p.g.mul_add(f32x8::splat(ts.sy), f32x8::splat(ts.ty))); p.r = tr; p.g = tg; @@ -757,11 +768,9 @@ fn bicubic(p: &mut Pipeline) { #[inline(always)] fn bicubic_near(t: f32x8) -> f32x8 { // 1/18 + 9/18t + 27/18t^2 - 21/18t^3 == t ( t ( -21/18t + 27/18) + 9/18) + 1/18 - mad( - t, - mad(t, - mad( - f32x8::splat(-21.0/18.0), + t.mul_add( + t.mul_add( + f32x8::splat(-21.0/18.0).mul_add( t, f32x8::splat(27.0/18.0), ), @@ -774,7 +783,7 @@ fn bicubic_near(t: f32x8) -> f32x8 { #[inline(always)] fn bicubic_far(t: f32x8) -> f32x8 { // 0/18 + 0/18*t - 6/18t^2 + 7/18t^3 == t^2 (7/18t - 6/18) - (t * t) * mad(f32x8::splat(7.0/18.0), t, f32x8::splat(-6.0/18.0)) + (t * t) * f32x8::splat(7.0/18.0).mul_add(t, f32x8::splat(-6.0/18.0)) } #[inline(always)] @@ -803,10 +812,10 @@ fn sampler_2x2( sample(pixmap, ctx, x,y, &mut rr, &mut gg, &mut bb, &mut aa); let w = wx[i] * wy[j]; - *r = mad(w, rr, *r); - *g = mad(w, gg, *g); - *b = mad(w, bb, *b); - *a = mad(w, aa, *a); + *r = w.mul_add(rr, *r); + *g = w.mul_add(gg, *g); + *b = w.mul_add(bb, *b); + *a = w.mul_add(aa, *a); x += one; } @@ -841,10 +850,10 @@ fn sampler_4x4( sample(pixmap, ctx, x,y, &mut rr, &mut gg, &mut bb, &mut aa); let w = wx[i] * wy[j]; - *r = mad(w, rr, *r); - *g = mad(w, gg, *g); - *b = mad(w, bb, *b); - *a = mad(w, aa, *a); + *r = w.mul_add(rr, *r); + *g = w.mul_add(gg, *g); + *b = w.mul_add(bb, *b); + *a = w.mul_add(aa, *a); x += one; } @@ -904,7 +913,7 @@ fn gradient(p: &mut Pipeline) { let mut idx = u32x8::default(); for i in 1..ctx.len { let tt = ctx.t_values[i].get(); - let n: u32x8 = bytemuck::cast([ + let n = u32x8::from_array([ (t[0] >= tt) as u32, (t[1] >= tt) as u32, (t[2] >= tt) as u32, @@ -925,7 +934,7 @@ fn gradient_lookup( ctx: &super::GradientCtx, idx: &u32x8, t: f32x8, r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8, ) { - let idx: [u32; 8] = bytemuck::cast(*idx); + let idx: &[u32; 8] = idx.as_array(); macro_rules! gather { ($d:expr, $c:ident) => { @@ -954,20 +963,20 @@ fn gradient_lookup( let bb = gather!(&ctx.biases, b); let ba = gather!(&ctx.biases, a); - *r = mad(t, fr, br); - *g = mad(t, fg, bg); - *b = mad(t, fb, bb); - *a = mad(t, fa, ba); + *r = t.mul_add(fr, br); + *g = t.mul_add(fg, bg); + *b = t.mul_add(fb, bb); + *a = t.mul_add(fa, ba); } fn evenly_spaced_2_stop_gradient(p: &mut Pipeline) { let ctx = &p.ctx.evenly_spaced_2_stop_gradient; let t = p.r; - p.r = mad(t, f32x8::splat(ctx.factor.r), f32x8::splat(ctx.bias.r)); - p.g = mad(t, f32x8::splat(ctx.factor.g), f32x8::splat(ctx.bias.g)); - p.b = mad(t, f32x8::splat(ctx.factor.b), f32x8::splat(ctx.bias.b)); - p.a = mad(t, f32x8::splat(ctx.factor.a), f32x8::splat(ctx.bias.a)); + p.r = t.mul_add(f32x8::splat(ctx.factor.r), f32x8::splat(ctx.bias.r)); + p.g = t.mul_add(f32x8::splat(ctx.factor.g), f32x8::splat(ctx.bias.g)); + p.b = t.mul_add(f32x8::splat(ctx.factor.b), f32x8::splat(ctx.bias.b)); + p.a = t.mul_add(f32x8::splat(ctx.factor.a), f32x8::splat(ctx.bias.a)); p.next_stage(); } @@ -1012,12 +1021,12 @@ fn mask_2pt_conical_degenerates(p: &mut Pipeline) { let ctx = &mut p.ctx.two_point_conical_gradient; let t = p.r; - let is_degenerate = t.cmp_le(f32x8::default()) | t.cmp_ne(t); - p.r = is_degenerate.blend(f32x8::default(), t); + let is_degenerate = t.simd_le(f32x8::default()) | t.simd_ne(t); + p.r = is_degenerate.select(f32x8::default(), t); - let is_not_degenerate = !is_degenerate.to_u32x8_bitcast(); - let is_not_degenerate: [u32; 8] = bytemuck::cast(is_not_degenerate); - ctx.mask = bytemuck::cast([ + let is_not_degenerate = !is_degenerate.to_int().cast::(); + let is_not_degenerate = is_not_degenerate.as_array(); + ctx.mask = u32x8::from_array([ if is_not_degenerate[0] != 0 { !0 } else { 0 }, if is_not_degenerate[1] != 0 { !0 } else { 0 }, if is_not_degenerate[2] != 0 { !0 } else { 0 }, @@ -1034,10 +1043,10 @@ fn mask_2pt_conical_degenerates(p: &mut Pipeline) { fn apply_vector_mask(p: &mut Pipeline) { let ctx = &p.ctx.two_point_conical_gradient; - p.r = (p.r.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast(); - p.g = (p.g.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast(); - p.b = (p.b.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast(); - p.a = (p.a.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast(); + p.r = (p.r.cast::() & ctx.mask).cast::(); + p.g = (p.g.cast::() & ctx.mask).cast::(); + p.b = (p.b.cast::() & ctx.mask).cast::(); + p.a = (p.a.cast::() & ctx.mask).cast::(); p.next_stage(); } @@ -1145,7 +1154,7 @@ fn store_8888_tail( #[inline(always)] fn unnorm(v: &f32x8) -> i32x8 { - (v.max(f32x8::default()).min(f32x8::splat(1.0)) * f32x8::splat(255.0)).round_int() + (v.simd_max(f32x8::default()).simd_min(f32x8::splat(1.0)) * f32x8::splat(255.0)).round().cast() } #[inline(always)] @@ -1158,12 +1167,7 @@ fn two(v: f32x8) -> f32x8 { v + v } -#[inline(always)] -fn mad(f: f32x8, m: f32x8, a: f32x8) -> f32x8 { - f * m + a -} - #[inline(always)] fn lerp(from: f32x8, to: f32x8, t: f32x8) -> f32x8 { - mad(to - from, t, from) + (to - from).mul_add(t, from) } diff --git a/src/pipeline/lowp.rs b/src/pipeline/lowp.rs index df0a1d3..a7f4de0 100644 --- a/src/pipeline/lowp.rs +++ b/src/pipeline/lowp.rs @@ -28,10 +28,11 @@ we are still 40-60% behind Skia built for Haswell. On ARM AArch64 the story is different and explicit SIMD make our code up to 2-3x faster. */ +use std::simd::{u16x16, f32x16, StdFloat, SimdFloat, SimdPartialOrd}; + use crate::PremultipliedColorU8; use crate::pixmap::SubPixmapMut; -use crate::wide::{f32x8, u16x16, f32x16}; use crate::geom::ScreenIntRect; pub const STAGE_WIDTH: usize = 16; @@ -142,6 +143,16 @@ pub fn fn_ptr_eq(f1: StageFn, f2: StageFn) -> bool { core::ptr::eq(f1 as *const (), f2 as *const ()) } +trait F32x16Ext { + fn normalize(self) -> Self; +} + +impl F32x16Ext for f32x16 { + fn normalize(self) -> Self { + self.simd_max(f32x16::default()).simd_min(f32x16::splat(1.0)) + } +} + #[inline(never)] pub fn start( functions: &[StageFn], @@ -234,10 +245,10 @@ fn uniform_color(p: &mut Pipeline) { } fn seed_shader(p: &mut Pipeline) { - let iota = f32x16( - f32x8::from([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - f32x8::from([8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5]), - ); + let iota = f32x16::from_array([ + 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, + 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, + ]); let x = f32x16::splat(p.dx as f32) + iota; let y = f32x16::splat(p.dy as f32 + 0.5); @@ -285,7 +296,7 @@ pub fn load_dst_u8_tail(p: &mut Pipeline) { pub fn store_u8(p: &mut Pipeline) { let data = p.pixmap.slice16_mask_at_xy(p.dx, p.dy); - let a = p.a.as_slice(); + let a = p.a.as_array(); data[ 0] = a[ 0] as u8; data[ 1] = a[ 1] as u8; @@ -309,7 +320,7 @@ pub fn store_u8(p: &mut Pipeline) { pub fn store_u8_tail(p: &mut Pipeline) { let data = p.pixmap.slice_mask_at_xy(p.dx, p.dy); - let a = p.a.as_slice(); + let a = p.a.as_array(); // This is better than `for i in 0..tail`, because this way the compiler // knows that we have only 16 steps and slices access is guarantee to be valid. @@ -331,7 +342,7 @@ fn load_mask_u8(p: &mut Pipeline) { let mut c = u16x16::default(); for i in 0..p.tail { - c.0[i] = u16::from(p.mask_ctx.data[offset + i]); + c.as_mut_array()[i] = u16::from(p.mask_ctx.data[offset + i]); } p.r = u16x16::splat(0); @@ -347,7 +358,7 @@ fn mask_u8(p: &mut Pipeline) { let mut c = u16x16::default(); for i in 0..p.tail { - c.0[i] = u16::from(p.mask_ctx.data[offset + i]); + c.as_mut_array()[i] = u16::from(p.mask_ctx.data[offset + i]); } if c == u16x16::default() { @@ -365,7 +376,7 @@ fn mask_u8(p: &mut Pipeline) { fn scale_u8(p: &mut Pipeline) { // Load u8xTail and cast it to u16x16. let data = p.aa_mask_ctx.copy_at_xy(p.dx, p.dy, p.tail); - let c = u16x16([ + let c = u16x16::from_array([ u16::from(data[0]), u16::from(data[1]), 0, @@ -395,7 +406,7 @@ fn scale_u8(p: &mut Pipeline) { fn lerp_u8(p: &mut Pipeline) { // Load u8xTail and cast it to u16x16. let data = p.aa_mask_ctx.copy_at_xy(p.dx, p.dy, p.tail); - let c = u16x16([ + let c = u16x16::from_array([ u16::from(data[0]), u16::from(data[1]), 0, @@ -470,7 +481,7 @@ blend_fn!(screen, |s, d, _, _| s + d - div255(s * d)); blend_fn!(xor, |s, d, sa, da| div255(s * inv(da) + d * inv(sa))); // Wants a type for some reason. -blend_fn!(plus, |s: u16x16, d, _, _| (s + d).min(&u16x16::splat(255))); +blend_fn!(plus, |s: u16x16, d: u16x16, _, _| (s + d).min(u16x16::splat(255))); macro_rules! blend_fn2 { @@ -487,25 +498,30 @@ macro_rules! blend_fn2 { }; } -blend_fn2!(darken, |s: u16x16, d, sa, da| s + d - div255((s * da).max(&(d * sa)))); -blend_fn2!(lighten, |s: u16x16, d, sa, da| s + d - div255((s * da).min(&(d * sa)))); +blend_fn2!(darken, |s: u16x16, d: u16x16, sa: u16x16, da: u16x16| s + d - div255((s * da).max(d * sa))); +blend_fn2!(lighten, |s: u16x16, d: u16x16, sa: u16x16, da: u16x16| s + d - div255((s * da).min(d * sa))); blend_fn2!(exclusion, |s: u16x16, d, _, _| s + d - u16x16::splat(2) * div255(s * d)); -blend_fn2!(difference, |s: u16x16, d, sa, da| - s + d - u16x16::splat(2) * div255((s * da).min(&(d * sa)))); +blend_fn2!(difference, |s: u16x16, d, sa, da: u16x16| + s + d - u16x16::splat(2) * div255((s * da).min(d * sa))); blend_fn2!(hard_light, |s: u16x16, d: u16x16, sa, da| { div255(s * inv(da) + d * inv(sa) - + (s+s).cmp_le(&sa).blend( + + blend((s+s).simd_le(sa).to_int().cast(), u16x16::splat(2) * s * d, sa * da - u16x16::splat(2) * (sa-s)*(da-d) ) ) }); +#[inline] +fn blend(a: u16x16, t: u16x16, e: u16x16) -> u16x16 { + (t & a) | (e & !a) +} + blend_fn2!(overlay, |s: u16x16, d: u16x16, sa, da| { div255(s * inv(da) + d * inv(sa) - + (d+d).cmp_le(&da).blend( + + blend((d+d).simd_le(da).to_int().cast(), u16x16::splat(2) * s * d, sa * da - u16x16::splat(2) * (sa-s)*(da-d) ) @@ -542,8 +558,8 @@ fn transform(p: &mut Pipeline) { let x = join(&p.r, &p.g); let y = join(&p.b, &p.a); - let nx = mad(x, f32x16::splat(ts.sx), mad(y, f32x16::splat(ts.kx), f32x16::splat(ts.tx))); - let ny = mad(x, f32x16::splat(ts.ky), mad(y, f32x16::splat(ts.sy), f32x16::splat(ts.ty))); + let nx = x.mul_add(f32x16::splat(ts.sx), y.mul_add(f32x16::splat(ts.kx), f32x16::splat(ts.tx))); + let ny = x.mul_add(f32x16::splat(ts.ky), y.mul_add(f32x16::splat(ts.sy), f32x16::splat(ts.ty))); split(&nx, &mut p.r, &mut p.g); split(&ny, &mut p.b, &mut p.a); @@ -588,24 +604,24 @@ fn gradient(p: &mut Pipeline) { let mut idx = u16x16::splat(0); for i in 1..ctx.len { let tt = ctx.t_values[i].get(); - let t0: [f32; 8] = t.0.into(); - let t1: [f32; 8] = t.1.into(); - idx.0[ 0] += (t0[0] >= tt) as u16; - idx.0[ 1] += (t0[1] >= tt) as u16; - idx.0[ 2] += (t0[2] >= tt) as u16; - idx.0[ 3] += (t0[3] >= tt) as u16; - idx.0[ 4] += (t0[4] >= tt) as u16; - idx.0[ 5] += (t0[5] >= tt) as u16; - idx.0[ 6] += (t0[6] >= tt) as u16; - idx.0[ 7] += (t0[7] >= tt) as u16; - idx.0[ 8] += (t1[0] >= tt) as u16; - idx.0[ 9] += (t1[1] >= tt) as u16; - idx.0[10] += (t1[2] >= tt) as u16; - idx.0[11] += (t1[3] >= tt) as u16; - idx.0[12] += (t1[4] >= tt) as u16; - idx.0[13] += (t1[5] >= tt) as u16; - idx.0[14] += (t1[6] >= tt) as u16; - idx.0[15] += (t1[7] >= tt) as u16; + let t = t.as_array(); + let idx = idx.as_mut_array(); + idx[ 0] += (t[ 0] >= tt) as u16; + idx[ 1] += (t[ 1] >= tt) as u16; + idx[ 2] += (t[ 2] >= tt) as u16; + idx[ 3] += (t[ 3] >= tt) as u16; + idx[ 4] += (t[ 4] >= tt) as u16; + idx[ 5] += (t[ 5] >= tt) as u16; + idx[ 6] += (t[ 6] >= tt) as u16; + idx[ 7] += (t[ 7] >= tt) as u16; + idx[ 8] += (t[ 8] >= tt) as u16; + idx[ 9] += (t[ 9] >= tt) as u16; + idx[10] += (t[10] >= tt) as u16; + idx[11] += (t[11] >= tt) as u16; + idx[12] += (t[12] >= tt) as u16; + idx[13] += (t[13] >= tt) as u16; + idx[14] += (t[14] >= tt) as u16; + idx[15] += (t[15] >= tt) as u16; } gradient_lookup(ctx, &idx, t, &mut p.r, &mut p.g, &mut p.b, &mut p.a); @@ -617,10 +633,10 @@ fn evenly_spaced_2_stop_gradient(p: &mut Pipeline) { let t = join(&p.r, &p.g); round_f32_to_u16( - mad(t, f32x16::splat(ctx.factor.r), f32x16::splat(ctx.bias.r)), - mad(t, f32x16::splat(ctx.factor.g), f32x16::splat(ctx.bias.g)), - mad(t, f32x16::splat(ctx.factor.b), f32x16::splat(ctx.bias.b)), - mad(t, f32x16::splat(ctx.factor.a), f32x16::splat(ctx.bias.a)), + t.mul_add(f32x16::splat(ctx.factor.r), f32x16::splat(ctx.bias.r)), + t.mul_add(f32x16::splat(ctx.factor.g), f32x16::splat(ctx.bias.g)), + t.mul_add(f32x16::splat(ctx.factor.b), f32x16::splat(ctx.bias.b)), + t.mul_add(f32x16::splat(ctx.factor.a), f32x16::splat(ctx.bias.a)), &mut p.r, &mut p.g, &mut p.b, &mut p.a, ); @@ -643,32 +659,29 @@ fn gradient_lookup( ctx: &super::GradientCtx, idx: &u16x16, t: f32x16, r: &mut u16x16, g: &mut u16x16, b: &mut u16x16, a: &mut u16x16, ) { + let idx = idx.as_array(); macro_rules! gather { ($d:expr, $c:ident) => { // Surprisingly, but bound checking doesn't affect the performance. // And since `idx` can contain any number, we should leave it in place. - f32x16( - f32x8::from([ - $d[idx.0[ 0] as usize].$c, - $d[idx.0[ 1] as usize].$c, - $d[idx.0[ 2] as usize].$c, - $d[idx.0[ 3] as usize].$c, - $d[idx.0[ 4] as usize].$c, - $d[idx.0[ 5] as usize].$c, - $d[idx.0[ 6] as usize].$c, - $d[idx.0[ 7] as usize].$c, - ]), - f32x8::from([ - $d[idx.0[ 8] as usize].$c, - $d[idx.0[ 9] as usize].$c, - $d[idx.0[10] as usize].$c, - $d[idx.0[11] as usize].$c, - $d[idx.0[12] as usize].$c, - $d[idx.0[13] as usize].$c, - $d[idx.0[14] as usize].$c, - $d[idx.0[15] as usize].$c, - ]), - ) + f32x16::from_array([ + $d[idx[ 0] as usize].$c, + $d[idx[ 1] as usize].$c, + $d[idx[ 2] as usize].$c, + $d[idx[ 3] as usize].$c, + $d[idx[ 4] as usize].$c, + $d[idx[ 5] as usize].$c, + $d[idx[ 6] as usize].$c, + $d[idx[ 7] as usize].$c, + $d[idx[ 8] as usize].$c, + $d[idx[ 9] as usize].$c, + $d[idx[10] as usize].$c, + $d[idx[11] as usize].$c, + $d[idx[12] as usize].$c, + $d[idx[13] as usize].$c, + $d[idx[14] as usize].$c, + $d[idx[15] as usize].$c, + ]) }; } @@ -683,10 +696,10 @@ fn gradient_lookup( let ba = gather!(&ctx.biases, a); round_f32_to_u16( - mad(t, fr, br), - mad(t, fg, bg), - mad(t, fb, bb), - mad(t, fa, ba), + t.mul_add(fr, br), + t.mul_add(fg, bg), + t.mul_add(fb, bb), + t.mul_add(fa, ba), r, g, b, a, ); } @@ -704,10 +717,42 @@ fn round_f32_to_u16( let bf = bf.normalize() * f32x16::splat(255.0) + f32x16::splat(0.5); let af = af * f32x16::splat(255.0) + f32x16::splat(0.5); - rf.save_to_u16x16(r); - gf.save_to_u16x16(g); - bf.save_to_u16x16(b); - af.save_to_u16x16(a); + save_to_u16x16(rf, r); + save_to_u16x16(gf, g); + save_to_u16x16(bf, b); + save_to_u16x16(af, a); +} + +// TODO: optimize +// This method is too heavy and shouldn't be inlined. +fn save_to_u16x16(src: f32x16, dst: &mut u16x16) { + // Do not use to_i32x8, because it involves rounding, + // and Skia cast's without it. + + // let n0: [f32; 8] = self.0.into(); + // let n1: [f32; 8] = self.1.into(); + let n = src.as_array(); + let dst = dst.as_mut_array(); + + dst[0] = n[0] as u16; + dst[1] = n[1] as u16; + dst[2] = n[2] as u16; + dst[3] = n[3] as u16; + + dst[4] = n[4] as u16; + dst[5] = n[5] as u16; + dst[6] = n[6] as u16; + dst[7] = n[7] as u16; + + dst[8] = n[8] as u16; + dst[9] = n[9] as u16; + dst[10] = n[10] as u16; + dst[11] = n[11] as u16; + + dst[12] = n[12] as u16; + dst[13] = n[13] as u16; + dst[14] = n[14] as u16; + dst[15] = n[15] as u16; } pub fn just_return(_: &mut Pipeline) { @@ -723,28 +768,28 @@ fn load_8888( data: &[PremultipliedColorU8; STAGE_WIDTH], r: &mut u16x16, g: &mut u16x16, b: &mut u16x16, a: &mut u16x16, ) { - *r = u16x16([ + *r = u16x16::from_array([ data[ 0].red() as u16, data[ 1].red() as u16, data[ 2].red() as u16, data[ 3].red() as u16, data[ 4].red() as u16, data[ 5].red() as u16, data[ 6].red() as u16, data[ 7].red() as u16, data[ 8].red() as u16, data[ 9].red() as u16, data[10].red() as u16, data[11].red() as u16, data[12].red() as u16, data[13].red() as u16, data[14].red() as u16, data[15].red() as u16, ]); - *g = u16x16([ + *g = u16x16::from_array([ data[ 0].green() as u16, data[ 1].green() as u16, data[ 2].green() as u16, data[ 3].green() as u16, data[ 4].green() as u16, data[ 5].green() as u16, data[ 6].green() as u16, data[ 7].green() as u16, data[ 8].green() as u16, data[ 9].green() as u16, data[10].green() as u16, data[11].green() as u16, data[12].green() as u16, data[13].green() as u16, data[14].green() as u16, data[15].green() as u16, ]); - *b = u16x16([ + *b = u16x16::from_array([ data[ 0].blue() as u16, data[ 1].blue() as u16, data[ 2].blue() as u16, data[ 3].blue() as u16, data[ 4].blue() as u16, data[ 5].blue() as u16, data[ 6].blue() as u16, data[ 7].blue() as u16, data[ 8].blue() as u16, data[ 9].blue() as u16, data[10].blue() as u16, data[11].blue() as u16, data[12].blue() as u16, data[13].blue() as u16, data[14].blue() as u16, data[15].blue() as u16, ]); - *a = u16x16([ + *a = u16x16::from_array([ data[ 0].alpha() as u16, data[ 1].alpha() as u16, data[ 2].alpha() as u16, data[ 3].alpha() as u16, data[ 4].alpha() as u16, data[ 5].alpha() as u16, data[ 6].alpha() as u16, data[ 7].alpha() as u16, data[ 8].alpha() as u16, data[ 9].alpha() as u16, data[10].alpha() as u16, data[11].alpha() as u16, @@ -769,10 +814,10 @@ fn store_8888( r: &u16x16, g: &u16x16, b: &u16x16, a: &u16x16, data: &mut [PremultipliedColorU8; STAGE_WIDTH], ) { - let r = r.as_slice(); - let g = g.as_slice(); - let b = b.as_slice(); - let a = a.as_slice(); + let r = r.as_array(); + let g = g.as_array(); + let b = b.as_array(); + let a = a.as_array(); data[ 0] = PremultipliedColorU8::from_rgba_unchecked(r[ 0] as u8, g[ 0] as u8, b[ 0] as u8, a[ 0] as u8); data[ 1] = PremultipliedColorU8::from_rgba_unchecked(r[ 1] as u8, g[ 1] as u8, b[ 1] as u8, a[ 1] as u8); @@ -797,10 +842,10 @@ fn store_8888_tail( r: &u16x16, g: &u16x16, b: &u16x16, a: &u16x16, tail: usize, data: &mut [PremultipliedColorU8], ) { - let r = r.as_slice(); - let g = g.as_slice(); - let b = b.as_slice(); - let a = a.as_slice(); + let r = r.as_array(); + let g = g.as_array(); + let b = b.as_array(); + let a = a.as_array(); // This is better than `for i in 0..tail`, because this way the compiler // knows that we have only 16 steps and slices access is guarantee to be valid. @@ -818,7 +863,7 @@ fn store_8888_tail( #[inline(always)] fn load_8(data: &[u8; STAGE_WIDTH], a: &mut u16x16) { - *a = u16x16([ + *a = u16x16::from_array([ data[ 0] as u16, data[ 1] as u16, data[ 2] as u16, data[ 3] as u16, data[ 4] as u16, data[ 5] as u16, data[ 6] as u16, data[ 7] as u16, data[ 8] as u16, data[ 9] as u16, data[10] as u16, data[11] as u16, @@ -830,7 +875,7 @@ fn load_8(data: &[u8; STAGE_WIDTH], a: &mut u16x16) { fn div255(v: u16x16) -> u16x16 { // Skia uses `vrshrq_n_u16(vrsraq_n_u16(v, v, 8), 8)` here when NEON is available, // but it doesn't affect performance much and breaks reproducible result. Ignore it. - // NOTE: the compiler does not replace the devision with a shift. + // NOTE: the compiler does not replace the division with a shift. (v + u16x16::splat(255)) >> u16x16::splat(8) // / u16x16::splat(256) } @@ -852,9 +897,9 @@ fn lerp(from: u16x16, to: u16x16, t: u16x16) -> u16x16 { #[inline(always)] fn split(v: &f32x16, lo: &mut u16x16, hi: &mut u16x16) { // We're splitting f32x16 (512bit) into two u16x16 (256 bit). - let data: [u8; 64] = bytemuck::cast(*v); - let d0: &mut [u8; 32] = bytemuck::cast_mut(&mut lo.0); - let d1: &mut [u8; 32] = bytemuck::cast_mut(&mut hi.0); + let data: [u8; 64] = bytemuck::cast(*v.as_array()); + let d0: &mut [u8; 32] = bytemuck::cast_mut(lo.as_mut_array()); + let d1: &mut [u8; 32] = bytemuck::cast_mut(hi.as_mut_array()); d0.copy_from_slice(&data[0..32]); d1.copy_from_slice(&data[32..64]); @@ -864,20 +909,14 @@ fn split(v: &f32x16, lo: &mut u16x16, hi: &mut u16x16) { fn join(lo: &u16x16, hi: &u16x16) -> f32x16 { // We're joining two u16x16 (256 bit) into f32x16 (512bit). - let d0: [u8; 32] = bytemuck::cast(lo.0); - let d1: [u8; 32] = bytemuck::cast(hi.0); + let d0: [u8; 32] = bytemuck::cast(*lo.as_array()); + let d1: [u8; 32] = bytemuck::cast(*hi.as_array()); let mut v = f32x16::default(); - let data: &mut [u8; 64] = bytemuck::cast_mut(&mut v); + let data: &mut [u8; 64] = bytemuck::cast_mut(v.as_mut_array()); data[0..32].copy_from_slice(&d0); data[32..64].copy_from_slice(&d1); v } - -#[inline(always)] -fn mad(f: f32x16, m: f32x16, a: f32x16) -> f32x16 { - // NEON vmlaq_f32 doesn't seem to affect performance in any way. Ignore it. - f * m + a -} diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs index ee2b252..d7c9318 100644 --- a/src/pipeline/mod.rs +++ b/src/pipeline/mod.rs @@ -45,6 +45,7 @@ and should be optimized out in the future. */ use alloc::vec::Vec; +use core::simd::u32x8; use arrayvec::ArrayVec; @@ -57,7 +58,6 @@ pub use blitter::RasterPipelineBlitter; use crate::geom::ScreenIntRect; use crate::pixmap::SubPixmapMut; -use crate::wide::u32x8; mod blitter; #[rustfmt::skip] mod highp; @@ -137,7 +137,7 @@ pub const STAGES_COUNT: usize = Stage::ApplyVectorMask as usize + 1; impl<'a> PixmapRef<'a> { #[inline(always)] pub(crate) fn gather(&self, index: u32x8) -> [PremultipliedColorU8; highp::STAGE_WIDTH] { - let index: [u32; 8] = bytemuck::cast(index); + let index: &[u32; 8] = index.as_array(); let pixels = self.pixels(); [ pixels[index[0] as usize], diff --git a/src/scan/hairline.rs b/src/scan/hairline.rs index 150078b..6abd3d2 100644 --- a/src/scan/hairline.rs +++ b/src/scan/hairline.rs @@ -5,8 +5,9 @@ // found in the LICENSE file. use core::convert::TryInto; +use core::simd::{f32x2, SimdFloat}; -use tiny_skia_path::{f32x2, PathVerb, SaturateCast, Scalar}; +use tiny_skia_path::{PathVerb, SaturateCast, Scalar}; use crate::{IntRect, LineCap, Path, PathSegment, Point, Rect}; @@ -27,6 +28,48 @@ pub type LineProc = fn(&[Point], Option<&ScreenIntRect>, &mut dyn Blitter); const MAX_CUBIC_SUBDIVIDE_LEVEL: u8 = 9; const MAX_QUAD_SUBDIVIDE_LEVEL: u8 = 5; +trait F32x2Ext { + fn x(self) -> f32; + fn y(self) -> f32; + fn max_component(self) -> f32; +} + +impl F32x2Ext for f32x2 { + fn x(self) -> f32 { + self.as_array()[0] + } + + fn y(self) -> f32 { + self.as_array()[1] + } + + fn max_component(self) -> f32 { + let a = self.x(); + let b = self.y(); + // This is faster than `f32::max`. Unlike std one, we do not care about NaN. + if a < b { + b + } else { + a + } + } +} + +trait PointExt { + fn from_f32x2(r: f32x2) -> Self; + fn to_f32x2(&self) -> f32x2; +} + +impl PointExt for Point { + fn from_f32x2(r: f32x2) -> Self { + Point::from_xy(r.as_array()[0], r.as_array()[1]) + } + + fn to_f32x2(&self) -> f32x2 { + f32x2::from_array([self.x, self.y]) + } +} + pub fn stroke_path( path: &Path, line_cap: LineCap, @@ -429,8 +472,8 @@ fn compute_nocheck_quad_bounds(points: &[Point; 3]) -> Option { let mut max = min; for i in 1..3 { let pair = points[i].to_f32x2(); - min = min.min(pair); - max = max.max(pair); + min = min.simd_min(pair); + max = max.simd_max(pair); } Rect::from_ltrb(min.x(), min.y(), max.x(), max.y()) @@ -564,8 +607,8 @@ fn compute_nocheck_cubic_bounds(points: &[Point; 4]) -> Option { let mut max = min; for i in 1..4 { let pair = points[i].to_f32x2(); - min = min.min(pair); - max = max.max(pair); + min = min.simd_min(pair); + max = max.simd_max(pair); } Rect::from_ltrb(min.x(), min.y(), max.x(), max.y()) @@ -631,7 +674,7 @@ fn compute_cubic_segments(points: &[Point; 4]) -> usize { let p13 = one_third * p3 + two_third * p0; let p23 = one_third * p0 + two_third * p3; - let diff = (p1 - p13).abs().max((p2 - p23).abs()).max_component(); + let diff = (p1 - p13).abs().simd_max((p2 - p23).abs()).max_component(); let mut tol = 1.0 / 8.0; for i in 0..MAX_CUBIC_SUBDIVIDE_LEVEL { diff --git a/src/shaders/linear_gradient.rs b/src/shaders/linear_gradient.rs index 8cf1b41..81b47c3 100644 --- a/src/shaders/linear_gradient.rs +++ b/src/shaders/linear_gradient.rs @@ -110,10 +110,10 @@ fn points_to_unit_ts(start: Point, end: Point) -> Option { } fn average_gradient_color(points: &[GradientStop]) -> Color { - use crate::wide::f32x4; + use core::simd::f32x4; fn load_color(c: Color) -> f32x4 { - f32x4::from([c.red(), c.green(), c.blue(), c.alpha()]) + f32x4::from_array([c.red(), c.green(), c.blue(), c.alpha()]) } fn store_color(c: f32x4) -> Color { diff --git a/src/shaders/radial_gradient.rs b/src/shaders/radial_gradient.rs index 3c7f441..ce1104d 100644 --- a/src/shaders/radial_gradient.rs +++ b/src/shaders/radial_gradient.rs @@ -13,7 +13,6 @@ use crate::{GradientStop, Point, Shader, SpreadMode, Transform}; use super::gradient::{Gradient, DEGENERATE_THRESHOLD}; use crate::pipeline; use crate::pipeline::RasterPipelineBuilder; -use crate::wide::u32x8; #[cfg(all(not(feature = "std"), feature = "no-std-float"))] use tiny_skia_path::NoStdFloat; @@ -142,7 +141,7 @@ impl RadialGradient { }; p.ctx.two_point_conical_gradient = pipeline::TwoPointConicalGradientCtx { - mask: u32x8::default(), + mask: core::simd::u32x8::default(), p0, }; diff --git a/src/wide/f32x16_t.rs b/src/wide/f32x16_t.rs deleted file mode 100644 index 3cd76a1..0000000 --- a/src/wide/f32x16_t.rs +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use super::{f32x8, u16x16}; - -#[derive(Copy, Clone, Debug)] -#[repr(C, align(32))] -pub struct f32x16(pub f32x8, pub f32x8); - -unsafe impl bytemuck::Zeroable for f32x16 {} -unsafe impl bytemuck::Pod for f32x16 {} - -impl Default for f32x16 { - fn default() -> Self { - Self::splat(0.0) - } -} - -impl f32x16 { - pub fn splat(n: f32) -> Self { - Self(f32x8::splat(n), f32x8::splat(n)) - } - - #[inline] - pub fn abs(&self) -> Self { - // Yes, Skia does it in the same way. - let abs = |x| bytemuck::cast::(bytemuck::cast::(x) & 0x7fffffff); - - let n0: [f32; 8] = self.0.into(); - let n1: [f32; 8] = self.1.into(); - Self( - f32x8::from([ - abs(n0[0]), - abs(n0[1]), - abs(n0[2]), - abs(n0[3]), - abs(n0[4]), - abs(n0[5]), - abs(n0[6]), - abs(n0[7]), - ]), - f32x8::from([ - abs(n1[0]), - abs(n1[1]), - abs(n1[2]), - abs(n1[3]), - abs(n1[4]), - abs(n1[5]), - abs(n1[6]), - abs(n1[7]), - ]), - ) - } - - pub fn cmp_gt(self, rhs: &Self) -> Self { - Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1)) - } - - pub fn blend(self, t: Self, f: Self) -> Self { - Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1)) - } - - pub fn normalize(&self) -> Self { - Self(self.0.normalize(), self.1.normalize()) - } - - pub fn floor(&self) -> Self { - // Yes, Skia does it in the same way. - let roundtrip = self.round(); - roundtrip - - roundtrip - .cmp_gt(self) - .blend(f32x16::splat(1.0), f32x16::splat(0.0)) - } - - pub fn sqrt(&self) -> Self { - Self(self.0.sqrt(), self.1.sqrt()) - } - - pub fn round(&self) -> Self { - Self(self.0.round(), self.1.round()) - } - - // This method is too heavy and shouldn't be inlined. - pub fn save_to_u16x16(&self, dst: &mut u16x16) { - // Do not use to_i32x8, because it involves rounding, - // and Skia cast's without it. - - let n0: [f32; 8] = self.0.into(); - let n1: [f32; 8] = self.1.into(); - - dst.0[0] = n0[0] as u16; - dst.0[1] = n0[1] as u16; - dst.0[2] = n0[2] as u16; - dst.0[3] = n0[3] as u16; - - dst.0[4] = n0[4] as u16; - dst.0[5] = n0[5] as u16; - dst.0[6] = n0[6] as u16; - dst.0[7] = n0[7] as u16; - - dst.0[8] = n1[0] as u16; - dst.0[9] = n1[1] as u16; - dst.0[10] = n1[2] as u16; - dst.0[11] = n1[3] as u16; - - dst.0[12] = n1[4] as u16; - dst.0[13] = n1[5] as u16; - dst.0[14] = n1[6] as u16; - dst.0[15] = n1[7] as u16; - } -} - -impl core::ops::Add for f32x16 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - Self(self.0 + rhs.0, self.1 + rhs.1) - } -} - -impl core::ops::Sub for f32x16 { - type Output = Self; - - fn sub(self, rhs: Self) -> Self::Output { - Self(self.0 - rhs.0, self.1 - rhs.1) - } -} - -impl core::ops::Mul for f32x16 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - Self(self.0 * rhs.0, self.1 * rhs.1) - } -} diff --git a/src/wide/f32x4_t.rs b/src/wide/f32x4_t.rs deleted file mode 100644 index 21d5140..0000000 --- a/src/wide/f32x4_t.rs +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Based on https://github.com/Lokathor/wide (Zlib) - -use bytemuck::cast; - -#[cfg(all(not(feature = "std"), feature = "no-std-float"))] -use tiny_skia_path::NoStdFloat; - -use super::i32x4; - -cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct f32x4(__m128); - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - use core::arch::wasm32::*; - - // repr(transparent) allows for directly passing the v128 on the WASM stack. - #[derive(Clone, Copy, Debug)] - #[repr(transparent)] - pub struct f32x4(v128); - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - use core::arch::aarch64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct f32x4(float32x4_t); - } else { - use super::FasterMinMax; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct f32x4([f32; 4]); - } -} - -unsafe impl bytemuck::Zeroable for f32x4 {} -unsafe impl bytemuck::Pod for f32x4 {} - -impl Default for f32x4 { - fn default() -> Self { - Self::splat(0.0) - } -} - -impl f32x4 { - pub fn splat(n: f32) -> Self { - Self::from([n, n, n, n]) - } - - pub fn floor(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_floor(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vrndmq_f32(self.0) }) - } else { - let roundtrip: f32x4 = cast(self.trunc_int().to_f32x4()); - roundtrip - roundtrip.cmp_gt(self).blend(f32x4::splat(1.0), f32x4::default()) - } - } - } - - pub fn abs(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_abs(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vabsq_f32(self.0) }) - } else { - let non_sign_bits = f32x4::splat(f32::from_bits(i32::MAX as u32)); - self & non_sign_bits - } - } - } - - pub fn max(self, rhs: Self) -> Self { - // These technically don't have the same semantics for NaN and 0, but it - // doesn't seem to matter as Skia does it the same way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_max_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_pmax(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vmaxq_f32(self.0, rhs.0) }) - } else { - Self([ - self.0[0].faster_max(rhs.0[0]), - self.0[1].faster_max(rhs.0[1]), - self.0[2].faster_max(rhs.0[2]), - self.0[3].faster_max(rhs.0[3]), - ]) - } - } - } - - pub fn min(self, rhs: Self) -> Self { - // These technically don't have the same semantics for NaN and 0, but it - // doesn't seem to matter as Skia does it the same way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_min_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_pmin(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vminq_f32(self.0, rhs.0) }) - } else { - Self([ - self.0[0].faster_min(rhs.0[0]), - self.0[1].faster_min(rhs.0[1]), - self.0[2].faster_min(rhs.0[2]), - self.0[3].faster_min(rhs.0[3]), - ]) - } - } - } - - pub fn cmp_eq(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmpeq_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_eq(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vceqq_f32(self.0, rhs.0) })) - } else { - Self([ - if self.0[0] == rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[1] == rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[2] == rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[3] == rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 }, - ]) - } - } - } - - pub fn cmp_ne(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmpneq_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_ne(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vmvnq_u32(vceqq_f32(self.0, rhs.0)) })) - } else { - Self([ - if self.0[0] != rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[1] != rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[2] != rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[3] != rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 }, - ]) - } - } - } - - pub fn cmp_ge(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmpge_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_ge(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vcgeq_f32(self.0, rhs.0) })) - } else { - Self([ - if self.0[0] >= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[1] >= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[2] >= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[3] >= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 }, - ]) - } - } - } - - pub fn cmp_gt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmpgt_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_gt(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vcgtq_f32(self.0, rhs.0) })) - } else { - Self([ - if self.0[0] > rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[1] > rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[2] > rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[3] > rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 }, - ]) - } - } - } - - pub fn cmp_le(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmple_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_le(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vcleq_f32(self.0, rhs.0) })) - } else { - Self([ - if self.0[0] <= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[1] <= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[2] <= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[3] <= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 }, - ]) - } - } - } - - pub fn cmp_lt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmplt_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_lt(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vcltq_f32(self.0, rhs.0) })) - } else { - Self([ - if self.0[0] < rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[1] < rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[2] < rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 }, - if self.0[3] < rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 }, - ]) - } - } - } - - #[inline] - pub fn blend(self, t: Self, f: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] { - Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_bitselect(t.0, f.0, self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { cast(vbslq_u32( cast(self.0), cast(t.0), cast(f.0))) }) - } else { - super::generic_bit_blend(self, t, f) - } - } - } - - pub fn round(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] { - Self( - unsafe { _mm_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) }, - ) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_nearest(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vrndnq_f32(self.0) }) - } else { - use super::u32x4; - - let to_int = f32x4::splat(1.0 / f32::EPSILON); - let u: u32x4 = cast(self); - let e: i32x4 = cast(u.shr::<23>() & u32x4::splat(0xff)); - let mut y: f32x4; - - let no_op_magic = i32x4::splat(0x7f + 23); - let no_op_mask: f32x4 = cast(e.cmp_gt(no_op_magic) | e.cmp_eq(no_op_magic)); - let no_op_val: f32x4 = self; - - let zero_magic = i32x4::splat(0x7f - 1); - let zero_mask: f32x4 = cast(e.cmp_lt(zero_magic)); - let zero_val: f32x4 = self * f32x4::splat(0.0); - - let neg_bit: f32x4 = cast(cast::(u).cmp_lt(i32x4::default())); - let x: f32x4 = neg_bit.blend(-self, self); - y = x + to_int - to_int - x; - y = y.cmp_gt(f32x4::splat(0.5)).blend( - y + x - f32x4::splat(-1.0), - y.cmp_lt(f32x4::splat(-0.5)).blend(y + x + f32x4::splat(1.0), y + x), - ); - y = neg_bit.blend(-y, y); - - no_op_mask.blend(no_op_val, zero_mask.blend(zero_val, y)) - } - } - } - - pub fn round_int(self) -> i32x4 { - // These technically don't have the same semantics for NaN and out of - // range values, but it doesn't seem to matter as Skia does it the same - // way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - i32x4(unsafe { _mm_cvtps_epi32(self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - i32x4(i32x4_trunc_sat_f32x4(self.round().0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - i32x4(unsafe { vcvtnq_s32_f32(self.0) } ) - } else { - let rounded: [f32; 4] = cast(self.round()); - cast([ - rounded[0] as i32, - rounded[1] as i32, - rounded[2] as i32, - rounded[3] as i32, - ]) - } - } - } - - pub fn trunc_int(self) -> i32x4 { - // These technically don't have the same semantics for NaN and out of - // range values, but it doesn't seem to matter as Skia does it the same - // way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - i32x4(unsafe { _mm_cvttps_epi32(self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - i32x4(i32x4_trunc_sat_f32x4(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - i32x4(unsafe { vcvtq_s32_f32(self.0) }) - } else { - cast([ - self.0[0] as i32, - self.0[1] as i32, - self.0[2] as i32, - self.0[3] as i32, - ]) - } - } - } - - pub fn recip_fast(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_rcp_ps(self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_div(f32x4_splat(1.0), self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - unsafe { - let a = vrecpeq_f32(self.0); - let a = vmulq_f32(vrecpsq_f32(self.0, a), a); - Self(a) - } - } else { - Self::from([ - 1.0 / self.0[0], - 1.0 / self.0[1], - 1.0 / self.0[2], - 1.0 / self.0[3], - ]) - } - } - } - - pub fn recip_sqrt(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_rsqrt_ps(self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_div(f32x4_splat(1.0), f32x4_sqrt(self.0))) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - unsafe { - let a = vrsqrteq_f32(self.0); - let a = vmulq_f32(vrsqrtsq_f32(self.0, vmulq_f32(a, a)), a); - Self(a) - } - } else { - Self::from([ - 1.0 / self.0[0].sqrt(), - 1.0 / self.0[1].sqrt(), - 1.0 / self.0[2].sqrt(), - 1.0 / self.0[3].sqrt(), - ]) - } - } - } - - pub fn sqrt(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_sqrt_ps(self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_sqrt(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vsqrtq_f32(self.0) }) - } else { - Self::from([ - self.0[0].sqrt(), - self.0[1].sqrt(), - self.0[2].sqrt(), - self.0[3].sqrt(), - ]) - } - } - } -} - -impl From<[f32; 4]> for f32x4 { - fn from(v: [f32; 4]) -> Self { - cast(v) - } -} - -impl From for [f32; 4] { - fn from(v: f32x4) -> Self { - cast(v) - } -} - -impl core::ops::Add for f32x4 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_add_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_add(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vaddq_f32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] + rhs.0[0], - self.0[1] + rhs.0[1], - self.0[2] + rhs.0[2], - self.0[3] + rhs.0[3], - ]) - } - } - } -} - -impl core::ops::AddAssign for f32x4 { - fn add_assign(&mut self, rhs: f32x4) { - *self = *self + rhs; - } -} - -impl core::ops::Sub for f32x4 { - type Output = Self; - - fn sub(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_sub_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_sub(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vsubq_f32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] - rhs.0[0], - self.0[1] - rhs.0[1], - self.0[2] - rhs.0[2], - self.0[3] - rhs.0[3], - ]) - } - } - } -} - -impl core::ops::Mul for f32x4 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_mul_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_mul(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vmulq_f32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] * rhs.0[0], - self.0[1] * rhs.0[1], - self.0[2] * rhs.0[2], - self.0[3] * rhs.0[3], - ]) - } - } - } -} - -impl core::ops::MulAssign for f32x4 { - fn mul_assign(&mut self, rhs: f32x4) { - *self = *self * rhs; - } -} - -impl core::ops::Div for f32x4 { - type Output = Self; - - fn div(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_div_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(f32x4_div(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vdivq_f32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] / rhs.0[0], - self.0[1] / rhs.0[1], - self.0[2] / rhs.0[2], - self.0[3] / rhs.0[3], - ]) - } - } - } -} - -impl core::ops::BitAnd for f32x4 { - type Output = Self; - - #[inline(always)] - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_and_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_and(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vandq_u32(cast(self.0), cast(rhs.0)) })) - } else { - Self([ - f32::from_bits(self.0[0].to_bits() & rhs.0[0].to_bits()), - f32::from_bits(self.0[1].to_bits() & rhs.0[1].to_bits()), - f32::from_bits(self.0[2].to_bits() & rhs.0[2].to_bits()), - f32::from_bits(self.0[3].to_bits() & rhs.0[3].to_bits()), - ]) - } - } - } -} - -impl core::ops::BitOr for f32x4 { - type Output = Self; - - #[inline(always)] - fn bitor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_or_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_or(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vorrq_u32(cast(self.0), cast(rhs.0)) })) - } else { - Self([ - f32::from_bits(self.0[0].to_bits() | rhs.0[0].to_bits()), - f32::from_bits(self.0[1].to_bits() | rhs.0[1].to_bits()), - f32::from_bits(self.0[2].to_bits() | rhs.0[2].to_bits()), - f32::from_bits(self.0[3].to_bits() | rhs.0[3].to_bits()), - ]) - } - } - } -} - -impl core::ops::BitXor for f32x4 { - type Output = Self; - - #[inline(always)] - fn bitxor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_xor_ps(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_xor(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { veorq_u32(cast(self.0), cast(rhs.0)) })) - } else { - Self([ - f32::from_bits(self.0[0].to_bits() ^ rhs.0[0].to_bits()), - f32::from_bits(self.0[1].to_bits() ^ rhs.0[1].to_bits()), - f32::from_bits(self.0[2].to_bits() ^ rhs.0[2].to_bits()), - f32::from_bits(self.0[3].to_bits() ^ rhs.0[3].to_bits()), - ]) - } - } - } -} - -impl core::ops::Neg for f32x4 { - type Output = Self; - - fn neg(self) -> Self { - Self::default() - self - } -} - -impl core::ops::Not for f32x4 { - type Output = Self; - - fn not(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - unsafe { - let all_bits = _mm_set1_ps(f32::from_bits(u32::MAX)); - Self(_mm_xor_ps(self.0, all_bits)) - } - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_not(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(cast(unsafe { vmvnq_u32(cast(self.0)) })) - } else { - self ^ Self::splat(cast(u32::MAX)) - } - } - } -} - -impl core::cmp::PartialEq for f32x4 { - fn eq(&self, rhs: &Self) -> bool { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - unsafe { _mm_movemask_ps(_mm_cmpeq_ps(self.0, rhs.0)) == 0b1111 } - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - unsafe { vminvq_u32(vceqq_f32(self.0, rhs.0)) != 0 } - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - u32x4_all_true(f32x4_eq(self.0, rhs.0)) - } else { - self.0 == rhs.0 - } - } - } -} diff --git a/src/wide/f32x8_t.rs b/src/wide/f32x8_t.rs deleted file mode 100644 index 6039334..0000000 --- a/src/wide/f32x8_t.rs +++ /dev/null @@ -1,403 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Based on https://github.com/Lokathor/wide (Zlib) - -use bytemuck::cast; - -use super::{i32x8, u32x8}; - -cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(32))] - pub struct f32x8(__m256); - } else { - use super::f32x4; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(32))] - pub struct f32x8(pub f32x4, pub f32x4); - } -} - -unsafe impl bytemuck::Zeroable for f32x8 {} -unsafe impl bytemuck::Pod for f32x8 {} - -impl Default for f32x8 { - fn default() -> Self { - Self::splat(0.0) - } -} - -impl f32x8 { - pub fn splat(n: f32) -> Self { - cast([n, n, n, n, n, n, n, n]) - } - - pub fn floor(self) -> Self { - let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8()); - roundtrip - - roundtrip - .cmp_gt(self) - .blend(f32x8::splat(1.0), f32x8::default()) - } - - pub fn fract(self) -> Self { - self - self.floor() - } - - pub fn normalize(self) -> Self { - self.max(f32x8::default()).min(f32x8::splat(1.0)) - } - - pub fn to_i32x8_bitcast(self) -> i32x8 { - bytemuck::cast(self) - } - - pub fn to_u32x8_bitcast(self) -> u32x8 { - bytemuck::cast(self) - } - - pub fn cmp_eq(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) }) - } else { - Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1)) - } - } - } - - pub fn cmp_ne(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_NEQ_OQ) }) - } else { - Self(self.0.cmp_ne(rhs.0), self.1.cmp_ne(rhs.1)) - } - } - } - - pub fn cmp_ge(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GE_OQ) }) - } else { - Self(self.0.cmp_ge(rhs.0), self.1.cmp_ge(rhs.1)) - } - } - } - - pub fn cmp_gt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GT_OQ) }) - } else { - Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1)) - } - } - } - - pub fn cmp_le(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LE_OQ) }) - } else { - Self(self.0.cmp_le(rhs.0), self.1.cmp_le(rhs.1)) - } - } - } - - pub fn cmp_lt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LT_OQ) }) - } else { - Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1)) - } - } - } - - #[inline] - pub fn blend(self, t: Self, f: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_blendv_ps(f.0, t.0, self.0) }) - } else { - Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1)) - } - } - } - - pub fn abs(self) -> Self { - let non_sign_bits = f32x8::splat(f32::from_bits(i32::MAX as u32)); - self & non_sign_bits - } - - pub fn max(self, rhs: Self) -> Self { - // These technically don't have the same semantics for NaN and 0, but it - // doesn't seem to matter as Skia does it the same way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_max_ps(self.0, rhs.0) }) - } else { - Self(self.0.max(rhs.0), self.1.max(rhs.1)) - } - } - } - - pub fn min(self, rhs: Self) -> Self { - // These technically don't have the same semantics for NaN and 0, but it - // doesn't seem to matter as Skia does it the same way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_min_ps(self.0, rhs.0) }) - } else { - Self(self.0.min(rhs.0), self.1.min(rhs.1)) - } - } - } - - pub fn is_finite(self) -> Self { - let shifted_exp_mask = u32x8::splat(0xFF000000); - let u: u32x8 = cast(self); - let shift_u = u.shl::<1>(); - let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask); - cast(out) - } - - pub fn round(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) }) - } else { - Self(self.0.round(), self.1.round()) - } - } - } - - pub fn round_int(self) -> i32x8 { - // These technically don't have the same semantics for NaN and out of - // range values, but it doesn't seem to matter as Skia does it the same - // way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - cast(unsafe { _mm256_cvtps_epi32(self.0) }) - } else { - i32x8(self.0.round_int(), self.1.round_int()) - } - } - } - - pub fn trunc_int(self) -> i32x8 { - // These technically don't have the same semantics for NaN and out of - // range values, but it doesn't seem to matter as Skia does it the same - // way. - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - cast(unsafe { _mm256_cvttps_epi32(self.0) }) - } else { - i32x8(self.0.trunc_int(), self.1.trunc_int()) - } - } - } - - pub fn recip_fast(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_rcp_ps(self.0) }) - } else { - Self(self.0.recip_fast(), self.1.recip_fast()) - } - } - } - - pub fn recip_sqrt(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_rsqrt_ps(self.0) }) - } else { - Self(self.0.recip_sqrt(), self.1.recip_sqrt()) - } - } - } - - pub fn sqrt(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_sqrt_ps(self.0) }) - } else { - Self(self.0.sqrt(), self.1.sqrt()) - } - } - } -} - -impl From<[f32; 8]> for f32x8 { - fn from(v: [f32; 8]) -> Self { - cast(v) - } -} - -impl From for [f32; 8] { - fn from(v: f32x8) -> Self { - cast(v) - } -} - -impl core::ops::Add for f32x8 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_add_ps(self.0, rhs.0) }) - } else { - Self(self.0 + rhs.0, self.1 + rhs.1) - } - } - } -} - -impl core::ops::AddAssign for f32x8 { - fn add_assign(&mut self, rhs: f32x8) { - *self = *self + rhs; - } -} - -impl core::ops::Sub for f32x8 { - type Output = Self; - - fn sub(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_sub_ps(self.0, rhs.0) }) - } else { - Self(self.0 - rhs.0, self.1 - rhs.1) - } - } - } -} - -impl core::ops::Mul for f32x8 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_mul_ps(self.0, rhs.0) }) - } else { - Self(self.0 * rhs.0, self.1 * rhs.1) - } - } - } -} - -impl core::ops::MulAssign for f32x8 { - fn mul_assign(&mut self, rhs: f32x8) { - *self = *self * rhs; - } -} - -impl core::ops::Div for f32x8 { - type Output = Self; - - fn div(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_div_ps(self.0, rhs.0) }) - } else { - Self(self.0 / rhs.0, self.1 / rhs.1) - } - } - } -} - -impl core::ops::BitAnd for f32x8 { - type Output = Self; - - #[inline(always)] - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_and_ps(self.0, rhs.0) }) - } else { - Self(self.0 & rhs.0, self.1 & rhs.1) - } - } - } -} - -impl core::ops::BitOr for f32x8 { - type Output = Self; - - #[inline(always)] - fn bitor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_or_ps(self.0, rhs.0) }) - } else { - Self(self.0 | rhs.0, self.1 | rhs.1) - } - } - } -} - -impl core::ops::BitXor for f32x8 { - type Output = Self; - - #[inline(always)] - fn bitxor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - Self(unsafe { _mm256_xor_ps(self.0, rhs.0) }) - } else { - Self(self.0 ^ rhs.0, self.1 ^ rhs.1) - } - } - } -} - -impl core::ops::Neg for f32x8 { - type Output = Self; - - fn neg(self) -> Self { - Self::default() - self - } -} - -impl core::ops::Not for f32x8 { - type Output = Self; - - fn not(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - let all_bits = unsafe { _mm256_set1_ps(f32::from_bits(u32::MAX)) }; - Self(unsafe { _mm256_xor_ps(self.0, all_bits) }) - } else { - Self(!self.0, !self.1) - } - } - } -} - -impl core::cmp::PartialEq for f32x8 { - fn eq(&self, rhs: &Self) -> bool { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx"))] { - let mask = unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) }; - unsafe { _mm256_movemask_ps(mask) == 0b1111_1111 } - } else { - self.0 == rhs.0 && self.1 == rhs.1 - } - } - } -} diff --git a/src/wide/i32x4_t.rs b/src/wide/i32x4_t.rs deleted file mode 100644 index fb77a0f..0000000 --- a/src/wide/i32x4_t.rs +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Based on https://github.com/Lokathor/wide (Zlib) - -use bytemuck::cast; - -use super::f32x4; - -cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct i32x4(pub __m128i); - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - use core::arch::wasm32::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct i32x4(pub v128); - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - use core::arch::aarch64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct i32x4(pub int32x4_t); - } else { - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct i32x4([i32; 4]); - } -} - -unsafe impl bytemuck::Zeroable for i32x4 {} -unsafe impl bytemuck::Pod for i32x4 {} - -impl Default for i32x4 { - fn default() -> Self { - Self::splat(0) - } -} - -impl i32x4 { - pub fn splat(n: i32) -> Self { - cast([n, n, n, n]) - } - - pub fn blend(self, t: Self, f: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] { - Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_bitselect(t.0, f.0, self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vbslq_s32(cast(self.0), t.0, f.0) }) - } else { - super::generic_bit_blend(self, t, f) - } - } - } - - pub fn cmp_eq(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - cast(Self(cast(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) }))) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(i32x4_eq(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { cast(vceqq_s32(self.0, rhs.0)) }) - } else { - Self([ - if self.0[0] == rhs.0[0] { -1 } else { 0 }, - if self.0[1] == rhs.0[1] { -1 } else { 0 }, - if self.0[2] == rhs.0[2] { -1 } else { 0 }, - if self.0[3] == rhs.0[3] { -1 } else { 0 }, - ]) - } - } - } - - pub fn cmp_gt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - cast(Self(cast(unsafe { _mm_cmpgt_epi32(self.0, rhs.0) }))) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(i32x4_gt(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { cast(vcgtq_s32(self.0, rhs.0)) }) - } else { - Self([ - if self.0[0] > rhs.0[0] { -1 } else { 0 }, - if self.0[1] > rhs.0[1] { -1 } else { 0 }, - if self.0[2] > rhs.0[2] { -1 } else { 0 }, - if self.0[3] > rhs.0[3] { -1 } else { 0 }, - ]) - } - } - } - - pub fn cmp_lt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - cast(Self(cast(unsafe { _mm_cmplt_epi32(self.0, rhs.0) }))) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(i32x4_lt(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { cast(vcltq_s32(self.0, rhs.0)) }) - } else { - Self([ - if self.0[0] < rhs.0[0] { -1 } else { 0 }, - if self.0[1] < rhs.0[1] { -1 } else { 0 }, - if self.0[2] < rhs.0[2] { -1 } else { 0 }, - if self.0[3] < rhs.0[3] { -1 } else { 0 }, - ]) - } - } - } - - pub fn to_f32x4(self) -> f32x4 { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - cast(Self(cast(unsafe { _mm_cvtepi32_ps(self.0) }))) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - cast(Self(f32x4_convert_i32x4(self.0))) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - cast(Self(unsafe { cast(vcvtq_f32_s32(self.0)) })) - } else { - let arr: [i32; 4] = cast(self); - cast([ - arr[0] as f32, - arr[1] as f32, - arr[2] as f32, - arr[3] as f32, - ]) - } - } - } - - pub fn to_f32x4_bitcast(self) -> f32x4 { - bytemuck::cast(self) - } -} - -impl From<[i32; 4]> for i32x4 { - fn from(v: [i32; 4]) -> Self { - cast(v) - } -} - -impl From for [i32; 4] { - fn from(v: i32x4) -> Self { - cast(v) - } -} - -impl core::ops::Add for i32x4 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_add_epi32(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(i32x4_add(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vaddq_s32(self.0, rhs.0) }) - } else { - Self([ - self.0[0].wrapping_add(rhs.0[0]), - self.0[1].wrapping_add(rhs.0[1]), - self.0[2].wrapping_add(rhs.0[2]), - self.0[3].wrapping_add(rhs.0[3]), - ]) - } - } - } -} - -impl core::ops::BitAnd for i32x4 { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_and_si128(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_and(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vandq_s32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] & rhs.0[0], - self.0[1] & rhs.0[1], - self.0[2] & rhs.0[2], - self.0[3] & rhs.0[3], - ]) - } - } - } -} - -impl core::ops::Mul for i32x4 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] { - Self(unsafe { _mm_mullo_epi32(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(i32x4_mul(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vmulq_s32(self.0, rhs.0) }) - } else { - // Cast is required, since we have to use scalar multiplication on SSE2. - let a: [i32; 4] = cast(self); - let b: [i32; 4] = cast(rhs); - Self(cast([ - a[0].wrapping_mul(b[0]), - a[1].wrapping_mul(b[1]), - a[2].wrapping_mul(b[2]), - a[3].wrapping_mul(b[3]), - ])) - } - } - } -} - -impl core::ops::BitOr for i32x4 { - type Output = Self; - - #[inline] - fn bitor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_or_si128(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_or(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vorrq_s32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] | rhs.0[0], - self.0[1] | rhs.0[1], - self.0[2] | rhs.0[2], - self.0[3] | rhs.0[3], - ]) - } - } - } -} - -impl core::ops::BitXor for i32x4 { - type Output = Self; - - #[inline] - fn bitxor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_xor_si128(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_xor(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { veorq_s32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] ^ rhs.0[0], - self.0[1] ^ rhs.0[1], - self.0[2] ^ rhs.0[2], - self.0[3] ^ rhs.0[3], - ]) - } - } - } -} diff --git a/src/wide/i32x8_t.rs b/src/wide/i32x8_t.rs deleted file mode 100644 index 52f9729..0000000 --- a/src/wide/i32x8_t.rs +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Based on https://github.com/Lokathor/wide (Zlib) - -use bytemuck::cast; - -use super::{f32x8, u32x8}; - -cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(32))] - pub struct i32x8(__m256i); - } else { - use super::i32x4; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(32))] - pub struct i32x8(pub i32x4, pub i32x4); - } -} - -unsafe impl bytemuck::Zeroable for i32x8 {} -unsafe impl bytemuck::Pod for i32x8 {} - -impl Default for i32x8 { - fn default() -> Self { - Self::splat(0) - } -} - -impl i32x8 { - pub fn splat(n: i32) -> Self { - cast([n, n, n, n, n, n, n, n]) - } - - pub fn blend(self, t: Self, f: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_blendv_epi8(f.0, t.0, self.0) }) - } else { - Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1)) - } - } - } - - pub fn cmp_eq(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_cmpeq_epi32(self.0, rhs.0) }) - } else { - Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1)) - } - } - } - - pub fn cmp_gt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_cmpgt_epi32(self.0, rhs.0) }) - } else { - Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1)) - } - } - } - - pub fn cmp_lt(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - // There is no `_mm256_cmpLT_epi32`, therefore we have to use - // `_mm256_cmpGT_epi32` and then invert the result. - let v = unsafe { _mm256_cmpgt_epi32(self.0, rhs.0) }; - let all_bits = unsafe { _mm256_set1_epi16(-1) }; - Self(unsafe { _mm256_xor_si256(v, all_bits) }) - } else { - Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1)) - } - } - } - - pub fn to_f32x8(self) -> f32x8 { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - cast(unsafe { _mm256_cvtepi32_ps(self.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "avx"))] { - cast([self.0.to_f32x4(), self.1.to_f32x4()]) - } else { - f32x8(self.0.to_f32x4(), self.1.to_f32x4()) - } - } - } - - pub fn to_u32x8_bitcast(self) -> u32x8 { - bytemuck::cast(self) - } - - pub fn to_f32x8_bitcast(self) -> f32x8 { - bytemuck::cast(self) - } -} - -impl From<[i32; 8]> for i32x8 { - fn from(v: [i32; 8]) -> Self { - cast(v) - } -} - -impl From for [i32; 8] { - fn from(v: i32x8) -> Self { - cast(v) - } -} - -impl core::ops::Add for i32x8 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_add_epi32(self.0, rhs.0) }) - } else { - Self(self.0 + rhs.0, self.1 + rhs.1) - } - } - } -} - -impl core::ops::BitAnd for i32x8 { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_and_si256(self.0, rhs.0) }) - } else { - Self(self.0 & rhs.0, self.1 & rhs.1) - } - } - } -} - -impl core::ops::Mul for i32x8 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_mullo_epi32(self.0, rhs.0) }) - } else { - Self(self.0 * rhs.0, self.1 * rhs.1) - } - } - } -} - -impl core::ops::BitOr for i32x8 { - type Output = Self; - - #[inline] - fn bitor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_or_si256(self.0, rhs.0) }) - } else { - Self(self.0 | rhs.0, self.1 | rhs.1) - } - } - } -} - -impl core::ops::BitXor for i32x8 { - type Output = Self; - - #[inline] - fn bitxor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_xor_si256(self.0, rhs.0) }) - } else { - Self(self.0 ^ rhs.0, self.1 ^ rhs.1) - } - } - } -} diff --git a/src/wide/mod.rs b/src/wide/mod.rs deleted file mode 100644 index c58c239..0000000 --- a/src/wide/mod.rs +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// This module was written from scratch, therefore there is no Google copyright. - -// f32x16, i32x16 and u32x16 are implemented as [Tx8; 2] and not as [T; 16]. -// This way we still can use some SIMD. -// -// We doesn't use #[inline] that much in this module. -// The compiler will inline most of the methods automatically. -// The only exception is U16x16, were we have to force inlining, -// otherwise the performance will be horrible. - -#![allow(non_camel_case_types)] - -mod f32x16_t; -mod f32x4_t; -mod f32x8_t; -mod i32x4_t; -mod i32x8_t; -mod u16x16_t; -mod u32x4_t; -mod u32x8_t; - -pub use f32x16_t::f32x16; -pub use f32x4_t::f32x4; -pub use f32x8_t::f32x8; -pub use i32x4_t::i32x4; -pub use i32x8_t::i32x8; -pub use tiny_skia_path::f32x2; -pub use u16x16_t::u16x16; -pub use u32x4_t::u32x4; -pub use u32x8_t::u32x8; - -#[allow(dead_code)] -#[inline] -pub fn generic_bit_blend(mask: T, y: T, n: T) -> T -where - T: Copy + core::ops::BitXor + core::ops::BitAnd, -{ - n ^ ((n ^ y) & mask) -} - -/// A faster and more forgiving f32 min/max implementation. -/// -/// Unlike std one, we do not care about NaN. -#[allow(dead_code)] -pub trait FasterMinMax { - fn faster_min(self, rhs: f32) -> f32; - fn faster_max(self, rhs: f32) -> f32; -} - -#[allow(dead_code)] -impl FasterMinMax for f32 { - fn faster_min(self, rhs: f32) -> f32 { - if rhs < self { - rhs - } else { - self - } - } - - fn faster_max(self, rhs: f32) -> f32 { - if self < rhs { - rhs - } else { - self - } - } -} diff --git a/src/wide/u16x16_t.rs b/src/wide/u16x16_t.rs deleted file mode 100644 index 5e1a464..0000000 --- a/src/wide/u16x16_t.rs +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// No need to use explicit 256bit AVX2 SIMD. -// `-C target-cpu=native` will autovectorize it better than us. -// Not even sure why explicit instructions are so slow... -// -// On ARM AArch64 we can actually get up to 2x performance boost by using SIMD. -// -// We also have to inline all the methods. They are pretty large, -// but without the inlining the performance is plummeting. - -#[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] -use bytemuck::cast; -#[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] -use core::arch::aarch64::uint16x8_t; - -#[allow(non_camel_case_types)] -#[derive(Copy, Clone, PartialEq, Default, Debug)] -pub struct u16x16(pub [u16; 16]); - -macro_rules! impl_u16x16_op { - ($a:expr, $op:ident, $b:expr) => { - u16x16([ - $a.0[0].$op($b.0[0]), - $a.0[1].$op($b.0[1]), - $a.0[2].$op($b.0[2]), - $a.0[3].$op($b.0[3]), - $a.0[4].$op($b.0[4]), - $a.0[5].$op($b.0[5]), - $a.0[6].$op($b.0[6]), - $a.0[7].$op($b.0[7]), - $a.0[8].$op($b.0[8]), - $a.0[9].$op($b.0[9]), - $a.0[10].$op($b.0[10]), - $a.0[11].$op($b.0[11]), - $a.0[12].$op($b.0[12]), - $a.0[13].$op($b.0[13]), - $a.0[14].$op($b.0[14]), - $a.0[15].$op($b.0[15]), - ]) - }; -} - -#[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] -macro_rules! impl_aarch64_call { - ($f:ident, $a:expr, $b:expr) => { - let a = $a.split(); - let b = $b.split(); - Self(bytemuck::cast([ - unsafe { core::arch::aarch64::$f(a.0, b.0) }, - unsafe { core::arch::aarch64::$f(a.1, b.1) }, - ])) - }; -} - -impl u16x16 { - #[inline] - pub fn splat(n: u16) -> Self { - Self([n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n]) - } - - #[inline] - pub fn as_slice(&self) -> &[u16; 16] { - &self.0 - } - - #[inline] - pub fn min(&self, rhs: &Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vminq_u16, self, rhs) - } else { - impl_u16x16_op!(self, min, rhs) - } - } - } - - #[inline] - pub fn max(&self, rhs: &Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vmaxq_u16, self, rhs) - } else { - impl_u16x16_op!(self, max, rhs) - } - } - } - - #[inline] - pub fn cmp_le(&self, rhs: &Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vcleq_u16, self, rhs) - } else { - Self([ - if self.0[ 0] <= rhs.0[ 0] { !0 } else { 0 }, - if self.0[ 1] <= rhs.0[ 1] { !0 } else { 0 }, - if self.0[ 2] <= rhs.0[ 2] { !0 } else { 0 }, - if self.0[ 3] <= rhs.0[ 3] { !0 } else { 0 }, - if self.0[ 4] <= rhs.0[ 4] { !0 } else { 0 }, - if self.0[ 5] <= rhs.0[ 5] { !0 } else { 0 }, - if self.0[ 6] <= rhs.0[ 6] { !0 } else { 0 }, - if self.0[ 7] <= rhs.0[ 7] { !0 } else { 0 }, - if self.0[ 8] <= rhs.0[ 8] { !0 } else { 0 }, - if self.0[ 9] <= rhs.0[ 9] { !0 } else { 0 }, - if self.0[10] <= rhs.0[10] { !0 } else { 0 }, - if self.0[11] <= rhs.0[11] { !0 } else { 0 }, - if self.0[12] <= rhs.0[12] { !0 } else { 0 }, - if self.0[13] <= rhs.0[13] { !0 } else { 0 }, - if self.0[14] <= rhs.0[14] { !0 } else { 0 }, - if self.0[15] <= rhs.0[15] { !0 } else { 0 }, - ]) - } - } - } - - #[inline] - pub fn blend(self, t: Self, e: Self) -> Self { - (t & self) | (e & !self) - } - - #[inline] - #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] - pub fn split(self) -> (uint16x8_t, uint16x8_t) { - let pair: [uint16x8_t; 2] = cast(self.0); - (pair[0], pair[1]) - } -} - -impl core::ops::Add for u16x16 { - type Output = Self; - - #[inline] - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vaddq_u16, self, rhs) - } else { - impl_u16x16_op!(self, add, rhs) - } - } - } -} - -impl core::ops::Sub for u16x16 { - type Output = Self; - - #[inline] - fn sub(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vsubq_u16, self, rhs) - } else { - impl_u16x16_op!(self, sub, rhs) - } - } - } -} - -impl core::ops::Mul for u16x16 { - type Output = Self; - - #[inline] - fn mul(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vmulq_u16, self, rhs) - } else { - impl_u16x16_op!(self, mul, rhs) - } - } - } -} - -impl core::ops::Div for u16x16 { - type Output = Self; - - #[inline] - fn div(self, rhs: Self) -> Self::Output { - impl_u16x16_op!(self, div, rhs) - } -} - -impl core::ops::BitAnd for u16x16 { - type Output = Self; - - #[inline] - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vandq_u16, self, rhs) - } else { - impl_u16x16_op!(self, bitand, rhs) - } - } - } -} - -impl core::ops::BitOr for u16x16 { - type Output = Self; - - #[inline] - fn bitor(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - impl_aarch64_call!(vorrq_u16, self, rhs) - } else { - impl_u16x16_op!(self, bitor, rhs) - } - } - } -} - -impl core::ops::Not for u16x16 { - type Output = Self; - - #[inline] - fn not(self) -> Self::Output { - u16x16([ - !self.0[0], - !self.0[1], - !self.0[2], - !self.0[3], - !self.0[4], - !self.0[5], - !self.0[6], - !self.0[7], - !self.0[8], - !self.0[9], - !self.0[10], - !self.0[11], - !self.0[12], - !self.0[13], - !self.0[14], - !self.0[15], - ]) - } -} - -impl core::ops::Shr for u16x16 { - type Output = Self; - - #[inline] - fn shr(self, rhs: Self) -> Self::Output { - impl_u16x16_op!(self, shr, rhs) - } -} diff --git a/src/wide/u32x4_t.rs b/src/wide/u32x4_t.rs deleted file mode 100644 index 27d78cb..0000000 --- a/src/wide/u32x4_t.rs +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Based on https://github.com/Lokathor/wide (Zlib) - -cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - // unused when AVX is available - #[cfg(not(target_feature = "avx2"))] - use bytemuck::cast; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct u32x4(__m128i); - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - use core::arch::wasm32::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct u32x4(v128); - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - use core::arch::aarch64::*; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct u32x4(uint32x4_t); - } else { - #[derive(Clone, Copy, Debug)] - #[repr(C, align(16))] - pub struct u32x4([u32; 4]); - } -} - -unsafe impl bytemuck::Zeroable for u32x4 {} -unsafe impl bytemuck::Pod for u32x4 {} - -impl Default for u32x4 { - fn default() -> Self { - Self::splat(0) - } -} - -impl u32x4 { - pub fn splat(n: u32) -> Self { - bytemuck::cast([n, n, n, n]) - } - - // unused when AVX is available - #[cfg(not(target_feature = "avx2"))] - pub fn cmp_eq(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(u32x4_eq(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vceqq_u32(self.0, rhs.0) }) - } else { - Self([ - if self.0[0] == rhs.0[0] { u32::MAX } else { 0 }, - if self.0[1] == rhs.0[1] { u32::MAX } else { 0 }, - if self.0[2] == rhs.0[2] { u32::MAX } else { 0 }, - if self.0[3] == rhs.0[3] { u32::MAX } else { 0 }, - ]) - } - } - } - - // unused when AVX is available - #[cfg(not(target_feature = "avx2"))] - pub fn shl(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - let shift = cast([RHS as u64, 0]); - Self(unsafe { _mm_sll_epi32(self.0, shift) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(u32x4_shl(self.0, RHS as _)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vshlq_n_u32::(self.0) }) - } else { - let u = RHS as u64; - Self([ - self.0[0] << u, - self.0[1] << u, - self.0[2] << u, - self.0[3] << u, - ]) - } - } - } - - // unused when AVX is available - #[cfg(not(target_feature = "avx2"))] - pub fn shr(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - let shift: __m128i = cast([RHS as u64, 0]); - Self(unsafe { _mm_srl_epi32(self.0, shift) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(u32x4_shr(self.0, RHS as _)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vshrq_n_u32::(self.0) }) - } else { - let u = RHS as u64; - Self([ - self.0[0] >> u, - self.0[1] >> u, - self.0[2] >> u, - self.0[3] >> u, - ]) - } - } - } -} - -impl core::ops::Not for u32x4 { - type Output = Self; - - fn not(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - let all_bits = unsafe { _mm_set1_epi32(-1) }; - Self(unsafe { _mm_xor_si128(self.0, all_bits) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_not(self.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vmvnq_u32(self.0) }) - } else { - Self([ - !self.0[0], - !self.0[1], - !self.0[2], - !self.0[3], - ]) - } - } - } -} - -impl core::ops::Add for u32x4 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_add_epi32(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(u32x4_add(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vaddq_u32(self.0, rhs.0) }) - } else { - Self([ - self.0[0].wrapping_add(rhs.0[0]), - self.0[1].wrapping_add(rhs.0[1]), - self.0[2].wrapping_add(rhs.0[2]), - self.0[3].wrapping_add(rhs.0[3]), - ]) - } - } - } -} - -impl core::ops::BitAnd for u32x4 { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "sse2"))] { - Self(unsafe { _mm_and_si128(self.0, rhs.0) }) - } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { - Self(v128_and(self.0, rhs.0)) - } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { - Self(unsafe { vandq_u32(self.0, rhs.0) }) - } else { - Self([ - self.0[0] & rhs.0[0], - self.0[1] & rhs.0[1], - self.0[2] & rhs.0[2], - self.0[3] & rhs.0[3], - ]) - } - } - } -} diff --git a/src/wide/u32x8_t.rs b/src/wide/u32x8_t.rs deleted file mode 100644 index b3791b5..0000000 --- a/src/wide/u32x8_t.rs +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2020 Yevhenii Reizner -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Based on https://github.com/Lokathor/wide (Zlib) - -use super::{f32x8, i32x8}; - -cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - use bytemuck::cast; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(32))] - pub struct u32x8(__m256i); - } else { - use super::u32x4; - - #[derive(Clone, Copy, Debug)] - #[repr(C, align(32))] - pub struct u32x8(u32x4, u32x4); - } -} - -unsafe impl bytemuck::Zeroable for u32x8 {} -unsafe impl bytemuck::Pod for u32x8 {} - -impl Default for u32x8 { - fn default() -> Self { - Self::splat(0) - } -} - -impl u32x8 { - pub fn splat(n: u32) -> Self { - bytemuck::cast([n, n, n, n, n, n, n, n]) - } - - pub fn to_i32x8_bitcast(self) -> i32x8 { - bytemuck::cast(self) - } - - pub fn to_f32x8_bitcast(self) -> f32x8 { - bytemuck::cast(self) - } - - pub fn cmp_eq(self, rhs: Self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_cmpeq_epi32(self.0, rhs.0) }) - } else { - Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1)) - } - } - } - - pub fn shl(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - let shift: __m128i = cast([RHS as u64, 0]); - Self(unsafe { _mm256_sll_epi32(self.0, shift) }) - } else { - Self(self.0.shl::(), self.1.shl::()) - } - } - } - - pub fn shr(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - let shift: __m128i = cast([RHS as u64, 0]); - Self(unsafe { _mm256_srl_epi32(self.0, shift) }) - } else { - Self(self.0.shr::(), self.1.shr::()) - } - } - } -} - -impl core::ops::Not for u32x8 { - type Output = Self; - - fn not(self) -> Self { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - let all_bits = unsafe { _mm256_set1_epi16(-1) }; - Self(unsafe { _mm256_xor_si256(self.0, all_bits) }) - } else { - Self(!self.0, !self.1) - } - } - } -} - -impl core::ops::Add for u32x8 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_add_epi32(self.0, rhs.0) }) - } else { - Self(self.0 + rhs.0, self.1 + rhs.1) - } - } - } -} - -impl core::ops::BitAnd for u32x8 { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - cfg_if::cfg_if! { - if #[cfg(all(feature = "simd", target_feature = "avx2"))] { - Self(unsafe { _mm256_and_si256(self.0, rhs.0) }) - } else { - Self(self.0 & rhs.0, self.1 & rhs.1) - } - } - } -} diff --git a/tests/images/canvas/draw-pixmap-opacity.png b/tests/images/canvas/draw-pixmap-opacity.png index 8e01b5b7e48849ed7303d181e13560cc367844e5..81572a3457c05f9b04d34d44c04bfd94f1d325a5 100644 GIT binary patch literal 8055 zcmds6Ygkj)wgoBT!9$^~G)Jrkv_7e(tyn}M0ToeEuu>Y8nACa{p9jz)Z#G2{l`2>t z)JGGFwiHr>N)Z$!Eh1P^qXHsmjerOO0s#Yg?tSOnJ1F>_zxV#Qd|w)hYp*pQV~#oI z+WVIK`Pf)HSeu!d*(_ePURTHf;DZ5Pl-?r~l^v#?LpnHJ^{CY}l}H;ey>oCy!ezmTurjghxd9EAYsr znT2gRQlVX0Ab*WI(NT7}Er<11SWO`RisN1>{GX+lAiO4!Gomp^DvS2Abo6MolOSR5a!D^aX8e z;~uHqT94+Hk0sQ{*#;khntG=(r^ScUe@WTXDE_5l+V5je6U~Z}Cs+T|l0${ShN-n= zN7sq2qtBgwS2ZWtOk^d0E24@x1%A6m@rPQBZQ=Ro9;rcVVu3e!&#{;W5aLD8!PQRP-vSSwi z#)@&9v#k05T9#j})K)1p30XU8D673vaYbcZU%@HAdSB;DqXjenoiOaHgEu}Rn*Dj| zs|r{W4oj+niJiqC)Jl%jJD<6V&1#cyXy8;koQ7}zV@X8H8C0uULomQ=6e5j zZrc67(oTnYH`MJW`syR6JLDMB>-7-4h%YW%xvWYYP% z^1LwJC+5UB-_{=-qY_|c=#tWi13t-eV}XeLM>1Gv1J-qa4dFB8@R!D)zOoR@_e*wkWJMM@gpm>oba8E##`^UQF+82;5gJ{IzHNX3vQ` z(@z=d2u(-C2N05BxY^N?Cl6&~lb>G;*yuWucOj+hJUHncZ$ zTY~(}bH#2y?oW&4q7SEB66Kmo1$jpf5qW_=@HHJFu^THRKj{v)9+P37)KZgB+WYB= z%QEtwB(~n_QDq=?(n%J54187QB&)+}3(BQ8>i^@}Q0JidGSbgxieT+xm)lbt`D9}r zuXV4KESM)ydq!VU7>}{~T|B=0Y1=s1m9?zNdcC`G<)hxYa$}vuBdms62vBK;awvKh zvp#ynwph0_-t{vKm0%@o>bcNRCyIM!McMF;A-fCPRtfcayrRyqTa^!0i=6ePa*3)s z+{iKhl@<5L6@gj~DGQFu=V%YUsQL5d%Ia}1GP&DW;s6V4&YY~~cUfP*FDF?NgI79) z<~<;(2VkG}zNOoh6~}s(&kc;c3yCw#$}lUBFf<1f4>*A2NoZa(S0Cgz`NhviAN^A{ zdOM8*u&=?v?)&{W*yO6m>PW6W@353~^%R6(djt0A!$)PPwpwl!d>$!;C>^?Yxt{0_ zmKcBXhibEd&{z7#)w}+^a;|*d>dEsDmfO>z&zqIeT{rBK!%qa^@zj8H| z+!)oxw=m{|qSUoKP!J^Ps@=&zrejLE~;z z3#Y#D-d&wgYHu^8_$%fkIP~TB(FRf6V=L+%zR`bAVcUiOQW;`?Q=K*6CBw{SihFLk z{mjcE^00zTmImpm=w^of7tBlKmEP- zNiEfx!h1O$&VfW;c(Z2YUQPQ;<-eI9Dv6jpPVWoo$eju3aM>%J5i^j`2fxLuLUUAG zlUOqIBxlILT^C2k3));#*URltfH@9mPW|NgioWgK;%SF!M+S<0VOzreboMdbTtYXR z-TjSBCxMoA4{L%Lcasf)||^2 z40w3b%L3b|j%>N+d_-(Tc_8(-9UnP6NPUjmskBKx=%SQ*Yx|woZBNYwkyaV*2r{Sy9@UZ z?igwBZz@8B2x@SW&3v7xB^^i?JKkVEaB4^Jf0Pbg$Dg5I8;?)Pq#bZ2hzY;c3M zUFp_V(egyT>aOrCnua(KqJH|3j`$X1r!yAOOb7U^ue0#J;78%o-*u74z*-2)df}A)LclO zsTMljx|R4AMiNN{AThEQ=nX#by(m4@V4N=wZA&FODjuH?t0C;m0*kA&;;Ihh_d1^m z|MV$@SGxuJ2-GHP8DwT9949%vu$x6_>)5&ifm&p#q8`mPw5A@L%1d>6FAyK^QjN&t(3;g~uE8I*J@jY`?_ z@~YHp9`fFY(7WE_8-wQ=c>P2A7@baxc(DV26vPv1NmBr(CVZ#Klk0BUl4Ib!-Ebbw z%U0Xe+gBOv&LnNcCWD~>P(z(3vDRQ9hA^-MBk>9r8`LSu0YDpuT7VGqT;ykO5HZi9 zk-Eg!mvuc5-cIIE?1}#A#>l&LV(0`B-whx2hvuN%&|#}z4^}0BRYMvdHrrwongM<8 z1oIKV9`?|Ky_;>RkGi7WAryZFQitp)NgYl40e?t?{j$b?NWKSQK+XqlhDYQd{T!Z8 zJyCG=^CeK6uwHScG?-|bA5@<5ZEnB=6WAa>)P>=r$Z&`sx!z{Vvk^xnq$yFUi5jBg zmA*Tx%DHfH43Y(%UcQTLia`&@0VP1|FU=c0tbJ+>tDF~kqc-m82`M?jktC`r<2nn9 z3$vb|x8Bx^;cIshpT+NoU~8tY^I{J&m)o_Af-Vco*)cB^q)aP3K5pI^(c7C2T^tFu zkkpb3Qffc+9-5Bf7hv)I!)ReL#gi_Vq?YQ!uNU@L5>i^J; zK?k^qWd_7PP_zK@@o`;#?tSU;`aDr^V-De5M)7Hu4fpcPJ@T`AwVypQB{c_voy$l# z1b74UNf`dtg3qWL5%DV56-L++p080S{ga%Ouo|9?@uE8@; zY|fVVW=T}${w+DgNH`Wyq1rAWR&35qd$z%=N5GT=6%Qw8mG@3_KkPOw&v2AythVJT z0;$#LXIoXt^YVJF{k=J(x6^V#96%q^#NZIxG2M#Knmg?~*56UKj(A?c(}z!z>1sU< zJoVc=-Rb7PQs)vooP_nqYo|RjnFd9y`~|Yc^k+aKrYfYiFf%mij*|QId=}YQ@mMu> z4FAfnb`JlFq#@eg;1F>tO(KHNv7&(pOb*b{X22r? ziF(Fk1EMWr3WNZ;8M2B`5uwt=H=7JDIW)x|kfuMV8)-bQ5`zb}!gB$-?6Whugj@KE<@d z0Pa4Mirh~PgVQ17!a|db0^ZCX;5lG{|Z{b8sW%G53Aw@es_=R;<`*G5v!pv^A z6d!_L1{x@wYuac@#|$@Z9@SmfH9NGXcbA5`PPawzFrmkeIrdIC+hFoc463|)`)b6{q+ek zm)gFyqj-SJ{vhF_s+TL1E^1xjktlG&l*9&0cpRXFTP?~A^Lt0$!;%=id<5aVGHQOV zu}(Ql>0C8`V^gKm^m{wuFs4}-Z-iw1iGgGIOn44c3uS^9fyx}(>&$@;qk#_J;4mR% z4U)k^7Aj&G1>=cl3~G79pg>=47WIsMDBbn^{I4GM-f=LAv_meiuLMC9QcPIJ1Yjg^ zUI28MHD$N!1_EKNq7Ix**PjYmlUC)teyVML04-I)1-eCpJ9O)mfjbx-4%mU>%HTFV zz0fWXNLBOH_Hr2&f+-m)d}t+TI+C6xpkEDG1VE?T%E08(O92>8LLegDWW1^Q{6#zR z9LKzZGgcGLQcx2}?)1)n2ZEhxB?Xakeq*$uBzBbTl_8pl!4h@rn%UKTnzvePDC~7*HU9i#)Y;Or5E2Xppq{2~8f##b;UR<>{)j_5 zN;uMC`OSBx@r*Ql3=~uk+?c>M%j>Bhbu$eVY9&5j3A8Rg3v&YvW&1sUMBjLvq_P$F zgtIR_qgY@Ir+>QgUKl5}&&A`|olm@aOlknh0&wbECLi8iwg2CR0Uyo3f z+>w+-orkE$|Cyh(A9& zbUvYM!8p1$cpIhZm5s@c)Bk;v^*OUAa1lo^uvr6dA^leX0^|gV&Ta72Me)$IA%m1P zvO9#cfk7r_5^Ds_W2zLaq4|%4WkOfYnXbQIUEdg#$r|F>&pf^KpwDg5zU%Et(-c1R z9s@1FB=|s}!``%LmJT`*(Uvv1ms9=F%0ZhM_SAmjE^u;Xm;=lF6%e4QW6hy>G6fZw zU>Ha$!CIG^6-0|augOAuo7#r8L*|vJrk{+PG_Uda>q^BE#B2gq&1q1)Qb!;-U`xa> zG6U+Rv-H0mzxny`=3K+`tawcn*#%0v?}9{Y!%lNz!2()*!&2bH}DzCBvZSBa8aKzh9Ux)%}{0*ht4DPX%%8!3OlAE zVyi*9tFXrstWxN&~ zcPB(eSb=OjoUVr}D+1#j0B%9{XflKtK-Dt<5e^wZ$^Q8Qbyv@JiSa;}g234cN)m|_ zJudzV0Exz;>pM2Xp}>)zgPJ@Q4|4lz@god&uYhqGp2rHI=eGNi7wMja3>H$wKnZ}m z@9&9|i9R-0Kh}zjfb-d%P%_}~KvvfQw?PkJYK^%wq3GTBSH+^{s zHxZ1$0&PN^Vt|N2^mF)AgjZOt${sljt|{N;^w%jhq&pV5iK^=W|9ElqI^`fXy_rBl z>J#o(hpW@cr&Zi1LNWsG@3ljVOwIS1`L?_4(jTLE`X-|mM;Ow zBCye6j8Io&mqOnog$JCEs9vrMQ*;^nf{DH|Y6isj;T0ynIA>aJaSx(J0}W+3lxZ?F z7dcKKJ94?byQn?X8auu*RYfONS(s^dpydn5>}z%%m{}6h1lR;C9Q*#@o~=~^5~)Q@ zj1Yj}8o~++9A>0U_{%^||JHhiDG-K)-)1UaJzomWc^Zj00^Dj z25y=N2Wboi5JvsgI7>!&!kuL~Q+~Dp;E^EsX+s$OmH4x0HZcU5P}121hUrr-jWrmlvJUA!fPZZK?s8f0e~Ul0e_rnz6QKQ_C7hS zZ{}$`Yzuc$QILK~|2?c}(Lk_ikM1KFV91uLPK_VZ3G*CfI--dIbIWO(F~LB(hVj}3 zuHB#jhjco{x8TgrOxXz+3?^pK70B_~=H`cP5$ak=|5~D)`T&3|GBG>$9=!fD#TmQc z;I}EgaQl%X6@zXDPLQ;~>kl9T$R2}V<`-HM4g2Brrq6sMx=UJ z1-EAqE~p7$Oc*lgj;x)KsE|vQaW&3t~|H%830s5;# zVg&otx7Vm25~@w?Z(ONxG)U6Qq#zjT6krPO3GUJXop!$H=~D%HHCW#C2Vn#642&#O z;I+)G2>=+src~8$8&#UkBhN!)E+@G3Iuisg(FHt&lO?wBQj}W86ct%*5oMKe_gzbzd$tj(I(DDlPyw$^{tsRYH?YlFACxWs VOngoXFVxKzd;2Yr&HF0xe*m3EmX-hj literal 1585 zcmYjR2~bmK5Do+a5;;7O8jNx&AP`6%hZt0jAW#t^5I{6sDqtkSIB5n1!V3`;FD`Sz zL?YH|0OR?hJW45uqN7MGhc*v%)Q(0m3Q|zH(w9!#=|A)Dzq{Y=w>!Hte=zHIA&9SJguSjjhV2lX#=!9_8#OU<~34)B~W(K1<80-uNK|m_?0GmBP z(1l$a3^oFwl*Bnft&~x@GT@lh2dwfc5cyY}G5A4oKzRdDF7+-F2Y?NJM}&SwLLX4< zFKF-sMSwsG6iX?B28uvP1|@!F_1-1*zMvQ=lTtttxlH0yA~cvK|Cr^2B7m*cX%-AimH5@g+d1 z$WVeWCYMUcr9uM)8W<>iG+snTnfVejPl%@b{Kt=0{F@DB_|%`9f{rDX6BiSYF)}j7 zVogj;aX2$`3%sSJwat=c%U9T~v|mLeIygExJG;BD_wexaB6<7y2W$uoqy~kAgoe=~ zA|j)@QCOJ{Gkc0xw zhc5o#>yXbY>=ecZ72Qpn_eI*GOpLj*qhG~F{M&E9dh&4nh&=wz3xbQg?HpDqiSD}~ z8$WjPdEQ3HrOAhTBzPxg7nfs1>lpwuf zl~L34%5}%vyrjbN>+_lPqgfjrg0AU}XtIlrJ8P7$ulO%{ewPuB`*I_vtDBq5Q4Ez% zRdIQulKSYg*=o>jGSH0xTRWuKyjgdd0H@v;T*}fCnsD}F+b8PszNhjWZMRbq;zfX0 z^i;C6De6aNeqnZ3b5mXY!fxS^gN*}C=MK~%vBxWveV;@*TFn_v;GW&CnV4f09`prl zK86+&(PuOs)}{~1seU$ICYxU_tqfw&3DyhT=?-Xd-UT`J;F{IFP#7D}A-3r9)thiu zjUI!l;HQcmh%wVO4fV@bLtbje_XcN?&sx?lc}vO!D#vh%2tz*hM+;k^tZPx3!o5PB zFcV^$z!8tjlF^hY&-`Z?^CXn!-e|QUfNOnvh-;=j9Mh_z20z8Srm*cfqXI{06brLl zA>tzVwlT{Yl9|D$w>dyEOL%<-G-?L3jtp{Lw2ya1vamV5(J{)s03E`c8Sl|=M zz6Fx*Oel24YBa7cSp2&uU9Pf(3tsjQaTn}IX9v)tZrY8PP8e##N1Ni2ck=d2YatmH z)=s*_A@1@qxi>JR3Q20598$f|Qi#`_AtDZTeS>!O=P6P%kc51bk1b6^_RCLP{I!w_ zvDJ6a_cBK`tl2{On1$=|j-l!J7urJyr4A5rF+8S=W>HNzcKP2mqivk-vFL#j%G_UL3jt`F2zN%vhS!wrb5*fs%n_PcuJG;vHE<(ZBtq+QfOyc%ab5&niRDXf|Vy zFIctcE;X(geir(w++`pt9llk=Wz8B5hi?aB*|GY`iOyPYmzT0bPUE4q8D}#$hx~X} zvpNX=^rlgh`mvot{-y)ORE=L&9r&CI5v#l+*#R;5 zM1(DE(Y+_dA>XczoYyOLgYk&o>r1QZ1m&vc;He=lyO`A;kcfo0^JIHB;tk diff --git a/tests/images/gradients/three-stops-evenly-spaced-lq.png b/tests/images/gradients/three-stops-evenly-spaced-lq.png index 652837949d09a657c1db4f8a08d2c6eb8eec9117..2a2f546756dbae277711dab3429746eeb1e6afcc 100644 GIT binary patch literal 37525 zcmd^I3sh6rn$8mv!ZXPL^-&_m2-9M6TY71SsY$%3p%baqp|m0<)JO4NFxFPU^2kHD zir67gZ>3s=22{qoP)=TJ>fu_)0^+jNjSg>jU(^j*CM-W!0*A^P+O9cYf;8AYK)h zwmvQG^#=4K#e0r)?kivBdptHSp!?JtQbRux{`LlW0fs--!h3R3Q29>K%6jLe)-PvWcy zz=?6fqf!!6z{byFpC)m(5*fuf6?;npjz?xx=7tjRxomB)ABX361UyAgtWU)ig(^hu zzHB^+Q}54F%n3dxq!&{df@W_4yPnxX@Zcy|)GjTKNspdY%-Uc&Wvk!W$Nf{y=4HPg z()1R5)5-rEk39X{Txzk(FSU-c7)SM3ok(J`M6{6Ca;W0dvv6PmAJkDo6e7Y>k(w$F zbm#E!q-bU}PU^u)rY$BBJfanIgT>O|wcsgseNH!yy%<;M%@PqP;+!xsE;s6vjclGD zM{!`PLX;DFkjw;|q&F+^A?%{1JYkV`Iv)vW{Y?JM)w#FVl~_NLe-~?8of8`P_N>>m z%&iA#^NIco@O#`F)uN` zIM{cgi10i;k-$)br>6OFlHFOK5;lJTK35Q_8VOUMQHJXFtHzybi98aPno9$PpK)4N}^hf zkNP*DYb>5$B@fS}zwKS5jVi_g4Gs3&d)j{?sx(q}w^FreT^%W!5T>nM+=wG`tzT(O z{+4sT+&$aPT{6q^{X;&}#C50Fy+PP3%}D^u6sYDb3B;2r7YSQXfj&sX0gX-osKF}2 z1<+Rz7$=ofRH04)O;u5K^-QH=4qaMEVH8uAf~WF$iIKQ7OhFluQiLNg5@`$|;@4PI zaSoLQw1S?vB^gg<2-KuhDb6F09UxY-8wtsgWG|ljpk!cq_KmPXSk??2XIWgl_PNX- zxV1BEFC5SR<=SlXWLt4LlHddy`b8<;ZBZ(X*Rv(4|374BV6l_GnCLO@lYBZr+%A>h zUL2iyv7QnlQ2Q_32F^!!KpdILDhHC|9*Wlee!`ou{ zo!eWBMo`0_okVj(tI2`9M#VO3XVBm|*@+Ry{4dUQ%t?Z?F>NpLZ*A9afnOx|1!r3% zOL+-7Ey;ckS-Cmj!nv%02;>iOwZLM)*b-2r+|y`jDLyn(Nm|r%G^lsfE zR0}*74cwHQDr#PWA#z!W^Bp_0tjn$D!Y13<_z{*VnZ8%L;+qisVqX2Mz(fz0a9Y?_ z1%b30KNk=$wS|lWv^5J*qi?i=Mtwm^ZP$_jHRinotmwL&l*BCp!8Gat0Fq)7cq%ul z7}%dHJXjY2595@12Xd*U>eK7gz-%V@RuX;lcylWiDqutsB6+}MEuwKm^dx-cE|puL zs3lCRP`Z_Mk?11f*)ab!D;O!h>Kmr*H0CK#Ut<5EUs?B*%N&s>k^%V`bS6W}m zo69Oylip$jZ;$|BG2!LEk zc|iSYE{|TZhjSv>y#>GA*3CD?>Hi9;ob7#Qg@;lq9oYJ71wUq=@)`WP>e_ut?z0s; zvEl&R8P6iOdV5fRaYEv9e5fbU? z#s}Uvz&K39Mk8*nd6p-oZ)U(LEK==M$7DagaHQJowDSt;?23KGgeJ;#E2MlBxiGK*Jh&2nvp zW-6?x?Jl9HAbJxIq<(fAG27Y=MGGxK&J)0SxYS=J(0S_mpxcNB8GzzrNP&}j1$g2D zbu);q5HD7ey+p$i3~He0id*I{X4{Ysa%=|qBB!;#h3^1g*IT6X4`<&x9$ZI5A zOm)i~%CKIKymdbWF({ems&ckElIWRv@`3$Cx$=I(+Yae(M(7LS_yyhR;}?~sfa5`= z7PVV`0Eah>;VjTK1?J8u@@IP~5SO^4OdfqN= zz_ClRS_W)t*14Ri2?E10N~OQL@-Vq)vB}Rl(^3DQ;|W<1-{U}hiS-o& z5O8ueIrCbzZ6Ff%NP}FPu|;dE%Bp)_vw?`$-q(4+h%Eu`(f+!W2!TkorG)gt92?jkWN^M0%_S= zB9Kf_-a#OSu~Oaz#Z_!|GtfwUB^q39B%NHN0%-fW7sx`mTeOkZkxKgd)2O5%T{@*P z+13+jL#kmv@pV_pQjZLadp;DA?rb))ZGLO7QU4JXXp$3LF`2O#U~XBrC#VJ{(IfFh zP}+~?UUz7l7l8Er-orq;+Z3yGc&ep25*c~AKCYFGy_fVD*Xgp9{yiVdY-Ynrh!T6e*Ej2laj*?4R& zI-nKsEG1l2`11V%6`-Q`l9J=q<2&BC#iFy0Jn1yo%`uIP5J5IFYHfHYRZdl-p0Gc7 zS(5Z>etH`0!+MY9;eq>ij=oEE??%RNwa>p5H$FT$FxtZdk%-kGUZ%yLuOy;1;KR(O z<X=Tlw^{WE`u@Job_CKvC%#q^ZjKt+5RXI>}r!RX<+V0*D5pQPl7jx9si| zThFVo8~ohUYG#M)yhZmsN5Xg= zLcTS^6&Kv6G>|ZY+DSW-0qiO37O1sB$NGbg&4k!4V2-Qj%h7!0IORVAgg}vBz6970 z1(RxUW7#9c=f zS5j_?%Mk^fE)8%fLizlq)cm!m8p|l0O6Pig^W%_R;RDVSLUD*gA z=inBu111yxBvM^vYBxe@dD;}GhGIGQz zZ*bccC_(>}t>E73zh60TGt~~;4;8mtB(_t|dnwqS5+Q8gGV{Jn{EdHeGV(vpcctF} z0&()ttB^ztv`CEeG4DR)I7{+8h$t*kfNPEEF|Vih@v~*a0mSmGflJ6681+KKiZ^-S zI&H&Esth@0I6s^jIp2Wn;M1{bzBaM-O%J9+&}=Weam8d&Db*8A>aGwR0{(@+w2K+O zTLS^Z*R###wgXtS#KTeyxXp}KOM!mpuy1j+#2PCP!LSz@7HDu~ch7aXrFGbxzGUvI zuzZU}v%d+gfdjFF9T!0@j)J%&#T{*?Hhhgz$pOB@0_j>1cw>QoAc8g>%fBe?0w_lMAZgOx#OiZ=^C0Oa??i4vZv!B;N(?{rv2XbI5SFrl;RS61GbW5cl^r z3?Ag8y(BT|0BFCY;i#ZVm=;`(x9DJP?|(%y;O@wduGoRR%$O5lyB8oh-;sI7;G*^G zk7VB=SrE5ZSwG_rTAiW=mVLclJ*?fE8mxQt0S;{_ivf$93VI>=ZtuvxVsLjrFKaBR z+x8wL+iA#8T$9z&-CG@u-KRc;4AuU=**2qx;~Eh=-afIVLt%=H%+?Ouv-a+Tj-e}# zybm$6p?5=xF~gCKh#f4-9UCA~zS%e1V6<<8drS`&3OXA4Hk9;LJEE&O79`XF8(FrF ztO0|E{bm+md8B`VbTz1FLy5K8;T>=t5iTF$A#UUw4$0%}G!z3jq2SFhiz5ka@u4e zg2^xBS7ut?w99lF2eVs4|NR(N);|KW>t)J`o%Do>40b~@_$JwXB1 zp`W;xBYKYs9eRZUcTI)3tHGu!ur9FQ@DhhW#Iq1Y1Z38ovaFi0foWKXocF1}ZG|1w&pF zGQ|&rom_@@GE`q+@_Ld{oES=%9+IvmiRp^Q@5!esVsIXl;q$Q8`>?80 zh3%@3xqkwNAUfF0)Zc>4%)bZ7f2p87&dgja;|K;Q4s>iVpMU!j?Ma4=VjxI6AwiOv zulgYCA9;{6rZM9*D3YT`PJrn!k3ZuTgV(QvUcbSz$H(&ZBTkIQRQjQU&12^L**%vY zZQ3=KYnP|rPkbKh$mh3Rnuu|T4)G|0AdkW*?{&BA82`|7Om$TQW$XAoCk)2&C!Rm) zkQdH}Ubs(Iui5(N$C@$hV0yLPzE7{#(*c@>3b)4?jO^$|L!l>Y^YEfM_5K_%&+(j) zUQA&Kzz9foJ+p=20mhqByRz1+|$roEtlNdubJ|gLv`(2QM@nUIwY(8+>GrKZ)cQJWCnx)vh0>>%R1*HuDIF| z$otSE7t@f>ollAKVY0f+&FYZQA7MVZL$t>>2|8^<){RJ{X#5k@9kaqE@CxNOA}x!Y z3o11yAOM;#85q!nI1`d_RzO~fH?MzzF~B_c%faJf-MG>!=E+X$L4pcA3MXdvi z9?8TSy!dd|vWzRwFY&?FYL{0+*KtE7$SQsbhJnicLlr&uyoe*6`^uO39*>Pn=vT4! zAMM;1Kzgq!Hm;Vnxg2~)7A{@i6w9St-B-TM-hHp$eb*&-SG_x} zo$R3_yBWx29obz+=C+YJZDdv(nchzJZ>0n^Q+!(~KCKjfE9LtpkKh)M&?b+tiypCB zYK)e8u#p<6r5@5!lj~_o4Yb4tT0#RYy^fw$OHZw%3+m`8_4IrVBUi)7zQ8!CVTd$L zNW+BBGeHgWl!mFOV#%vnXbnqN%{p7jF0W*(D%qu#Y^;i1UBT9zWmi_PD=OHHYEIJ` zPJ^0Lr{-Kxa|~r%T{*W!&225`_Nu&kR9@XGugfa0jxw*265db=Z=i(NSIQe#@^2UO zZz}m?O8yNc|Nd$3dkXJKh4)Vi?+0?9DTU9h+;0{ycG)avfJ%b)qp~)q1G6tsP{9I=S)hUm z7Slk?6I4>bQVLi?Dl8)vmb!teePG#MuzcGA!{_*Q>B$WC-w)K()M#+EgbTPjLOrg5 zpv7GzG~${F&A1jqE3S>uj?)qJI0NAlu7l9o+1W+7jO!-!;I0s^;(7_!aD9Y++yG$^ zH$)gFj1aC9ZhSOKFdB^?EJ{ZKz?92pGeUA^f1lF(tzpO8>;kCCk!e7s5=WXeTt-%b7SfJx# z&SPhd=ZI3^@-sp*Sfx4rmUF{-G1_Py#a`jLr(30c^W2=isi1|vrOY$^^y3df&jNXU z#quq{k=J??%-eBt`npiJr4uC$bUd;a+ShNT``g(baSTrH zeSnoiiM;Uz#kD1jYhn>S@@EJ-H|UIHD4#ftn>W6Q5oNyngK;x&I(GNV7}>QxXjd)vJ+uggq;FUa4s z{vBy&@bbdu*e=8@wrkW1ub18ChUWx$eWz#ZqLodSPPw_?EoYwbG0hAL!wTB+S?eeb z*e%agJLaC10auWHzIj-G!rq6M?zkG{H{)84Z80#(&J?C1syr}+Zl=?haM zf@J_>FIc>6NBWf&;LbdPrCK_NBG^o(`50LgZ_hMXIEN!x3yXK(+2y%aN!I^!Iyl4p uDdoT8^lMj(-?#w0hf%;C@xQMj#Yo$}5&m`FrlkP?763lSpWVicF8T*<(;++n diff --git a/tests/images/gradients/two-stops-linear-pad-lq.png b/tests/images/gradients/two-stops-linear-pad-lq.png index 41e00258a289c772918e8bedb5f507d0ff9ab5d1..04e065948721604c85b7b5c8ebe3417df773bbb2 100644 GIT binary patch literal 32338 zcmd5_3sBSNmjCh)5=0455U(u+Mbj=?sP4#RnLZ!Fr)h6BU-E~{1U}e|h7EGYAXw{~-FUx8Hqe9gRfe1;+YtQ#3n1CSU1#zbB zlt^-N{=e^>-#OoTRBl+mc2>ymLJ&f;($iM`BmDOq{)Z6+|NivFS{Xv(x#_D`{F8`s zx06zxkbWQ`J!0P{hrd|6YSp%F+y40l__GuL(O>k(pMS+~otcS0W!ttjYgX;~;>h6; zb;h>Lov-iQDOBT+%=oC`_9w@)S*jN@HxHDtYybE5Cv)h9KfaLJ#I86F|2Jbr*6TmM zki^h@a=iA96|Bt7um3o+Y{ai*uS(Llbr@Q+wyC0;qgqhzTmedme=-tv?WRkawJiNI z>DefXG+ceDBC#wnLH1@EjUKjD6{Tq_OH^4R5QVz0I{__hu4kemeqnhsiU?w%MGY&_ zq2P#2_!ZMa>6?jk>1ny>RA&gK`l>K1K3NJcCs5e2%8Eo))b8Tw>q>4lN``mrPN-!u zDF;H?D57vR%8gA!hdNW4)!Sw1CuKTuL`-Fc9f&E9!@PQMgT7#K&&v(lV-1y zyRQ)IGzQ+W2p-NJln&3TvSB_-4og58+N1Py%hd^JKm0&dU($a}{NtVeq-Pr|vaR)s zYgLr8#671kE|sFRm0646Gnx3ad-Bk}L@HV!I})Br&rIV6zZ=AeJqM4qeg=G@g@{Uz zR;Q!q%JKkDVhw!h1o+Y!ARc^TEi03*e;x&AG5|UFOH$eRd*z}+ke({wO2Yv^RNdmQ zb{DfLx$uta^sM*@Y65=NSWy@=@IBa9xHSNty@;j0R>L1So_$aqw@N`rWoU92c zT%9XB`+E%B{qT;dm5HlXN)u=d=_+`(V0zFZc!*n%XMyrATWZ(nYF(zaB! z@7F)`Om}QC=mI&L@E?p{;Pq`(blELl-MzLPTWi3pIV)V* zDsfTtoq1M9;JTCwKr{H79cM?&oEETN&>^yRvDF~7{N?iXoVP^bP5pVLU&gHnU^9p9 zpZbr|DjOJ%XY7VU^3!F@9|GB8WQBA0QK*KWooc?FC;26g!U#y0T||7jD4IskEcxx2 z_|ly|;=Bjy6+KmfYn6$P%koSvO8!5rWa9J#(T&&z4t|a|xKYtFV8C3oZm=!Kw3bVJ zfkqs30y8Sj>A7H5%2h?If01_X23~+!8E^--Ah+)*{lf&&a_rk`SwVspgy|o~6!vO6 z6!aaKAL%2p(?JKo&(MQd;7t;ciWY+jNV58mk75tEhPz37R24NpUT^>y&U|oOgp`4Y zI(casun+h_-=pLO*;#PC3#q)W(Gml3!rU@YCD!b%GX7Av%Wf`?mkwUGh6lKgH6fHZ z%f~tTH_R7UNgrjJeyg)A2uwCK3}ei-s;o{eOR$y?0YwWin(Xi&ox3C849JbY3N1qp z*Ef_WxfF-PjX;W@oW?lpJqU*^87R*9agNQ7i_7NU>Wm9eV5^u}YdGgrau-%|WnimV zoA0XQA*|yk11NiOpSMAX{5Zt6ro)}7r8ka=Ee=P0r(V%jr9{yXC$8b9fcdVi7R{bN zi^Z*mzl>u|2$t!L#SlKeM^{4x4uNM2N{bOdjs)VQ*fbnpK#(#UVkC&QAw5GEyapK= zB?6;V4WTA@1cB z7`EucRXEm%{LIC*CAU)=V@VM8KRnZM8zVry9r#jyB$u-V@iZG1*BpL&pe@JpcHksg z9xxKxqkAYU?KSz(th!c3Xy5!d?y9Y~`SidFNz3w{f`myWBumtEb$Se<(5+gb^{va@ zBoGfTXZi;M`W{OQ9_8hWR-83JIt#ZpRY$KN=Vrw~vXBs|q znzw6tlACeV>AwCL*LN^vHIQzoF8P)t(b;W9yB33D;k4f7O@N3aa0z6P3awoZr}ww+ zlACe;D9{Kop%XL~m(!OIdH&i5CC&(JiDh?bJmV3EJzgW%{aM18_M4h`Lh*G03g%5@Vcp@$}v4KGbnY6EJ@}w^V5={W?WKA#8IU zzx#qsdLaL;qm__au3IRN71)$r0S@Vi5V1tH=l!4o!8P&EzaHt_Oj_8s)CiuRm|Ho4HG1=B(^# zDYKb0ETH6SuYTygf?YInOT9__%|Q!AI;}T%4>ifvxEya8v;9tu;;zGF7_rHi@{%#P zfE=z)U}RJG+rR3x_RoN6AxYObD+s-OwzCfbT? z)Ibna(r`#_E2 z`Ye*|EHI_!^qbHWR>QjErVrC051Y!Qoodi*=8sf!(iBysdt%&x=I8E9q&HsRNLuHS z0_*l34CX&fS;?_yN?w`O_oUqXSiw0GN`0ZWeAE#(g_W>wpS?}^LcH?>=K-88H?778 z(Ep7Jp2Gt7GxXj&-_iFL39@$@t|kqYLjC#FmWoY=aDt?8H<^zs(nP0r4&CS#Jxd7l z(e07^%~B13^dSVzc@#`YijDaJp=@FB$>AJjl0?_ zNHkI_*UwM{`>9dWYcmWzVMz@ue?|qS^BuZ1b}D3PRNI=Su+(wlcvQLsS5&H(gyR-Q z1@v^%9`D@F>l;>ak6dq51hf6ppXrTfKD`jUMsClXtoVF1<6zSzj#xd9^oq1wbUd5h zUyY7FIB|Pseb~8_QedeZ28G@Lpm@eF&7@wKwBY9ubp1IX9D;kPKjruB>aZWYSh>Df z(Fz@-zHZs|9VD2Dru|87Xev;VP_IgDYV5dv*gsvF9%LxM0It>wn7E7lkO|D%JVEm- zpzYq#N)Cp0QD3(hdw~i&_c@)jqx=Nr5!^y;p^7dD&Fv`~LWEkoOWS*W-^lF^DuBX6 zFLtsXYR{KT%b>Lvgg?7UtkItdCrs!Wey>g(2K>$sjjx`C`?5_?H4EkMFv%cj!wzWd z)z6gOUE(}^GJNR>+6&27duZ?z|zB+vRX#TJ4pRB|q++9NkK7#5+ zwW57aBwxlzp5fcpexIH@&~PSQyUnR%Ik9jj{ndHrIb(B}wj|MVxW}n(Y~zS+4p-f+ zFl)m&?^K(NzmebH`eiP1-Bm}p_5gvPgTtH zUys#fE%DgC1KP*VTj;qj!P)Ci=W88Wwl%7)n`P;~7!Bjiu^WR?xG0$}P|lNl7uNU` zVHXyDZ@pxWV7f&f+fu-^9YlFC8c3+-9lk3v+7yYt8d%~NAkzihD=tc=@j%H3^Fn+v z$;NYAv4M&wyNsl5#DC}`rLVjT17DUFnA7{bPP;*Kbjz>zr?JgbcrTS)y zjgMKyeN;5pH(ldcI_Hu8C*H3*o@wU{KQz5g)*z2fjTtb+fvG&12J;&Z!|<-gW~amk zEOES-Q?I+g;k#+n?e6WFwzq z0?+MoSPGa#$KEf!Y}*s}#><4o1xwoVtWQF=z>CRZUg-G2u%CnUz!*mKZ+E`@P@*|tdWukVmTk+}Y1x$z6+)gUx~x(@;h(fT?Y zlYh_QMIah6Z}xUPmtR3FT2E>0;h4X5n74a6o^B^jEP63c%%}3=p=_>)PPD!`s?Oxp zFeL2Fk9j=I&;LhGwsA*!zTz8>ReOZgl<|He8uoaS-xL72IqKvr?LWv3Ulz3B)a=j+ zwdrC1C@bP8>bm8hJ8djcyk$rWRO1^m!2~fjqjY8W8p3KnnCQuqX>VW*jeR>J=DO)PK zAIXZp0?i@Wrbvll9cdVeGZxFOPkT5cacb}YhS;g!DzjN=drWrL%~`NspJ2-{$1>u-56y&weUriqEoI4E+q=~c#@oN2qjFq6O7?3fb|IG z%hoB6OEgtym?RVQ?|j?$;$XI?gYSGJE`r(B zq0qiIWw?C}T^QSx5$%>GW)HaNw^%-OWzu7}r--T3fbGC^Bs~5c?$D)~#{1{3RnmUU zygZCO=~2+09yRx9ufvkrixdQd$4gqO6i*O?cAek5QA~zYyE(&isty|rk7I9ACQ3$1 zJWnvG+3~(-4)?R|kX9W@%)qnZL8?&8gxWNFGp&2#BpXSbK`?P@GFt__a>ZlE6{8bq z!*NAX2#SC?A*3=0ojhCL)Xy;+j*uP@-ZdTX=I6~Gj^&#yk(}ttXgC{1LgSGK4sG4n zeeY|z_IknBB=p>)Hr?omp6ncR&x~*E;}jTuJ6v0Lzt~_@Ms)=7g~>eR?^Di2tDlf& zn(qUGBW~x|ma89`+}w4~` zJb`t9Tm%Xgpd|e$iJ^#W;zb8OJ#b@XCP%O{R}D8uJs@#_3tZoHm19;p{Kick&-giI z!tEEHo;hzQ&ne7`sY*l-d#@2B{=rq^)f27jm}Y?)qBf!7V~44@-Ce3Fhp^cZB#JL?8h6edy27+m6c(|!{ zsa(6!K_>n?$^6$2>UhhDyb*VziW9i@9xFU{bs7}=yynF0u1N9ib>zNFK3Fclw`u&P zKAxk&7d``q{X?LzI#=-n$9(SyDedQ3X7X=+j>Q^t36baW`U<_Uo+?Q6vCwcUg)B5F z9hxIfa@_v4t@B%VEECd5g z@$l|iMt*GPfAFpy>)20f*yiRKn2M-zHYH87=_NM-muL*pc_$ox!f>$f1es_=`DA0E zajG>Ql2jiNTR<@nj7KBu2!{gL9XOXLnlK2RQK+e8+Ej95_S1r#P(Mlep` zW~XLkYIj!n&sZ>Owq~@rcpFHdimTas(6C2dyo|*5d8OA{ZP9oYh_A%;?Li}HlB7N+ zU*|9mLoHRdC9nr1P@$_TdZSWkIP5UJgLA~@g559|d&HhPU%Lo5qD%+D0TgESO3cQY-q*351?G;;RKPLu%w6y#VnJShtV{ZWXtv8|RX&YLP;F zT|CoA%JnvtgJbt$9;Sm0PZix!5FQ`E;x@U6`@CSj z?7D8Q^xPu&lH|H5w{!1W$$j@ZfpH%CGBO6xjYq=}MWphmB(ofqh`NV?6B0`7plc;f;6?7ED%K7#t+vZirBR r$+ymF$3|@2jp&nFqTa%%CVs5?`23fD`a9SakJ4AKUsby@x9tA`u3^uq literal 1661 zcmYL}2~ZPP7{|j=8)@w%7L-~dgjk2_jMnOCN)S^}*Mlou0mJ|z8UYOf5(0s+o0n`h zyXi(m>QQm1)K);Qsu&G&nHpM8JU|&MRmRFF#X6R%;L#T}^f&+az1jD@{eR!gd-IG@ z5y&);Ssn}qW7^s^E2H7u?HEiq8296nqYQ@YqNuRgRq*q+X??#byx+9`mMOf?6mgRZ z@1-JpsqhogQl;|QAbCD9Ar((}jF~3o<-Bhd{ii|r)#@Wbt z8yR<&jPD{7y2$trGNGMJXrmHasYEN3G3m51$zt4KF(#cfZfG%XY$20RkdhN*@-b5K zGbw2zQ<_LgBbm}bq|_6sbwpYnky=Zn)ez}5MA}b8`j15VVTU&VKx7;uGQNj0WKYdYSeios#uNY~0Bi)hjQ}JAT@uhI0bL@{CusFzEgr8m z#B1?5tsw>qGe%=Z5oV0SjFA`>i5Vj>Dh#96W7Im#v<{=zU?v2kSEKYwlnzDBp(q`K z?hHZALUiW}hst0KM$3ZGT|sD>04*1wy8_X2erW~2w0v1<#WHmFQnX?TTCo`Yb}?Ep zbxKw?yfoJQ=!h7`aR(>f(cqZbMlLm zb5894)9oO|8xLnN+=OdaejTgo-<0M(m(lUsdoSa|vtNY0cyov&R(r><*5rKf*^JIu zubMTh{)J;+beVxYSIk?|@oPuGP{;3nf)xKjKLNjS%5n8{S>*IP8^-FkGyS>U<0~-S zZ~X8bI%3Y$O##fSug9AubY}aJb@0>HGc`9_yxvIkZ=pYCjnww0qtY zc(`yHSp#rQrGHMtrK7A{Y+{({o?krQJ%9L{D|85dV%rlR@$mjZlB4u=6?kAAJxYs- z#{srHpP4Qk0h}DZ4=l4_2L6(aa1Rc8LGy`?pW74;voLO3HLG959A?_`T0@EvZq>vV z7fuZE=(&fS)9&Rja4q9tD1C-~WzpMiHgK!b4c)X*XrFH@@O+zB(beoL&7nfX;n(xT z;6Ii+YOf*Pv2fn-@LDO*D8VsMjL0e)bU;K zDiyB|v#p2^rh3L-pjFI9{HoHI6210UaeE#O7Tr3vaWrEO2jM07+Ru-SvJt;O8YGUA zm*utwg(jFtc<>A`AJ?)j%7Y1s@6&i@pS_i%{c9dA46^qSH=wVy#KvY?>SVCLYIcno zmK-B^tQ>J8tFJjU$nMe{Xy&o%pYmO?f;i?-!ly3d{8>F?u>ic};q#VSRc>BB11j-aB}M+=BTAP??7f1;p8 zHC>Ph+1dNw``h2YA34uHGi!|B&-@TVW8zh@bKw6+*#CH3c>HjF(OU?;y*xfP=C^Bz zAL@xbNqm7MUbJ!7w*9kWW0x#h^4vW5TFrj79UY3VKe4}Bt!Dqqk|l9*v6=g~Z}YPx zEK#pswR&};h5eHW@9ev@YxhE7)jahJ*E8s%k1y>Crj{R{r#?Vu?}qPuVv<%JpC{#= z*|oc9ZH!R;!m8u`8MpkCg|X7M+B)lDGerxw_+*L*Ez0C8(MggFFA_$eoSN7t_DvTr zpNUACU$SrdY*L8g7@26CPzxD{qlJzFljGF$h@xxHYyhnk33C_|S4baqknkq9{9H`rQF*3tg){in4*mEuGp}th zIL7Fs@1jPK1wI$k8NW%OVliP>%~xtE&<*HMiO2&ZP!mWH^{L9(A~VZ1h|H)e;Dnx2?i8V#B!h5su<#F4%G z!cacqafHmGjZq|=1F&l{M9Ik7{NYW^U!kRxA|)vlulf3L=Xcv%q%$hA7hc>`d3qCd zr_<=IYBVXs9&Bk=*v{K)gwl7^&JR+o?~Xb(sxp^7*%7Gw@tAe%goUxi9krUaStM&z z3@Y)V;*usLAko&GKf#efBskM}=1|Za5rbr8W_}4{VttcRiSq7Kim{es853A8pOnEP z_bmLHaPW(a>bHx)GFkgS7?d2M6cZC338V|^C;MW5Qty|1|NKI5OmK{q;^)W3!{6eu zzUH5ZU=J9IK*>=`#J&QKYYqMZ;Rp7)jI~cb14gT2jTW3%Y#3caP`s5!aDO=yedEAE zN>F-w?2+e0zitiGop%e_$_m+I;P7HoT@=L^RGf^oAKl)J&*Tq#jIEn`3XWbbqohe> zXquApDfX@(S)Bh?h$0{!f!o|Sy%0$r3A_jfg~ML~+8DJ{0?$#r@MI!A0Kx%;4Eh1E z7}k_?P`VEUISQWU2|i6HgmK^=QU3oOfq;(y4`f7{r-G}Uo)n>!99(=Kgfj&8YksH% z9$6!2WTnvs@VA6KUXk!cuwEQnkoh?{9qZ2c*oS}$SYR1o0SLX> zu}Uq6Cp@ns;7vSrUoKLSDnxQ}@;;Qv@{*dn33=V(d{g1~6OiNWdbG55lkpfUqX_L~ zOxaNY=*~>ehYy?!uEh&+97W6Wc(t=hPW7Jg%>{<0uiVD^AuA&}TH-$hj*02%4Yj$l zra8CBcu8^pXh5on;)*6Lh}Y7uObtI8B)!g8IF?`_aKd?|*!jsQv(w}v)y}Q$nTEA< zor|-BzqqVxRXg4uMGp1X+XK#Th@IS2YG|69@)?dD2Q1EA3d-A)#_rArlqlgutSL{l z=1e=Md38;|fm2KM$3y)BnX1KM4YAHh?aH zyeI>nCejZ+23aB--e^(K^NflFw3qPm%#&OQmDx2^wq)N?7eIsj$!8p73Xsb}7~L{c z1b>TU!P~<)<$yFpdD2ga;1!TKA1P(?6D7lDir+$}7)#QSJ{GX)BVfFI9&Z^U%->1j zP5izZf_6OOE46e$>0c;r!O|(_qSgdY8`$N&8&woWQVIaX8q57m^#dhK!s=CJN=q@=Q_s zn|gd*egX8jbA6OMvnQjA@(K{e1kkyekZl3%1R8_+6R;?l-XSR0@YRq0I;0MD*h_fMPVk6G*1- zEifssE{iLS5>+#NBub{s&-_Jk`f%N5OzV7VXgQ|`j$!n8n>-$;I*7aQT%gYWbUT|S zr*u57ar)lSlGl-NJ=w)?!Ph%>;C8M|UjCU4MwB~xE{U6g##6A+m6;CKxp#9?GGx{eALq|mapfNT=ywcdz=Hqpj8Fz{pW@h3<^@*ozu@rl}qRHh}S3AFxTGPDdqQ7@`3S@(6d+L{CD_kz;1$m>t zF3~&6HF~c^ZbH-F4cF4#F6V^BmQm;L9n~7WRwBcXzI;sHY^%%0NbW~NTe~q5uI}Ns z(Ua?n?uEd?vpzMu=w&|+T-$2Nnv8moah+Sn2U!_`UGtX9;d}a1rC64}I3~}uAoWaJ zWdMc%Ps$rxbYq88M92^NfB)TE5t;NddsSmvIL?sQrJlOrcxyNf73K7~0IsiwN)sG@ z#q~&CV+bzmIMvc3Z)~GHy*R8d?#4fZ5=hkDWxUMF=#4aZ_grzy=A_g2bD`wJL~Zej zA-M&%oHA!s<0~#(wXwGPpm$xmbt9%wphPV$+1cRto(w8sIT0n%JdrH#yF~Gt?TPlc zrk%m$s^vo3=Z#$2bL%ww68N>x3nf~*+#svZEORv6RAbty3-%(T|vlX3)SR-G2xl{z2KEky0JuW1Y9?$GX}PMvuW$Gi$2$`eVR2_ z%+^8#&L2nd#i}181&0i6}CwIw;@f~u%J63x^rOn+_3_j->*&Not zBFG!L2|PNz&EAl*j)M`vES>Yc&ijU_Q3}z}WW;t(^`%j&e*l~cpNPzC;gL*h%7n+J zg$DaeDJB-Ib{w&E{V`xMe)PxA?dFfv6Berkt=L;OsebP z;^?@^X-~BM)kFF+2O(m}CP3VPM#1_6igOF<7z|ebVd#9{I0P+=BX|;_p@osGsY`4> z?~WW-E~H&b@#MnAzMn+s$0Rw8ZIkNkZd2{7NNi`@Xa}`@7^(W)K0JtKO7jKtzp?4B zmQ~0xkUOhroU2>lvDK&jRN|J!RlccLrx{%`2Fe@X&@b?u7O(+eH&SI{#?Yw1#4H|9 zG!X{-v0@93j#U<0Lmf0$d0IJt@3Va{hD{ zZPi97TOQdK@WDYg*q_R@&qE_?diq#rLrQ8IMskt*Q<0r}J)~E0unaYi;tEoaMC#98 z#)UcZb1m}rvY8%O?A~v8!`iD%-Cf4(E%N5Fc_ZH)J+M3qqc;?3DEXB) zjLp|Ew`yq3GF)5W0X?;YXs1Vm_~Pc%2KyMdU%e)^@2nFIN>2t74OUOa0L@gQ%?wPd zdQbPsWqk1{KkKhJUFJC*U;_;qCZ;@$2;Xredz1a^V*QiYtu9SF9cdpo@Ye5Py83oq z)of<(-+*`n&gPc55AKvT)6=nWD7E1|ox`oh%l-;ynJ2Xl4T71DC-O|Q^p{Q;AH-;N zPi50t{Zj*-{4&yMlsoJbD`%2%kL3pd;Awu;(6OrcFh+7$r1pJvejd~RH8@IS0#Inn z`{pGFGvW9yE)4(pH zxPn5VlH@|SHH3=j8enc@+9H*S7de@v3AnIBc}aiY)Cs2g0uGMBsRs|+|318fv<5$m z>gG;i;uX^{d16zJq2rguHQ1|~rPjha!N|AM4w^d!VDQLXGQzqu(Xs0U9tn`v9IQK< zLh^=h8rpCuQA=_R_V+c{KgN+m>NIa^e|k8Fp$+{>q!P46)`l)MR(#ylf7&>0OsYU<*9mo1HCd@MfM)l&^ChGi#{}NmOnP|Il8ry?)U#F16J6vjz z6Ja*Pkic9rT&dyxPanl1S2@(UQaCO;zgw<;)S8mg+94EAGRly1TL|TsMOqH8>n2 z2?v{W>jqA5`WhJ8F_Dlo)fzg2ia)^^=aST(UDu~kwj+NRbuT-WK-OX1k?6R30*`e_ zyB@5wroesN2uwm5Z0P28Hj`lVN;r~Vm}|7J!s9tKwyaQR2e#&N|C^VrTYqw6Tb@ZYd>i|uRaI+t5Lk#_nu$E!m)ReG312743{%4hiU z&bh`0x2<*_3T?eS=tGGL(A|d;4dqcR5<-+T1*cxx>JA+tmDU;hCwnyl4do5S2(+OB z;9R)FfthP!0tS``ieCmYbxP&6kMLDa-L*rZ9hceJ)I&P_bQtid%iCl}#gY-S`X$@y zH_Fd$)NIH1$4_hQZyO&OnhEbCbruEto{x|<9Wpc@Dn5omz`etHh z#tGb@n`Q}4GZjy`!*ZA5>22@U94F|7uORcOqCtv;EeWRvLx7;A;gSl3lr zIl8BNUVP#mp&#b3{!~PiUF^7K(LaoXN1LwM*Z#Xf-0d=~1l(=tMy7?OKEm0V1c#QT z{;)LF`3GEoRN1*vQGWsr){35C_cyo4#aLME#NjHns*K{!TuYi;k$?j1Zw_chkA&!2 z(c>vrElYrZ(FgyMD6U#ltUcTtq6%-r%elZsVZjDgfmu9}3_K7!xjDJ-amZ zN0+tgnl{!os(l7wVb%zQjUIg};tAlYfQhNQHr`-+6Jwpwjx%kx9nn zHytxHmls##OgUewGirR;-7o!_n>~_QF}fY5@+O)YLMVo|0AjsB$qHH9UC5wYWb_Uw zj%hfY`OhR-;)i5gf?pgQ-=9A}YIu;u7 z(n+RY%OpK4Ho`hR>S$1$1ijtZk!x8q1}7ki`ituvegoXlH+CpJu7FgFd-r)Qh&ObEP zFIRU4^?HZRNMiAO;Z&6=DLIxD&W5fegRIG6XujxnI$6@RHq9tIi!e<`a>iG2_AB>! z+^D(wps^nth=-fr=$VM?!J`r@70IxuMECs^`47Lubh7c<#KzVdXd9q=zYcr^oDDAA z=yrK?lcd6FaaZW41cf#}2U4wfL!Czx*R8@JK&r3a@0i@1ZQ$|9y$yT>$c>&j@tz{T z)&$cmRhdM{_~vj!`}@Usn|H@K=_@BR_YG`!okzJ-F3{z1aboKiQ0b|@z+Ndkprqxp z)PKM$!GXSQ#&wJ`*Gs+Hi98G-1Wvaz#)%%^L16e@miAsc7h==7Za~J*IyQM64SnkJn#gjbIOd@{;KEGC?C4D zh0G#$b3JTBm5^kJl99FfL%G?=UEA~(*W55u4&v}Twy|nFivve1t45zYZ8^`Dpxd{c z4_l!hlodm{=3RK=Tky9#VDLEnlLV7a#WYPZT;~_#h4UT%k)~bHh@kut5vslW!q_eG zv0*ffqT#JH!qWMiiN0{pPDlVM#U6Q1G>mrIPj?C@QI<#Z1okUS)6Bc^O{=c)sk-K^ z{age(lu^GI4C;+kSZ9XWvnP-6I5bc>zKAQLfVR>(_sCBlcj582zcpH)^1O)AcXWEg z*(ftuN+(@o{UCA8FEJi()L;CSUDVTN<$Owe$IZ%~32f*ab_&FtHOtbG>2^8Jt(BjE z%kiwRKTstCFQ>78uu1PyW2g?)Sm0R^qkA=K&-XPz^oy8hN$yccW_ql+BtHO_7pqM7 zA%TSF;>eNpkfp=wR^u024>+V7MoUc0fcq+tABn8zx}9z~SZ9&L%n4d&YMy$G&~1+g*ne-xx8;&hG0?zf_-d{qS%B11{zPBvuLG z))A;xXo}=JK3kf0Wj7w%?X=dMat3oo2e7%RMEG1a?H_DPn}?mML09{2=Q}cX?Y$7@ zd1jVt5SdXmV%4_yz8t>EfU~TdnRz`|hF}8wpOKc15yFXtje$xsg3^9ctZO}7`3sCy zEtOY(&^8|tBuli`!)A_Mk+k;mU<0>1FvF{40u)>e?B|#2kGkFJrJ7UcgE?NFDBgM$VS|g3gQ)kNbVNQ0y^_g3Esx_b?qzWjlBQ z+o6lA<+gVhW?|B?Ow;;}(QA1(meA)$}1o02znGbREZg>0JNSNE4PJfT*itODI8G{KIn(B znysd{A6%+$*p2sEI8WD{x)eN;#MA%?R2QdFZte>GkR@^G@pw*vzNt2}`4_kl;BZ>) z0>^YOWl(!|I56#@NXT_I?NOYWhZ(CxYiXI)XH;O+8x9A`=V`&t^$O>XvFn0xc6HWh ze^@UE90b!Ypa^UF-U5^I>asX!bE;;T+F8cV=u3&2U@pRX{PK(t?8~JVdcJpi!}hq~H9zcQ>=Q``_+Ar;LgS zadepP06~yrSZGi*d2OF+_I6}{7>%oepc!3JYuT&F$6nQ%`>OE!sx|ji;dfOLJw&*X zh%^%6cZkT_MC2_Zs*8x~AlBU^*4-f1UnkhtiS^eAc00i~5bQR5Lo2?a1?ODGxtDQH zGtO7k7N8}7{3Bb{1rrmT&hSuf+dw;$%in( zK}>J}6BJ{D{g|)_6&9j93sIpK-K9l$YS3M3G*$IJa5Pnk?#9sFsC>6vzFQ_wm!cvV z73HCsd1!_N&D@8Ib5L*1;sDqTWSaqq1u`CxZvt|zRK8J)a-<55 z6x|?Itd}aH5#>5W8Ho^)h%y2p)*{3jgjkKJLJ?I6q7H`DL9jXy)~tjz0kGD8DhgO| z0Rt{r4i_zl3;p0CpSgo~HJ#f#yeI)Jl1d8j2B(GlyRGgEZa&f1)t)?lNz zId9Wwb79&aHW#NgO>3TZ>3^4PT5MWvu1ss2X0T}|(LTPm7J}@qhXt)*XWdWO`uW%K z-CmVsJL>j@rM)sFnEAtS!6xX$aVZ3SKKuFPA%C$r{hkf z3&l=;>bwyj^Mv}v>!#T6R$g$6%!pNZ&<-RARi0I>HZ9v71Pdg8rkzQr2Fxsd zMhy^@(&i7{D0aE(`>9Kq`*^X@XzBa(bWzVaqI z{Gn^_f*xH$>34m(e9720jpbnxvs2-^yuwK7<6g0jxm3rx^FvQ)=oNPiA10i%&{a$$ zh2LSJzsQ_B$1m-!Lk87Aol|F+>1}S%Fb5kH-Bzw$Xex7d{e-K)+nnZA#p(=AV|GrZ zF>hl0uNmzz&qh-R1M?X#UhG*OtmV0@m|cdaFVD*v<9Z`i#p!pI@fF>E?{4|d8w@13 zcBEy|t>Ko1Ikg(kira>z14Vyy8~&urTCHQw$Bc?!LL4Mdr~&VH`*wXGue7m%YaVFi z)Y580YiX1Fp6NTxdwN&KiH`f34=mQ4*11ilZ%<(w?dSD~mnl4!Qap65sU)QTn(zZ{ zKL_TYvG#)vb+je7N)|1P7F8A;4$N=$6MN}yQnY`^4k^B0VVD(KO`CK&ZlvVd7nM{$ z?%|V%EK!m3G`uyLRzFDXM4X3wpSleBI_2D}Um>OLYVxIc$VF))DB6cYuBp?1Sz&H= z{)*gX7w@~2KF@KJgSfWLuHInn-&}2myp4MIOrd1Vb&lVuw{6z`qbapC*09NhlT*(i z*JceHt^FHkh(yOm&zYOYV@6sYq;?=ZF{L@LYc1uY%X~I+3(DSqNK%uY^-0c+OFcpf zS&)(sJ@oo?ucZ{S#x4bCzb{P>Ja0Fn12KoZsx zmy17lNWkew{+y zucdMVSL6k*@XR{A|K#%U@C_R_ycP{#$@r_|;zWG?7Qb~&4E|p>Y>0>mPdj;FzgyMH z4Kc}Elar&W@PE1T!{WxnM`C$rqGQ&z?PLo6-FVoCv-wJNOevFb1pei`WbM`~(R^;* z;Ufj_EaAnh+j_-)=R+@v4d-`Mt2F1!In4CE&S{=h97=N)qH8oAP{5mqGAqNMFJ9`q zc^RS^*P`O3FVZ{|!G?Hx^L*h+4ws9*q_|EDgQy%XZxzcjOW?<>GVU>76wl$raCw>U z1`7izmg3DT_TS{h4PeeB(BYnI>ucR|xoGy0CH5I3Nb-pLXAa_F5w;lE*7 z(-Ulw7lT&5mCiC$P6B=ji_$5s@c3CTDgz#7QQ(@j9&E@dFMki-HAJUn4 zl$ZQm%CWCx&DDOLpX)1m{15Yu_mVU>P}-@Z>?{6gIvYZ9(Hddg88481^Zv+bG2DOx zt`G%M-Z7CY5Uu3$BFf=ofF}_}37<#th{Iu!Cq=vSc!6B_G0!*K$%)B^-<0zIWv+8Z zAPPa{oS3vXLH4xbm2BkB%E#8ufsb@&P}(o!;&%qY=jDmw*RRIXrV52`rANYBktb;0 zahk)r7Qj2BqrvbJn)jCwL|r&u?2mF0*NMlj&kCa9LtGk2(~k<-kPdhFSXaFr#}T$H znwIptM{4$QopYeB927fLc>?;KMYc7GDgyw62W z%t}xh9Lvq=Aynkrd0(FLD$jsV46R_#di~0iMIm%rZV{`*PYQ)7XQt2>w8~|}StZTk zxY8l;-Q|W7P9amgl#TrLddlAwpOI5u@sA39sX2>gGkLX7O^eA0NBe5s;(xLVzEkA9 zuJ#6iI zcGujf(iX*BCkp*GQTBu14zT`k9o)Iu#Y^|0z(up~zzsU%1-CCyS?Bg2;BI`W^2k-f zI1cy;@FkJcnCUDFpMt+I4&O1#m!j#^&ET0l;F1XgpFE0kImku0ulC)d8DM{T`L4Vb zZ|zTo-wV5n5Vv6X4xbOrWCy!ri_60w&l~Vo;y^+S>?@p(V#vis52|Hy;`JO zj`fT-Q9C?h;E^*4{*QxygHMdg;jCIaXD+y#0HLodd<@_xz*3+)D2?LrSQN?O27?D= z;K#zakeOh0;5X%DgfH`E@(?~FYdwxrAbZ_YV3ZbZ0k=V1MZ}JDFdz&QiPCZtu(Z=@ ztjLALvA`3;Yi>NOcJMYAj^dXHS!{W}g98>04+~KqHbx|2ZDj(a0BAO=YT^5r^!4Hj+zGCn~8QfL%6|# zA9JUl0~Z_t@dnEM+XV=}5JYD?gUX);N3(S8JYnFegqeUuu}>^tP=q}r#;qYogRyH_ z!T=!t7~YCJL2=hmv=Rb4aI34m;UyIJ6$rxC;~)$khREzFL}uWzAne&OgAwl2IfD-i zelCP&w&kaWE}j7}y*zHUZ~^{x0Tr_gYflJtABjF7fy+Dcu4ow-9&r>X)T>TsPJO6A z_j;^_pbH#R&Io0axcz*|lf;X*33Nfm5c^LfU}V=Xpw_%sW!M*UivZp0iLzUMFFHWn ziQdwV&=V@uH!fe=5|!fO5fLM8KU#k#ulDP99uj4<5tN^AH6@ayZYjDE>zu z2#ktuD!J^zYPLlY^41Sz3+z6PFpxT_ zNOh3B?UTaK71Nvj-+^eYMF7^_@zg1-J1WxtW49!lL@o!Um-CGjYya`JrBmq!KM(k_ zZ_XsZ9*)Il0fu zTjwhHj)VDxbx-wzS!w;Tusc^siatsAoTx>I?6Gt-Fy?zGHjy=AK}mMOB&dVR>8!t* zDEaaTDHqoKxJ7Zh!^zGH*uz^PQkk!tFSxrW_8@UCGv&Hv3cg*|jt0WE4;RJsXA-5K zn-$uO6?)39>hwcnA3N3`jK4w_rl8r(Kl3e?NsI4Q$NB8oJx12h4kjrf;N+LX=>RfQg64L zE}D_wPTU?;ze~{imD%PN$UB@BK6cU+HA0`#)9q#WYnA3-S8GY4Q`aCdG`T{Pfn9Xb zjN;Vd_8>ofcGUJ6B)PC%T+?8D!k)944%suYva)q&B~|F>P%j)t(d=AxLUph}Z%wneH2$>k@( zB9{(hNtI!pZFky^+NpGdpW4M?H;%>bAzsuV@2FM;G0^L0n|<)(P$$K0y)SRBF08?-Lm2+*A- zt4TC2vU;I@N_}U~Tnd{6&c#AdLxG*NMUBvR_VjzHFZEv0oGRT#m6vs=aS-&JL#MR0 zTBpZ&Y@sK-1qA)fY>WO63ksbj_R?Q6LMvoyoGH8WbXdnTBt`GnzD=%OOk1%No`%5} z+`MP)ZobhTTim9oXa+$)B}ErM0~U1R1{)?bBI`G?!6s?4t0W7F(OoXBIb@t^y{RMk z9P!N9IETnam;M@bI!@h&ujp9<7cNv8AE>Aa8f-v0;~Q)kLDF`}i4F1nM6qEbDRhUm zvf}+r)Iajmip{+V{lw56QX(Z6tzD$gtex<%PN-eBUWRk?xL2{TmXx z>^A75bO&u{lyzJXzE7yByejVWDe)#nm)8ZxS7p;~Sbz3$>eWf7X&a-ARNKI zLDHCH>piGrxSh~kyAp}CcSIWV$u*WlY4H%URR^#UlRK6HY-^rcXxoj`e;{v8FT6yY zi>6BVzRKH12OGu$g}Ns;W}|aEv@KPd53iEBSV?(OInz#yC-Lv)0y3 za8agyy6l1ZiW<`857tlW9w$L}qWJoJ{cB?yJF@A^NmzFB zs*GRrx0?l(fS}%181h7()*12~FBa;ZUiJ4-Z6rLsk@E?4=Mull{ri5F_+R)aL zG|7{q#Fr0AtM8JMe%M`if#Jlsr{z$jHK*mq=Q~YffLY-}6c^U;szSe&Bmm`I>3$t+ zri^rFLVOBl69S42G+o&sZp>HAB+g}vLVZK(&Wwu^%SIxyKF1asKI@MO+Ja(tk;3R_ zBF$!n*Oc;?SQFZsiCHpPLd>E}ls3kcm^9j|sC`{FdpulGHs?G|$Cd>~0O(v5H9t>6 zmn!4V-Ce(!e10Kz(xC`n-fsk<0xxYr)%r^+BSv;+@BsBxdL&(>6 zlbUhEw67`}x039Fs3qO6^}xtTv8J+VaV&CXR9Xt!neNwiU@SZ>wg};StdNO-rgl$Q z+mYCO0vB$HG{021{(N{M2-}ay0K)fJArtRuOQyW@8M7^F;p_jVGLh5g}igC`Q(1f*Q-^CH)7_@sctZ_C?0iFf|?`G0VqMWx1+i` z&9>;ik5rR1RWwRgOAHOd52enPcea@|`ft9zNcBqGVC331TqJuqlQ4ijUD3tm(#Fsd z(;dC5(43a8vtXTG0Zg}CGk@=-RU3dqwtLER7&>iJZMQoO-2%t4MViePD(X1?t7T~g~VyOSk&a205jCyWz*QZgp}$nI$< zc<)Hp7DXA(U2P!1=qs#IOq$=1&CMP`e9!U%XsidOY2?>Zq0)2`z>s99o7J_IBE-{K zf#6R$=3NGp)jgfTVMfZRUCv=G8MT2ZZ!kt}6vrW~pq7GaLC`j4I395@OOrML*{|K! zOprcgE};kY5w`OX+C=N`r|;FMu5OlL>u@_8P~ez$HcaLH#LVHfsbShjMRF2%zbVq6 zQY`Ajmu{y)nTgDnVmsVh`^^BpzT&Fb8`+S`%bXAk~iXK8LGNhKtGoh<%h zn(>cAk!RYU`|Q~j(`u0G7Zz5LaP%_2vsUCjIL$wt;&IXZk9}69cYnd?t*x1ZSud5h zTuM?BL4V|TDn+mYVp6zJCmO_KG}NQcfJ*;uswnL@p@d^X(%~biEp+YKkTY2zJ{OI6 zPlLImZ}_0h6=;|&*KREHRBo4jexi&bf%o>Z+fLFMgXnX}>&b+mK{g(tfvAedy2`1y z18lfeP9`+evv&jFPhF|t)*2N{+{=H8^ps-Wz)a%ylQY45=N4WC!`VpK?X;~hN_*UV zbenu+%0g%BFIAb`oC%OSbb}!p@8mMO%bL4;6!&M4Mh%tmte<)#!rMmdudrgV6=y&> zIVBDAW#2tYT5rmXS%TWDcp;fXfcS1||2$a118Z?=e-LQDt1;bCCy`#Nc(UL27`w`W z^*6U`=a98HO@<0py9u3T53X&7PPHCl3Ol#M`qW z5LIA7OHFt5d>IJpWebD#RtQ#t&0d8pTZ6Dmxft$fub`!}XC|IADxBtxba&nuQbU$# z$RhWyTrBPpbbgtpR2JR6d(APtXAjJ)e>_ZkQe`qmdqnzJ#fLC=j`v(T(Dfvv^eHE! z3_ph6#QE#4uk*{U7EZ*zk;#s+ExZbBSu}B*soM7_8r~zAUeNVFO>IHrTU$D@9n`oL zb-)7B5Z1Pv8KI_rwxyujAm~5`8#Q=ph^C#Gk#4cF-hjgp>$wJsJ$u%3^->0;+>O%z zIh2e%ciqb`Ybb-x9{>$mCcW4b`ZGS(h;W9gzLJ zC+uq<>2gaj0=p|sYvqz!RtVI+f_B{+{Sx=#tkB4}(~qj1 zW~5fg>@Jh^y@Gb-C3hu^Y3;Yoq`+XG`5^sJp@xtB#IF-KS}3VFX*fEzeb_dAJ#i-k z8>;UWR91ab|CA6N#ml~-0|z#MSHlKym?h<9vPu^obFqttU5tAl^QCf1Do-6a=mNE( z0d_!Ak-ZCKrr3G*qCB;W{6T8cMS=j^_}lYEGlj=t|7-@7ELaqXm(oJOFa>q8{;mIg zx`qbIX!duWz5D6w+XUBl)$Ai~^nj>+ks^5XYus)A*dnky>Q1@(1+zx~$(J;UJYkV1 z)b?S!=O)k`)gQ%2rs(+}Z|80D8&4gdpm|@#4Bq1HFUVe4B7-M%VXfqo=Nb3 zd~KgiIYW{zj;beq(0X9M(9;BNNgrSj{7Jdgdvt5^%Wm*pop|i}tRT!d0h@~HM}=%i zhdazWt$I5Sm}wVHOZwd-_7JP|W6f-NM6UTLshs%HF1}=!h#vhSciRNyeZ+1mB=N4bdG+9=n&Zr12&tkFEja-ZulxQj$-R7zWv{u8rCzsN7` z;=k*7*OSKfwXic&&_0jE-2%hW6UJq%gMfImS0Tq1+gHWs^YxkoWRTsUsjM;hI60cR zCHl-;So?wC`gzs+#E%{lwYMu4jCy0eEmV-afEfO*bZ9ir8H^va`E$AYMiN;!-+7F` z{iJC6ID$``G`D74ka9Lk(;Rw>VBNF%r9bfLF-I%5hHlN~){F~U-Q?=oW{qZHjs9fJ z$pl;0CvP5`uS=7syh?ILB5Cz^`VYsNG_h?ti|cNMU7ufLGDb0?_GcBo*ASidnF#s2 zD)XEIP+szLDaXFT(0%$|=QL0{ft8wzt)4W&(WiI6^uf&p!SzFCK|@9D z-zX3x+^2J9@=)+|AvCisKQ(mm3?Ni4k6SHV;DLvt5QkmZ58V#D%c^G|2aw0?7?y-* z+$ril=#~c(S73>WA1S#wU;nk)kJeY#boy|(TNEhVlbt&)0tGg`t=@2m0v-AWyAP4N zosxDM%f2ty;VXdBzej2Agc1^luKW3=ZTvCF$q{-^e%sI}<#b805cG%q(kp!RO8ZWN z^`Q7Z;b!ezxTC6Uf{cs)A-x7M2>hr zF#y_jZ@&Kf1EjS7gz;9AwsjOq&B>W58?2vZprj_xm4!5J^OHjxR~r*LH!L``z2C)iRx@Qq1k4iC8WW;XD^)H-Kmjp-$H_y zJ!Se!QiEeT&z|U{HrLO%CFwHGU%&HNmEoMgo{HFg^IwD37cT5JriTKhMsefeBeA?Q z(J|}Vb}|M3ZanP6*?c8Brj*Gz0{?PevUcm0Xg;^@@R5RdmhfWMZN1{Y)AA+i-EaOZ WJ~QRTioFQFRxEotykKeE&i@C;o1M4- literal 1845 zcmX}s3se)w8UWyMV{f?Sk>F{y^%@pPkEcg^Em8^smk1GvUIpZhR7fDa3W$URh!Dvp zlWaEG5Cp`EJgmgls)#(~m7st`z_wzcj|8q*t|Czzd5SEeIr`5_+YUF9vXlfOjR*9yc zM$_py*Rjlm_55P?QSbWB?}tm;=NKuL7_DhzB4JNaFxF45YC@77L_@fGkEL zi5Xg)Eh4MhReL*@?D~eU7~UiQH2M5(jBgFgDXCQPnByn+u&PU!w(A% zCtSm|u4!A-{-=(0o$Iu(>DG0vxxVhkn(lQyYkL23b6wwS`gOO~^sgCsZ4k#v{of4& zVMF#lf4>O9n0Z9}_dZ|i{<#m$p-T}h(c?={r{bRjQQ47-sQ1cdzrW8fP20O?%RJTQ z(^t&~7u!nfVh>1eSNUaSdUDRQA5@ois=M5%?vbA6>=d!8@8f9ZrP6P9*)P4FbYV|< z-`sx_Mw)(i@}_R(&2k&1)qQ?1S6Rt(Sti@?Dnp`+rD=L`TJ(>CZjpp|hO+VUoUc~w z8dLHp+@q3_M7I~8m{}3D6B%3SYW0>qne zPbr!E4St$O^Szp6BlwOhHQ)WQb%=Y-pI%@pcJzX7i#YfQGt)cuEhHV-^q^<}qi?_Z zZR9YcD~LJ&U~%xB#09ZcsENpO76_kDZ!>R%o{ZZVBk9X8F3$~9;qfUAMb|ShYN1J9 z5YJF@sr=t6JWcs%75!8L$!r%#VjEU^e4J*vdA=x{TuXh+$q1Mv{KaQ8-uR4MbcMap z%Z#fc)1PXEv!T&bphW8m34Ul@nb0(ROQP2qg;K_*sdOEue+SRLYxV2ug=dD3qR0Il z+ib^Zu24Xseu}t+W;M*RAN|6nZG!}jBP(oLb7vy0&DN!P_TG)B#Nzth?X3G2j@r&T z3X4?D%%q1=nkP7|z&7fPkC7KJL+Sp7HpA~ps}!DJrq-<`V^WiUpHepljqjA9FPJk~ zE{UV~wLI;`t7P9HLG?*=M&Jgjoa~mj9pnCw4xqsC#tm>KhLx*$I&^1{fov?Fus#jDpQZ)d2A`Y%AeHO8u2wPhkTrW zeI|z_{a_bsQCt-_1w3?jNVQBpy=U+y@3#iBn-gy+u&u@vYs#GD?>OyTu$Z!7*VC|2 z`Zu%0N5FT;yJMeu8(np$$3K2Q9^DQln72XKb&mKMs!eQPe%$F~#@B5iGVmc1P3M>= zpPBU9uQXcX?;~GTIsbZ1(#WwLHup}$!yBtLworiP3%s}dBHr7y^8Ogj9qM3O-&HV4 z?9J7hj)#_=SIE<+n|>Z8@l}p=h|hSw{8;_2E|r`|`j@_>CiNWco}bkm&s0+EgHo?i zj___-g&HgZljyTgmKad$GmW={zL}&WHlQi^Bky!R@l<*{OlnN}50@?9ZCdfg=S%W$ zBy~QteYiLHyF`+pn5EX?eGtv4o_`w;CqEG zA;ll?&A$?8Rzvy6KewwQyUq76TJojG4|_&u;?tgm>iN71lJu}!^kORSjX)t@HdPy@ z7var5**;mbLK)R1c8dw$_>LFMFt=DI({*Q8*~>q^TkT1S{^KR1$g!IuLU-%8&ReaJ YtPA}m`_7*A#eb57eF4G#4GgyA|9JndP5=M^ diff --git a/tests/images/gradients/two-stops-unevenly-spaced-lq.png b/tests/images/gradients/two-stops-unevenly-spaced-lq.png index d1521ed52a4aa1778dc207d42edd67a24409d8a3..fc655f8cbc35e43ffbe25f872417e382cd390941 100644 GIT binary patch literal 32426 zcmdsA3sh5Qw*Jo{;0b6wKza1iHph^PGtip0>z3OWi8d;U#r#pw=7#Wf4)lh z>5lE8riIJYYu;S5Cf&q;<-&Js+jj5Cqz^ArzkIEjsrjUBcZ?|S)FSmkrgRVdEqG4G zo2M4drdoFIsaZFNR=@n_sjy;~f0C)1-F?<2t)$eDYm7=MvoS@K4%Iw2Y z4I(CQd+HQ56<Fl{|&y6qH1U+B5DbnN}?cU;i`FOLg@L*(&H(%7YuE0+Nx#@y}kZP!QQ@h zp)sMbO?~h_?t#ObzQ34akK?NMIKP3HmM@N`pA@kKJrfbxVbtbYI1<@?kv}wr<{XY9 zC{|`{3H^mwiDHR{)Wynd{z-;GsZ*G#EDPHsRb~@pv51HzNOD77HqwRD88ei!U+zbh zA{3&mZhC9qG&mBvN=zS1(6pVdb6vp;g03J9Nw~jcb$#MEPS1X~%6>W5@k&k4^n`_| zA$y5(Rye<2Bx3JnFi4iufnt?U}n+IU-3K zm7$_wii!}y$JEp>iU>IHRVAqM&qqmB1d2sD#fc3Mv2t>5#!RKGeyP)%-us8s=d*sB zAAazk%eChvt50cX<&bp9adK{1pU5(31$>Db_U70KE!HrGlKQ6XBKG2C)xvTFN=M3oesSfFh?ZMfUTUVJ5>y%d$oYERThNyE`hw%qaU zUajkZ0x)93w&j$6wgoJ8x%J!JeAkWHxQ$m!nM12&gdRH1ory+1S;u`84?{@RHgUhLUxOT3L zG>X*Jd&Y)>GYQ)BKX%Q2qOLUa%7=!I@{il*N&TSINM0Y+Fl(QjC^$M|L0K|CE~q|A z(s|5m?ZLgkx|+2yh)yLD7p@mYD+|v&QT@L1{(&x0St>q?8dnO01Qx2%_Sd z!%G3%WL#_3x~v=D$8F^6PM*tMM>)wzh(&UpgD zZ#K2~_?;Ou;9!aqb9$MBO&Ct)BQrmX7Zrb;EWU71aq%UWJ>ymf$kIejBiHsypsY<<-QBY_cYP2BFS+!< z5H?;H=(^Jh+H-N1A%|_#rUrtJE&;nnZBQE5YdSwVNMNpsTkcgy&27OVF)OD; zBFh!MV`Z)p17KXZpIs-DigcArc?&$11l-o{JZ8Qgg5eFP9^6yTz9eQV#nUA~^MFsv zE0Gha88c4;Y${CzSCz(o9`$?#i{zxPavy8tp9J!y;x;89MW85DtPvt%F)4@NA?G31 zjc01-vZ=p%6(U`pSE`T7i38L)vl*Hu$h$*|De>yAKo$iI1M8Z z9TNY>4!q8>^8HD?bk1Qw<#quH27GM0^_cnEZ*d(j5BH0X1BH3};%Z=nfuK~zE{UdP zsDYt@pFGJEXJv?J2L$c9(&+)Z((K0OcMNvd(+8qjP~H;ab)YBvLs^Kx$2ffmzAeAe zJHE?3in}2`_7{}j1VfHTa3d@z?>{ISfm|AXO3<3(Ixu^m^^9Na{jk6Qb4ifk;As$6 ztUME%za!nTGr0vbSBC09G*=>s3)y{9Ow$L7j`fYDxT|_T@LUywL=FOddJ#ZIZf}{{ zJ|DMVF%f}jzp_dC0529z_re~VUH13^|LifOiCkjHK3IRl(*jUKAgV7BJV94?F+tPr zs&*>BsNFa)Ysg3>DiQBUayy2LMpdA}SKL;BeXE_TM7h_8uR3NDth%D^Y=VY!kq}8B z8|Hz{t~lDcLvynG;Qe@*SRYVfqPq02dmNIUGm3qb4*<9uVFhlh0f&&RYTG<%)7y&SOIJ>krK1yZ+LO}{xd=?okZ!cR zqp`e=$0gJKlVl;-@IM;iz6Z~Zj}kxK03-;APRQL?F6EhPmQA+qk`%mx2dPg409_6A z>)5W7kFu6T0~RN!XRA+hI>|bh^#}Q9{U}=mIj(U717<~VU$y|E*;kkFX*x!kgEF~D z&G#-y-xM7i%{t5pYwr)- z=y?Db1DZI_U)4ZFXb{b=*px`-f0gcNPHx8H)GYsS`L}}aN4^Gq?6rcH@;K++>3^#H4O>Ll71aPR1+u+$Up*r)&CVqV>pm}!gD zIG)A>k0}Ap(~aWovG*bKKTo%IBwxf_rX=twu~A&+fWX0}<($msNKK3D8Tp5Lx@j(G z97Kkn5lvTD*>(DS7d?8$KY+k}vK5aQV_Ed-OK|TJvUULc_k$mTyWt(L>n)MFUt%8B zJjX9=tb0k2L9T5-pXE`q;Cq;JP_ChZYX(ZRg?C8GY^3VkueZG5`c{(?gJKE=nRRK? z(v;@+8*#^ED4M)(mX}4=h)Pw;Y{Rz+j;lNHRGivB7Jcm7A*0ai6-URHLMgM4$ZU($ z^n8Vf*NOgNX@+0&bB|>j#b!`b*;J9}A(pLj==7QgF<3EM>K~K@HCUbkSmrEW^Cb#V z%1AO!$;xMUh!tJSuvB2C$Umn3micfgb=#$yM8fE&rjnJed0h=B|S$JFJRDl@zrC z5e@&GjVd3z9;vy4pWb7VpV8?-LEhcHVnsJ@SaxpuS$OauioT>Fpz`)9&50x2OV>}TG>AybUf^x3JK#}&o3r32)P$}WUa{oSLPopPS2I}YmaDPCI z_7fu{RKnxdwC@~I8k){ov+`H>BQ@>yWJjJq?hks9qq;$vA{ieuK&nw1PM<-f?&B;Z z6&tq%#iK1vZ*9~%X4m!WWR$`l=UgmQfj?HT>IEuwI1=p+gZit*uV+I;g2&E%rf|kI z{&ycS`J}%0ns&4B1(r}BHdylI)&-3*04e0_`N(g`Z``&Sr53kdwUl!u16Jqc^1{y~ zVnLcT2zQ2iF**Ic;>S4Wtbi$im|%!2qE;J_DuTUd%6ufN@9M@*ux+{aJBe7(`VL>Z zM3iPJe%x_h+x%4@`Mme~*{tpbf)-qQjrqF~#pBLUmfw80v%_Gy)ED=v?U(oV#`D-s z(AsT}a%%k!Vt`Z{W?HmoG3)uC`rf~3;jV^IRYyLS%6HdVk8lkGpu6`&!=>_{%c;eD zhn?CV^u7P8mHEC$iu*MLW8Q+ek$fA*PV~nQvCz|ndjvyeuN7WD?FbjBXxnQx1PZ_V zDJ38#Ahb<-SPn(z&TE(rIo)%4uk#6Cx3uX$E^o1bBg4JE<9Y}`F70(Sm*cC3Rd^|& zSF<8KAMNU_4Y+oFqVHX#oh5MB08b#~5U1%PfQ7gKf)nd`0-Tu7nmY9tNBKCoF}F7l z#i<8P&fSXJecs!_$7bS{$1otpoz1ds^mXO$xIVbQHrVA-{3!Qa#tF_GM=&4UX!+34 zIsG1=bW;4jF?3u}E+gde3D0QgOQv zTHOsE&O|RoyTw|`tfmg^a5`EGY!blD-t3y_wiD`WZ8GM~TsPP$i@p^zZr_+}oeJdR z{!l)PF}Ms2CG61ry^1UHZZk>?NxG%R2`JCVoBD4w1CBY9y7dqOtviw%KI(Cd<-M) z7>vyAdN`}o?yau1gZ9bmH)mXNRy&Tferhc^)*j@AuE%cs@4@~8ZeFb$8ANIg7^S^Z z(fX7o%gb%%d@MuD^D$R+rIf`G7!sy2bu|!7u)sM5Rc2C=U z4;oA#$3FTWF}Kxf>Mb@`d8s#;j~N2fbqv8BZ!N5E7kk>xTM(wY3m281wON++9yUUE z1220H(X<;e%krI~y-S95G4IW#3}5ojiHw#Ia>dW!E7rg!@0{=)0zoKiEq>AAU`l(JX7+@J zM+oIr%s*jXMQ^g#nKyXeh}?Iu3CG-pDebE@$+tKbMM&A5hRLqIu(8!qGm-Hbf`~2m zJIrhJD%wBP#y~TuTNw9(p--QUExp9w3s6Wi0E z6xh>bFZ3JtLFO=b-B2Ky@_ko|eN8YlOLBY8jrUE}=oA`PgYD;rJ{fg)7!t1aUW+iG zr@3k*Ex@hBw0?@Vak;bRmZs&n0TMkYRSj5HsHehYKVgpYtP8#mq*9B;_bFGqQo7dg z9pv4dIpoGJdJeU@a!epUOKE>lyTJW{M1tJjFkm#H#JN^)d(@rLUBcON4r3AuFSHi+ zw7Wac>E;OkM!!=J?kXv;2vB0P>79?dH}Ogs>BEs`yn_2iv)~p+SooNn(_(3~l(sI# zPiwrb?TDuFedllB_V3t(?*`Z9TSlz5tq5Zc%DK998$bDw$8+P4Z}F|{*R1<5e7m%b z?>i2RKb!96w#;+3Vv)&&NtamPeavw0&8G&8%M9`z#og{AFukp^bHu1qDTNWA z*YM48mguj(G?6(W-f_N$&oMJ}N0wD=hyr?g*u+pl@Y8PX9t^&Jc*qi|2^(X0im%O} z;Rv=VOR}sThG1-xu3&IH;~JinFZ@T0#*r%?Hb+$$iCuule)cfMm5*FU%DN#S9tX*_ zZHh0kV}Vp*o*|gVXUg{P#o*Muab_+|JO)wsF2LEDZhkND;kx}h;kA3%qt_7aalVI} z5H46^-!WnYrEfDtdz=qqv8=vt38N0d^ckJEn@hrQT}(m!5y$`Z&3P8gt0=y~?Z|A-J_Pzhxo%y8hZjL(I zM%n-X=r}PPJn?&<>e3?MYXQuz1^|sg_oJ-CcsQBiGLhjrk>N6);rbxMZ47fA#au@) z<}k(_!rTWj_W{hKAM^MF^SXnv?qFWGF;*YOQedo`sCN(Q-HrNmp}rlcZ#(MSh8=6g zj$l* zS0W*mNbqk+$ORVOB%BgSc(H_2ghUh~oP31)3&Q;w;pQRS zvq)qv9GL^3%z-22@F_WbG7CN>gQGImg~Cx%n1{eTIGvZC&J(A{h~QWdj!lE()8IG( z9G?Q8NrB^&;WJ54LL$WHL;M7Y9}n^4pu`v`DMp+WEl!RSC!ZFloW@ruAj1Lj(#r&)h9`_lvZHk9<;Ol*(rT)9yJb9(h$ z+MdL*J>M?1whukC98HNhKyEXz)U?`HzRAj12*|XE(L@TegP>uYsP%Z>-d(aozoQgS zvWS{iZQc4EgA%#SQ&V$NQUn4kQ|r5X0(vc|(PTF|sEj7tQD?mX(OvJd%|SJqnUs~y z9AeyVKx6iH48e}tYNe@!G8qj7EG!9xkO3OU#&#Z%tF?3YWDW zt;WTf^5=#Jk$*2XaeGql2aIQq@g!*`nPUSB;2AQcx}1^lI0Nf~ic1-s+;Y$^F`E=G@#0?^JQGnZ)p^e}iF>=o#TWPpgSXu2C4o zC}`m%c{uqK#sxiNIx4uwxK-Ci1w&^kx~9V9v$C%$Mw)a@OKAbw;pCo@e8@)pW-W-a zbM4*~y?VJb;`4DD=gU7yOZ8QPf7oWxSwo$rbrX`MdhIy}6Lxi3G1bvdFb@nBlG)X0 z@igGMkaGFXk<-J9fzj?G1xl`l)CP1=0-n#i75RgL-tvMg(6+DN3JHvhxEb9W5`U^L zM U)@YUDgr6dS(_uG<8oMCTe+i8l$p8QV From 1053de0a3da01955e182ae14cc5306d85ad7e333 Mon Sep 17 00:00:00 2001 From: Yevhenii Reizner Date: Sun, 28 May 2023 00:10:07 +0300 Subject: [PATCH 2/2] Remove no simd tests from CI. We no longer has this feature. --- .github/workflows/main.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 90667b2..3aac993 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,9 +29,6 @@ jobs: working-directory: path run: cargo test --verbose - - name: Run tests without SIMD - run: cargo test --verbose --no-default-features --features png-format - - name: Run tests with SSE2 env: RUSTFLAGS: -Ctarget-feature=+sse2 @@ -73,9 +70,6 @@ jobs: # - name: Build with minimal features (no_std) # run: cargo build --target wasm32-wasi --verbose --no-default-features --features no-std-float - - name: Run tests without SIMD - run: cargo test --target wasm32-wasi --verbose --no-default-features --features png-format - - name: Run tests with SIMD128 env: RUSTFLAGS: -Ctarget-feature=+simd128,+bulk-memory,+nontrapping-fptoint,+sign-ext @@ -100,9 +94,6 @@ jobs: # - name: Build with minimal features (no_std) # run: cross build --target aarch64-unknown-linux-gnu --verbose --no-default-features --features no-std-float - # - name: Run tests without SIMD - # run: cross test --target aarch64-unknown-linux-gnu --verbose --no-default-features --features png-format - - name: Run tests with Neon run: cross test --target aarch64-unknown-linux-gnu