diff --git a/benches/basic.rs b/benches/basic.rs
index e2584df..5e68b0e 100644
--- a/benches/basic.rs
+++ b/benches/basic.rs
@@ -1,5 +1,5 @@
 use criterion::{criterion_group, criterion_main, Criterion};
-use ispc_downsampler::{downsample, Format, Image};
+use ispc_downsampler::{downsample, Format, Image, Parameters};
 use resize::{px::RGB, Type::Lanczos3};
 use stb_image::image::{load, LoadResult};
 use std::path::Path;
@@ -17,8 +17,15 @@ pub fn ispc_downsampler(c: &mut Criterion) {
         let target_width = (img.width / 4) as u32;
         let target_height = (img.height / 4) as u32;
 
+        let params = Parameters {
+            // Input stb Image is gamma-corrected (i.e. expects to be passed through a CRT with exponent 2.2)
+            degamma: true,
+            // Output image is PNG which must be stored with a gamma of 1/2.2
+            gamma: true,
+        };
+
         c.bench_function("Downsample `square_test.png` using ispc_downsampler", |b| {
-            b.iter(|| downsample(&src_img, target_width, target_height))
+            b.iter(|| downsample(&params, &src_img, target_width, target_height))
         });
     }
 }
diff --git a/examples/test.rs b/examples/test.rs
index 3d58186..67f5a65 100644
--- a/examples/test.rs
+++ b/examples/test.rs
@@ -1,5 +1,5 @@
 use image::{RgbImage, RgbaImage};
-use ispc_downsampler::{downsample, Format, Image};
+use ispc_downsampler::{downsample, Format, Image, Parameters};
 use stb_image::image::{load, LoadResult};
 use std::path::Path;
 use std::time::Instant;
@@ -26,7 +26,13 @@ fn main() {
 
             let now = Instant::now();
             println!("Downsampling started!");
-            let downsampled_pixels = downsample(&src_img, target_width, target_height);
+            let params = Parameters {
+                // Input stb Image is gamma-corrected (i.e. expects to be passed through a CRT with exponent 2.2)
+                degamma: true,
+                // Output image is PNG which must be stored with a gamma of 1/2.2
+                gamma: true,
+            };
+            let downsampled_pixels = downsample(&params, &src_img, target_width, target_height);
             println!("Finished downsampling in {:.2?}!", now.elapsed());
 
             std::fs::create_dir_all("example_outputs").unwrap();
diff --git a/src/ispc/downsample_ispc.rs b/src/ispc/downsample_ispc.rs
index 044bf5f..e79bce5 100644
--- a/src/ispc/downsample_ispc.rs
+++ b/src/ispc/downsample_ispc.rs
@@ -2,16 +2,171 @@ pub mod downsample_ispc {
 
 /* automatically generated by rust-bindgen 0.61.0 */
 
+#[repr(C)]
+#[repr(align(16))]
+#[derive(Debug, Copy, Clone)]
+pub struct uint32_t2 {
+    pub v: [u32; 2usize],
+}
+#[test]
+fn bindgen_test_layout_uint32_t2() {
+    const UNINIT: ::std::mem::MaybeUninit<uint32_t2> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<uint32_t2>(),
+        16usize,
+        concat!("Size of: ", stringify!(uint32_t2))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<uint32_t2>(),
+        16usize,
+        concat!("Alignment of ", stringify!(uint32_t2))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).v) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(uint32_t2),
+            "::",
+            stringify!(v)
+        )
+    );
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct Parameters {
+    pub degamma: bool,
+    pub gamma: bool,
+}
+#[test]
+fn bindgen_test_layout_Parameters() {
+    const UNINIT: ::std::mem::MaybeUninit<Parameters> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<Parameters>(),
+        2usize,
+        concat!("Size of: ", stringify!(Parameters))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<Parameters>(),
+        1usize,
+        concat!("Alignment of ", stringify!(Parameters))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).degamma) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(Parameters),
+            "::",
+            stringify!(degamma)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).gamma) as usize - ptr as usize },
+        1usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(Parameters),
+            "::",
+            stringify!(gamma)
+        )
+    );
+}
+#[repr(C)]
+#[repr(align(16))]
+#[derive(Debug, Copy, Clone)]
+pub struct Image {
+    pub data: *mut u8,
+    pub __bindgen_padding_0: u64,
+    pub size: uint32_t2,
+}
+#[test]
+fn bindgen_test_layout_Image() {
+    const UNINIT: ::std::mem::MaybeUninit<Image> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<Image>(),
+        32usize,
+        concat!("Size of: ", stringify!(Image))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<Image>(),
+        16usize,
+        concat!("Alignment of ", stringify!(Image))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).data) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(Image),
+            "::",
+            stringify!(data)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).size) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(Image),
+            "::",
+            stringify!(size)
+        )
+    );
+}
+#[repr(C)]
+#[repr(align(16))]
+#[derive(Debug, Copy, Clone)]
+pub struct FloatImage {
+    pub data: *mut f32,
+    pub __bindgen_padding_0: u64,
+    pub size: uint32_t2,
+}
+#[test]
+fn bindgen_test_layout_FloatImage() {
+    const UNINIT: ::std::mem::MaybeUninit<FloatImage> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<FloatImage>(),
+        32usize,
+        concat!("Size of: ", stringify!(FloatImage))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<FloatImage>(),
+        16usize,
+        concat!("Alignment of ", stringify!(FloatImage))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).data) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(FloatImage),
+            "::",
+            stringify!(data)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).size) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(FloatImage),
+            "::",
+            stringify!(size)
+        )
+    );
+}
 extern "C" {
     pub fn resample(
-        width: u32,
-        height: u32,
-        stride: u32,
+        params: *const Parameters,
+        src: *const Image,
+        degamma: *mut FloatImage,
+        dst: *mut Image,
         num_channels: u8,
-        target_width: u32,
-        target_height: u32,
-        src_data: *const u8,
-        out_data: *mut u8,
     );
 }
 extern "C" {
diff --git a/src/ispc/kernels/image.ispc b/src/ispc/kernels/image.ispc
index c906c24..66f5f16 100644
--- a/src/ispc/kernels/image.ispc
+++ b/src/ispc/kernels/image.ispc
@@ -1,4 +1,9 @@
 struct Image {
     uniform uint8* data;
-    uniform int<2> size;
+    uniform uint<2> size;
+};
+
+struct FloatImage {
+    uniform float* data;
+    uniform uint<2> size;
 };
diff --git a/src/ispc/kernels/lanczos3.ispc b/src/ispc/kernels/lanczos3.ispc
index 6767450..be0b23d 100644
--- a/src/ispc/kernels/lanczos3.ispc
+++ b/src/ispc/kernels/lanczos3.ispc
@@ -1,18 +1,29 @@
 #include "image.ispc"
 
-#define M_PI 3.14159265358979
-
-static inline float clean(float t)
+const uniform float M_PI = 3.14159265358979f;
+const uniform float GAMMA = 2.2f;
+const uniform float DEGAMMA = 1.0f / GAMMA;
+
+struct Parameters {
+    // Whether to linearize the input before downsampling. Assumes the input has a gamma of 1/2.2
+    // that needs to be linearized by applying exponent 2.2.
+    bool degamma;
+    // Whether to apply gamma (make the output nonlinear) to make it compatible with typical CRTs
+    // that have a gamma of 2.2, by giving linear values a gamma of 1/2.2.
+    bool gamma;
+};
+
+static inline uniform float clean(uniform float t)
 {
-    const float EPSILON = .0000125f;
+    const uniform float EPSILON = .0000125f;
     if (abs(t) < EPSILON)
         return 0.0f;
-    return (float)t;
+    return t;
 }
 
-static inline float sinc(float x)
+static inline uniform float sinc(uniform float x)
 {
-    x = (x * M_PI);
+    x = x * M_PI;
 
     // if ((x < 0.01f) && (x > -0.01f))
     //     return 1.0f + x * x * (-1.0f / 6.0f + x * x * 1.0f / 120.0f);
@@ -20,7 +31,7 @@ static inline float sinc(float x)
     return sin(x) / x;
 }
 
-static inline float lanczos3_filter(float t)
+static inline uniform float lanczos3_filter(uniform float t)
 {
     t = abs(t);
 
@@ -30,19 +41,61 @@ static inline float lanczos3_filter(float t)
     return 0.0f;
 }
 
-static inline float frac(float f) {
-    float absf = abs(f);
-    return absf - floor(absf);
+static inline float byte_to_float(uint8 b/*, uniform bool degamma*/) {
+    const uniform float inv_255 = rcp(255.0);
+    // floatbits(0x3f800000 | (b << (23 - 8))) - 1.0;
+    return (float)b * inv_255;
+}
+
+static inline uint8 float_to_byte(float d, uniform bool gamma) {
+    if (gamma) {
+        d = pow(max(d, 0.0f), DEGAMMA); // Lanczos ringing can dip below 0; pow() of a negative base is NaN
+    }
+    int b = d * 255;
+    return clamp(b, 0, 255);
+}
+
+template<typename IT>
+static float<4> sample_image(const uniform IT &image, const int<2> coord, const uniform uint8 num_channels) {
+    return 0.0f;
+}
+
+template<>
+static float<4> sample_image<Image>(const uniform Image &image, const int<2> coord, const uniform uint8 num_channels) {
+    float<4> col = 0.0;
+    int x = clamp(coord.x, 0, image.size.x - 1);
+    int y = clamp(coord.y, 0, image.size.y - 1);
+    int addr = (x + y * image.size.x) * num_channels;
+
+    col[0] = byte_to_float(image.data[addr + 0]);
+    col[1] = byte_to_float(image.data[addr + 1]);
+    col[2] = byte_to_float(image.data[addr + 2]);
+    if (num_channels == 4)
+        col[3] = byte_to_float(image.data[addr + 3]);
+
+    return col;
 }
 
-static inline float byte_to_float(uint b) {
-    //return floatbits(0x3f800000 | (b << (23 - 8))) - 1.0;
-    return (float)b;
+template<>
+static float<4> sample_image<FloatImage>(const uniform FloatImage &image, const int<2> coord, const uniform uint8 num_channels) {
+    float<4> col = 0.0;
+    int x = clamp(coord.x, 0, image.size.x - 1);
+    int y = clamp(coord.y, 0, image.size.y - 1);
+    int addr = (x + y * image.size.x) * num_channels;
+
+    col[0] = image.data[addr + 0];
+    col[1] = image.data[addr + 1];
+    col[2] = image.data[addr + 2];
+    if (num_channels == 4)
+        col[3] = image.data[addr + 3];
+
+    return col;
 }
 
-static inline uint8<4> resample_internal(uniform Image src_image, float<2> uv, uniform uint8 num_channels) {
+template<typename IT>
+static inline float<4> resample_internal(const uniform IT src_image, const float<2> uv, const uniform uint8 num_channels) {
     float<4> col = 0.0;
-    float weight = 0.0;
+    uniform float weight = 0.0;
 
     // Truncate floating point coordinate to integer:
     const int<2> src_coord = uv * src_image.size;
@@ -52,57 +105,62 @@ static inline uint8<4> resample_internal(uniform Image src_image, float<2> uv, u
     // right and bottom of the target pixel.
     for (uniform int x = -3; x < 3; x++) {
         for (uniform int y = -3; y < 3; y++) {
-            float wx = lanczos3_filter((uniform float)x + 0.5);
-            float wy = lanczos3_filter((uniform float)y + 0.5);
+            const uniform float wx = lanczos3_filter((uniform float)x + 0.5);
+            const uniform float wy = lanczos3_filter((uniform float)y + 0.5);
+            const uniform float w = wx * wy;
+            const uniform int<2> texel_offset = {x, y};
 
-            float w = wx * wy;
-            int<2> texel_offset = {x, y};
-            int<2> src_kernel_coord = src_coord + texel_offset;
+            int<2> c = src_coord + texel_offset;
 
             // TODO: Let the user specify a boundary mode!
             // https://github.com/Traverse-Research/ispc-downsampler/issues/25#issuecomment-1584915050
-            src_kernel_coord.x = clamp(src_kernel_coord.x, 0, src_image.size.x - 1);
-            src_kernel_coord.y = clamp(src_kernel_coord.y, 0, src_image.size.y - 1);
-
-            int addr = (src_kernel_coord.x + src_kernel_coord.y * src_image.size.x) * num_channels;
-
-            float<4> texel;
-
-            const float inv_255 = rcp(255.0);
+            // TODO: For some obscure reason this must happen in sample_image() or the whole thing segfaults because
+            // values become <0 !?!?
+            // c.x = clamp(c.x, 0, src_image.size.x - 1);
+            // c.y = clamp(c.y, 0, src_image.size.y - 1);
 
-            if (num_channels == 3) {
-                texel.x = byte_to_float(src_image.data[addr + 0]) * inv_255;
-                texel.y = byte_to_float(src_image.data[addr + 1]) * inv_255;
-                texel.z = byte_to_float(src_image.data[addr + 2]) * inv_255;
-            } else if (num_channels == 4) {
-                texel.x = byte_to_float(src_image.data[addr + 0]) * inv_255;
-                texel.y = byte_to_float(src_image.data[addr + 1]) * inv_255;
-                texel.z = byte_to_float(src_image.data[addr + 2]) * inv_255;
-                texel.w = byte_to_float(src_image.data[addr + 3]) * inv_255;
-            }
-
-            col += w * texel;
             weight += w;
+            col += w * sample_image(src_image, c, num_channels);
         }
     }
 
     col /= weight;
-    return col * 255;
+    return col;
 }
 
-export void resample(uniform uint32 width, uniform uint32 height, uniform uint32 stride, uniform uint8 num_channels, uniform uint32 target_width, uniform uint32 target_height, uniform const uint8 src_data[], uniform uint8 out_data[]) {
-    uniform Image src = {src_data, {width, height}};
-    uniform float<2> target_size = {(float)target_width, (float)target_height};
-    uniform float<2> inv_target_size = 1.0f / target_size;
+export void resample(
+    uniform const Parameters &params,
+    uniform const Image &src,
+    uniform FloatImage &degamma,
+    uniform Image &dst,
+    // Passed separately because it should be the same between input and output:
+    uniform uint8 num_channels
+) {
+    const uniform float<2> inv_target_size = 1.0f / dst.size;
+
+    if (params.degamma) {
+        foreach_tiled(y = 0 ... src.size.y, x = 0 ... src.size.x)
+        {
+            uint p = (x + y * src.size.x) * num_channels;
+            for (uniform int i = 0; i < num_channels; i++) {
+                uint c = p + i;
+                degamma.data[c] = pow(byte_to_float(src.data[c]), GAMMA);
+            }
+        }
+    }
 
-    foreach_tiled (y = 0 ... target_height, x = 0 ... target_width) {
+    foreach_tiled (y = 0 ... dst.size.y, x = 0 ... dst.size.x) {
         float<2> uv = {x, y};
         // Use the center of each pixel, not the top-left:
         uv += 0.5f;
         // Convert to uniform space:
         uv *= inv_target_size;
-        uint8<4> s = resample_internal(src, uv, num_channels);
+        float<4> col;
+        if (params.degamma)
+            col = resample_internal(degamma, uv, num_channels);
+        else
+            col = resample_internal(src, uv, num_channels);
         for (uniform int i = 0; i < num_channels; i++)
-            out_data[(x + y * target_width) * num_channels + i] = s[i];
+            dst.data[(x + y * dst.size.x) * num_channels + i] = float_to_byte(col[i], params.gamma);
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 3f6d36b..fd03711 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,6 +35,25 @@ impl<'a> Image<'a> {
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct Parameters {
+    /// Whether to linearize the input before downsampling. Assumes the input has a gamma of
+    /// `1/2.2` that needs to be linearized by applying exponent `2.2`.
+    pub degamma: bool,
+    /// Whether to apply gamma (make the output nonlinear) to make it compatible with typical CRTs
+    /// that have a gamma of `2.2`, by giving linear values a gamma of `1/2.2`.
+    pub gamma: bool,
+}
+
+impl Parameters {
+    fn to_ispc(&self) -> ispc::downsample_ispc::Parameters {
+        ispc::downsample_ispc::Parameters {
+            degamma: self.degamma,
+            gamma: self.gamma,
+        }
+    }
+}
+
 /// Scales the alpha to the downscaled texture to preserve the overall alpha coverage.
 ///
 /// If alpha cutoff is specified, any alpha value above it is considered visible of
@@ -70,23 +89,64 @@ pub fn scale_alpha_to_original_coverage(
 /// Runs the ISPC kernel on the source image, sampling it down to the `target_width` and `target_height`. Returns the downsampled pixel data as a `Vec<u8>`.
 ///
 /// Will panic if the target width or height are higher than that of the source image.
+///
+/// `params` selects whether the source is linearized before filtering and whether the result
+/// is gamma-encoded again afterwards; see [`Parameters`].
-pub fn downsample(src: &Image<'_>, target_width: u32, target_height: u32) -> Vec<u8> {
+pub fn downsample(
+    params: &Parameters,
+    src: &Image<'_>,
+    target_width: u32,
+    target_height: u32,
+) -> Vec<u8> {
     assert!(src.width >= target_width, "The width of the source image is less than the target's width. You are trying to upsample rather than downsample");
-    assert!(src.height >= target_height, "The width of the source image is less than the target's width. You are trying to upsample rather than downsample");
+    assert!(src.height >= target_height, "The height of the source image is less than the target's height. You are trying to upsample rather than downsample");
 
     let num_channels = src.format.num_channels();
+
+    let src_raw = ispc::downsample_ispc::Image {
+        data: src.pixels.as_ptr() as *mut _,
+        __bindgen_padding_0: 0,
+        size: ispc::downsample_ispc::uint32_t2 {
+            v: [src.width, src.height],
+        },
+    };
+
+    // Scratch buffer that receives the linearized source pixels. It is bound in function scope
+    // (not inside the closure that builds the `FloatImage`) so the allocation outlives the raw
+    // pointer handed to ISPC; otherwise the kernel would write into freed memory.
+    let mut degamma_pixels = params
+        .degamma
+        .then(|| vec![0f32; (src.width * src.height * num_channels as u32) as usize]);
+    let mut degamma = degamma_pixels
+        .as_mut()
+        .map(|pixels| ispc::downsample_ispc::FloatImage {
+            data: pixels.as_mut_ptr(),
+            __bindgen_padding_0: 0,
+            size: ispc::downsample_ispc::uint32_t2 {
+                v: [src.width, src.height],
+            },
+        });
+
     let mut output = vec![0; (target_width * target_height * num_channels as u32) as usize];
 
+    let mut dst = ispc::downsample_ispc::Image {
+        data: output.as_mut_ptr(),
+        __bindgen_padding_0: 0,
+        size: ispc::downsample_ispc::uint32_t2 {
+            v: [target_width, target_height],
+        },
+    };
+
+    // SAFETY: every pointer passed to the kernel refers to a local that stays alive until the
+    // call returns (`src_raw`, `degamma_pixels`, `output`, `dst`). `src.pixels` is assumed to
+    // hold `width * height * num_channels` bytes; that is not checked here.
     unsafe {
         ispc::downsample_ispc::resample(
-            src.width,
-            src.height,
-            src.width,
+            &params.to_ispc(),
+            &src_raw,
+            degamma.as_mut().map_or(std::ptr::null_mut(), |x| x),
+            &mut dst,
             num_channels,
-            target_width,
-            target_height,
-            src.pixels.as_ptr(),
-            output.as_mut_ptr(),
         )
     }
 