diff --git a/README.md b/README.md index 4203d15..a795f7c 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ The crate comes with the bindings and precompiled libraries for Windows, Linux a ## Usage -Create a new `ispc_downsampler::Image` from a slice of the texture's pixels, the dimensions of the source image, and the format it is in. Currently only works with RGB8 and RGBA8 textures. +Create a new `ispc_downsampler::Image` from a slice of the texture's pixels, the dimensions of the source image, and the format it is in. Call `ispc_downsampler::downsample` with the source image, and the target dimension for downsampled image. The function will return a `Vec` with the pixels of the downsampled image in the same format as the source image. #### Example diff --git a/examples/test.rs b/examples/test.rs index 2b7ba25..00e3291 100644 --- a/examples/test.rs +++ b/examples/test.rs @@ -1,4 +1,4 @@ -use image::{RgbImage, RgbaImage}; +use image::{GrayAlphaImage, GrayImage, RgbImage, RgbaImage}; use ispc_downsampler::{downsample_with_custom_scale, Format, Image}; use stb_image::image::{load, LoadResult}; use std::path::Path; @@ -11,10 +11,18 @@ fn main() { LoadResult::ImageU8(img) => { assert!(!img.data.is_empty()); - let src_fmt = if img.data.len() / (img.width * img.height) == 4 { + let num_channels = img.data.len() / (img.width * img.height); + + let src_fmt = if num_channels == 4 { Format::Rgba8 - } else { + } else if num_channels == 3 { Format::Rgb8 + } else if num_channels == 2 { + Format::Rg8 + } else if num_channels == 1 { + Format::R8 + } else { + panic!("We expect a number of channels in the [1, 4] range"); }; println!("Loaded image!"); @@ -32,6 +40,22 @@ fn main() { std::fs::create_dir_all("example_outputs").unwrap(); match src_fmt { + Format::R8 => { + let save_image = + GrayImage::from_vec(target_width, target_height, downsampled_pixels) + .unwrap(); + save_image + .save("example_outputs/square_test_result.png") + .unwrap() + } + Format::Rg8 => { + let save_image = + GrayAlphaImage::from_vec(target_width, target_height, downsampled_pixels) + .unwrap(); + save_image + .save("example_outputs/square_test_result.png") + .unwrap() + } Format::Rgba8 | Format::Srgba8 => { let save_image = RgbaImage::from_vec(target_width, target_height, downsampled_pixels) diff --git a/src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib b/src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib index 14b8010..4ed6e7d 100644 Binary files a/src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib and b/src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib differ diff --git a/src/ispc/kernels/lanczos3.ispc b/src/ispc/kernels/lanczos3.ispc index 456bfc0..32fbe45 100644 --- a/src/ispc/kernels/lanczos3.ispc +++ b/src/ispc/kernels/lanczos3.ispc @@ -90,6 +90,34 @@ struct SampleWeights { uniform const WeightCollection* horizontal_weights; }; +uint8<1> sample_1_channel(const uniform uint8* varying pixel_ptr) { + const uniform uint8<1>* pixel_ptr1 = (const uniform uint8<1>*)(pixel_ptr); + varying uint8<1> dst = {0}; + dst = *pixel_ptr1; + return dst; +} + +void clean_and_write_1_channel(varying float<1> color, uniform uint8* varying pixel_ptr) { + pixel_ptr[0] = clamp(color[0], 0.0f, 255.0f); +} + +uint8<2> sample_2_channels(const uniform uint8* varying pixel_ptr) { + // Memory reinterpretation to read all channels at once rather than one-by-one. + // While testing, this proved more performant than reading one-by-one. + const uniform uint8<2>* pixel_ptr2 = (const uniform uint8<2>*)(pixel_ptr); + varying uint8<2> dst = {0, 0}; + dst = *pixel_ptr2; + return dst; +} + +void clean_and_write_2_channels(varying float<2> color, uniform uint8* varying pixel_ptr) { + // The final color is a sum of numbers that are multiplied by the weights of their respective pixels. + // Because of their numbers, floating point precision leads to the final color being potentially outside of the 0-255 range by a slight margin. + // This would cause an underflow/overflow, which we avoid with the clamps. + pixel_ptr[0] = clamp(color[0], 0.0f, 255.0f); + pixel_ptr[1] = clamp(color[1], 0.0f, 255.0f); +} + uint8<3> sample_3_channels(const uniform uint8* varying pixel_ptr) { // Memory reinterpretation to read all channels at once rather than one-by-one. // While testing, this proved more performant than reading one-by-one. @@ -136,6 +164,8 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr uint32 num_horizontal_weights = horizontal_weight_collection->weight_counts[x]; float* horizontal_weights = horizontal_weight_collection->values[x]; + float<1> color1 = {0.0f}; + float<2> color2 = {0.0f, 0.0f}; float<3> color3 = {0.0f, 0.0f, 0.0f}; float<4> color4 = {0.0f, 0.0f, 0.0f, 0.0f}; for (uint32 i = 0; i < num_horizontal_weights; i++) { @@ -143,7 +173,11 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr uint32 src_x = src_width_start + i; uint64 src_read_address = (y * src_width + src_x) * num_channels; - if (num_channels == 3) + if (num_channels == 1) + color1 += sample_1_channel(src_data + src_read_address) * weight; + else if (num_channels == 2) + color2 += sample_2_channels(src_data + src_read_address) * weight; + else if (num_channels == 3) color3 += sample_3_channels(src_data + src_read_address) * weight; else color4 += sample_4_channels(src_data + src_read_address) * weight; @@ -151,7 +185,11 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr uint64 scratch_write_address = (y * target_width + x) * num_channels; - if (num_channels == 3) + if (num_channels == 1) + clean_and_write_1_channel(color1, scratch_space + scratch_write_address); + else if (num_channels == 2) + clean_and_write_2_channels(color2, scratch_space + scratch_write_address); + else if (num_channels == 3) clean_and_write_3_channels(color3, scratch_space + scratch_write_address); else clean_and_write_4_channels(color4, scratch_space + scratch_write_address); @@ -163,6 +201,9 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr uint32 src_height_start = vertical_weight_collection->starts[y]; uint32 num_vertical_weights = vertical_weight_collection->weight_counts[y]; float* vertical_weights = vertical_weight_collection->values[y]; + + float<1> color1 = {0.0f}; + float<2> color2 = {0.0f, 0.0f}; float<3> color3 = {0.0f, 0.0f, 0.0f}; float<4> color4 = {0.0f, 0.0f, 0.0f, 0.0f}; for (uint32 i = 0; i < num_vertical_weights; i++) { @@ -173,7 +214,11 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr uniform uint8<3>* varying scratch_pixel_ptr = (uniform uint8<3>* varying)(scratch_space + scratch_read_address); uint8<3> scratch_color = *scratch_pixel_ptr; - if (num_channels == 3) + if (num_channels == 1) + color1 += sample_1_channel(scratch_space + scratch_read_address) * weight; + else if (num_channels == 2) + color2 += sample_2_channels(scratch_space + scratch_read_address) * weight; + else if (num_channels == 3) color3 += sample_3_channels(scratch_space + scratch_read_address) * weight; else color4 += sample_4_channels(scratch_space + scratch_read_address) * weight; @@ -181,7 +226,12 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr uint64 out_write_address = (y * target_width + x) * num_channels; assert(out_write_address < target_height * target_width * num_channels); - if (num_channels == 3) + + if (num_channels == 1) + clean_and_write_1_channel(color1, out_data + out_write_address); + else if (num_channels == 2) + clean_and_write_2_channels(color2, out_data + out_write_address); + else if (num_channels == 3) clean_and_write_3_channels(color3, out_data + out_write_address); else clean_and_write_4_channels(color4, out_data + out_write_address); diff --git a/src/lib.rs b/src/lib.rs index 28eb9a5..72474b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,8 @@ mod ispc; #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub enum Format { + R8, + Rg8, Rgb8, Srgb8, Rgba8, @@ -15,6 +17,8 @@ pub enum Format { impl Format { fn num_channels(&self) -> u8 { match self { + Self::R8 => 1, + Self::Rg8 => 2, Self::Rgb8 | Self::Srgb8 => 3, Self::Rgba8 | Self::Srgba8 => 4, }