Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ The crate comes with the bindings and precompiled libraries for Windows, Linux a

## Usage

Create a new `ispc_downsampler::Image` from a slice of the texture's pixels, the dimensions of the source image, and the format it is in. Currently only works with RGB8 and RGBA8 textures.
Create a new `ispc_downsampler::Image` from a slice of the texture's pixels, the dimensions of the source image, and the format it is in.
Call `ispc_downsampler::downsample` with the source image and the target dimensions for the downsampled image. The function returns a `Vec<u8>` containing the pixels of the downsampled image, in the same format as the source image.

#### Example
Expand Down
30 changes: 27 additions & 3 deletions examples/test.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use image::{RgbImage, RgbaImage};
use image::{GrayAlphaImage, GrayImage, RgbImage, RgbaImage};
use ispc_downsampler::{downsample_with_custom_scale, Format, Image};
use stb_image::image::{load, LoadResult};
use std::path::Path;
Expand All @@ -11,10 +11,18 @@ fn main() {
LoadResult::ImageU8(img) => {
assert!(!img.data.is_empty());

let src_fmt = if img.data.len() / (img.width * img.height) == 4 {
let num_channels = img.data.len() / (img.width * img.height);

let src_fmt = if num_channels == 4 {
Format::Rgba8
} else {
} else if num_channels == 3 {
Format::Rgb8
} else if num_channels == 2 {
Format::Rg8
} else if num_channels == 1 {
Format::R8
} else {
panic!("We expect a number of channels in the [1, 4] range");
};

println!("Loaded image!");
Expand All @@ -32,6 +40,22 @@ fn main() {

std::fs::create_dir_all("example_outputs").unwrap();
match src_fmt {
Format::R8 => {
let save_image =
GrayImage::from_vec(target_width, target_height, downsampled_pixels)
.unwrap();
save_image
.save("example_outputs/square_test_result.png")
.unwrap()
}
Format::Rg8 => {
let save_image =
GrayAlphaImage::from_vec(target_width, target_height, downsampled_pixels)
.unwrap();
save_image
.save("example_outputs/square_test_result.png")
.unwrap()
}
Format::Rgba8 | Format::Srgba8 => {
let save_image =
RgbaImage::from_vec(target_width, target_height, downsampled_pixels)
Expand Down
Binary file modified src/ispc/downsample_ispcx86_64-pc-windows-msvc.lib
Binary file not shown.
58 changes: 54 additions & 4 deletions src/ispc/kernels/lanczos3.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,34 @@ struct SampleWeights {
uniform const WeightCollection* horizontal_weights;
};

// Loads the single-channel pixel located at `pixel_ptr` and returns it as a 1-wide short vector.
uint8<1> sample_1_channel(const uniform uint8* varying pixel_ptr) {
    // View the byte address as a 1-wide short vector so the pixel is fetched with one dereference.
    const uniform uint8<1>* texel_ptr = (const uniform uint8<1>*)(pixel_ptr);
    varying uint8<1> texel = *texel_ptr;
    return texel;
}

// Clamps the accumulated value to [0, 255] and stores it as the single channel at `pixel_ptr`.
void clean_and_write_1_channel(varying float<1> color, uniform uint8* varying pixel_ptr) {
    // Clamp before the narrowing store: the weighted sum can land slightly outside the
    // byte range, and storing such a value unclamped would underflow/overflow.
    const float channel0 = clamp(color[0], 0.0f, 255.0f);
    pixel_ptr[0] = channel0;
}

// Loads the two-channel pixel located at `pixel_ptr` and returns it as a 2-wide short vector.
uint8<2> sample_2_channels(const uniform uint8* varying pixel_ptr) {
    // Reinterpret the byte address as a 2-wide short vector so both channels are read
    // together instead of one at a time; the original author measured this as faster.
    const uniform uint8<2>* texel_ptr = (const uniform uint8<2>*)(pixel_ptr);
    varying uint8<2> texel = *texel_ptr;
    return texel;
}

// Clamps both accumulated channels to [0, 255] and stores them at `pixel_ptr[0]` and `pixel_ptr[1]`.
void clean_and_write_2_channels(varying float<2> color, uniform uint8* varying pixel_ptr) {
    // Each channel is a sum of source values scaled by their filter weights. Floating-point
    // rounding in that sum can push the result marginally below 0 or above 255, which would
    // underflow/overflow when narrowed to a byte, so every channel is clamped first.
    const float channel0 = clamp(color[0], 0.0f, 255.0f);
    const float channel1 = clamp(color[1], 0.0f, 255.0f);

    pixel_ptr[0] = channel0;
    pixel_ptr[1] = channel1;
}

uint8<3> sample_3_channels(const uniform uint8* varying pixel_ptr) {
// Memory reinterpretation to read all channels at once rather than one-by-one.
// While testing, this proved more performant than reading one-by-one.
Expand Down Expand Up @@ -136,22 +164,32 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
uint32 num_horizontal_weights = horizontal_weight_collection->weight_counts[x];
float* horizontal_weights = horizontal_weight_collection->values[x];

float<1> color1 = {0.0f};
float<2> color2 = {0.0f, 0.0f};
float<3> color3 = {0.0f, 0.0f, 0.0f};
float<4> color4 = {0.0f, 0.0f, 0.0f, 0.0f};
for (uint32 i = 0; i < num_horizontal_weights; i++) {
float weight = horizontal_weights[i];
uint32 src_x = src_width_start + i;
uint64 src_read_address = (y * src_width + src_x) * num_channels;

if (num_channels == 3)
if (num_channels == 1)
color1 += sample_1_channel(src_data + src_read_address) * weight;
else if (num_channels == 2)
color2 += sample_2_channels(src_data + src_read_address) * weight;
else if (num_channels == 3)
color3 += sample_3_channels(src_data + src_read_address) * weight;
else
color4 += sample_4_channels(src_data + src_read_address) * weight;
}

uint64 scratch_write_address = (y * target_width + x) * num_channels;

if (num_channels == 3)
if (num_channels == 1)
clean_and_write_1_channel(color1, scratch_space + scratch_write_address);
else if (num_channels == 2)
clean_and_write_2_channels(color2, scratch_space + scratch_write_address);
else if (num_channels == 3)
clean_and_write_3_channels(color3, scratch_space + scratch_write_address);
else
clean_and_write_4_channels(color4, scratch_space + scratch_write_address);
Expand All @@ -163,6 +201,9 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
uint32 src_height_start = vertical_weight_collection->starts[y];
uint32 num_vertical_weights = vertical_weight_collection->weight_counts[y];
float* vertical_weights = vertical_weight_collection->values[y];

float<1> color1 = {0.0f};
float<2> color2 = {0.0f, 0.0f};
float<3> color3 = {0.0f, 0.0f, 0.0f};
float<4> color4 = {0.0f, 0.0f, 0.0f, 0.0f};
for (uint32 i = 0; i < num_vertical_weights; i++) {
Expand All @@ -173,15 +214,24 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
uniform uint8<3>* varying scratch_pixel_ptr = (uniform uint8<3>* varying)(scratch_space + scratch_read_address);
uint8<3> scratch_color = *scratch_pixel_ptr;

if (num_channels == 3)
if (num_channels == 1)
color1 += sample_1_channel(scratch_space + scratch_read_address) * weight;
else if (num_channels == 2)
color2 += sample_2_channels(scratch_space + scratch_read_address) * weight;
else if (num_channels == 3)
color3 += sample_3_channels(scratch_space + scratch_read_address) * weight;
else
color4 += sample_4_channels(scratch_space + scratch_read_address) * weight;
}

uint64 out_write_address = (y * target_width + x) * num_channels;
assert(out_write_address < target_height * target_width * num_channels);
if (num_channels == 3)

if (num_channels == 1)
clean_and_write_1_channel(color1, out_data + out_write_address);
else if (num_channels == 2)
clean_and_write_2_channels(color2, out_data + out_write_address);
else if (num_channels == 3)
clean_and_write_3_channels(color3, out_data + out_write_address);
else
clean_and_write_4_channels(color4, out_data + out_write_address);
Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ mod ispc;

#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub enum Format {
R8,
Rg8,
Rgb8,
Srgb8,
Rgba8,
Expand All @@ -15,6 +17,8 @@ pub enum Format {
impl Format {
fn num_channels(&self) -> u8 {
match self {
Self::R8 => 1,
Self::Rg8 => 2,
Self::Rgb8 | Self::Srgb8 => 3,
Self::Rgba8 | Self::Srgba8 => 4,
}
Expand Down