Skip to content

Commit 693fcfb

Browse files
committed
fix rebase conflicts
1 parent 1d43583 commit 693fcfb

2 files changed

Lines changed: 66 additions & 13 deletions

File tree

examples/test.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use image::{RgbImage, RgbaImage};
1+
use image::{GrayAlphaImage, GrayImage, RgbImage, RgbaImage};
22
use ispc_downsampler::{downsample_with_custom_scale, Format, Image};
33
use stb_image::image::{load, LoadResult};
44
use std::path::Path;
@@ -11,10 +11,18 @@ fn main() {
1111
LoadResult::ImageU8(img) => {
1212
assert!(!img.data.is_empty());
1313

14-
let src_fmt = if img.data.len() / (img.width * img.height) == 4 {
14+
let num_channels = img.data.len() / (img.width * img.height);
15+
16+
let src_fmt = if num_channels == 4 {
1517
Format::Rgba8
16-
} else {
18+
} else if num_channels == 3 {
1719
Format::Rgb8
20+
} else if num_channels == 2 {
21+
Format::Rg8
22+
} else if num_channels == 1 {
23+
Format::R8
24+
} else {
25+
panic!("We expect a number of channels in the [1, 4] range");
1826
};
1927

2028
println!("Loaded image!");
@@ -32,6 +40,22 @@ fn main() {
3240

3341
std::fs::create_dir_all("example_outputs").unwrap();
3442
match src_fmt {
43+
Format::R8 => {
44+
let save_image =
45+
GrayImage::from_vec(target_width, target_height, downsampled_pixels)
46+
.unwrap();
47+
save_image
48+
.save("example_outputs/square_test_result.png")
49+
.unwrap()
50+
}
51+
Format::Rg8 => {
52+
let save_image =
53+
GrayAlphaImage::from_vec(target_width, target_height, downsampled_pixels)
54+
.unwrap();
55+
save_image
56+
.save("example_outputs/square_test_result.png")
57+
.unwrap()
58+
}
3559
Format::Rgba8 | Format::Srgba8 => {
3660
let save_image =
3761
RgbaImage::from_vec(target_width, target_height, downsampled_pixels)

src/ispc/kernels/lanczos3.ispc

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,22 +90,32 @@ struct SampleWeights {
9090
uniform const WeightCollection* horizontal_weights;
9191
};
9292

93-
uint8<3> sample_3_channels(const uniform uint8* varying pixel_ptr) {
93+
uint8<1> sample_1_channel(const uniform uint8* varying pixel_ptr) {
94+
const uniform uint8<1>* pixel_ptr1 = (const uniform uint8<1>*)(pixel_ptr);
95+
varying uint8<1> dst = {0};
96+
dst = *pixel_ptr1;
97+
return dst;
98+
}
99+
100+
void clean_and_write_1_channel(varying float<1> color, uniform uint8* varying pixel_ptr) {
101+
pixel_ptr[0] = clamp(color[0], 0.0f, 255.0f);
102+
}
103+
104+
uint8<2> sample_2_channels(const uniform uint8* varying pixel_ptr) {
94105
// Memory reinterpretation to read all channels at once rather than one-by-one.
95106
// While testing, this proved more performant than reading one-by-one.
96-
const uniform uint8<3>* pixel_ptr3 = (const uniform uint8<3>*)(pixel_ptr);
97-
varying uint8<3> dst = {0, 0, 0};
98-
dst = *pixel_ptr3;
107+
const uniform uint8<2>* pixel_ptr2 = (const uniform uint8<2>*)(pixel_ptr);
108+
varying uint8<2> dst = {0, 0};
109+
dst = *pixel_ptr2;
99110
return dst;
100111
}
101112

102-
void clean_and_write_3_channels(varying float<3> color, uniform uint8* varying pixel_ptr) {
113+
void clean_and_write_2_channels(varying float<2> color, uniform uint8* varying pixel_ptr) {
103114
// The final color is a sum of numbers that are multiplied by the weights of their respective pixels.
104115
// Because of their numbers, floating point precision leads to the final color being potentially outside of the 0-255 range by a slight margin.
105116
// This would cause an underflow/overflow, which we avoid with the clamps.
106117
pixel_ptr[0] = clamp(color[0], 0.0f, 255.0f);
107118
pixel_ptr[1] = clamp(color[1], 0.0f, 255.0f);
108-
pixel_ptr[2] = clamp(color[2], 0.0f, 255.0f);
109119
}
110120

111121
uint8<4> sample_4_channels(const uniform uint8* varying pixel_ptr) {
@@ -136,22 +146,32 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
136146
uint32 num_horizontal_weights = horizontal_weight_collection->weight_counts[x];
137147
float* horizontal_weights = horizontal_weight_collection->values[x];
138148

149+
float<1> color1 = {0.0f};
150+
float<2> color2 = {0.0f, 0.0f};
139151
float<3> color3 = {0.0f, 0.0f, 0.0f};
140152
float<4> color4 = {0.0f, 0.0f, 0.0f, 0.0f};
141153
for (uint32 i = 0; i < num_horizontal_weights; i++) {
142154
float weight = horizontal_weights[i];
143155
uint32 src_x = src_width_start + i;
144156
uint64 src_read_address = (y * src_width + src_x) * num_channels;
145157

146-
if (num_channels == 3)
158+
if (num_channels == 1)
159+
color1 += sample_1_channel(src_data + src_read_address) * weight;
160+
else if (num_channels == 2)
161+
color2 += sample_2_channels(src_data + src_read_address) * weight;
162+
else if (num_channels == 3)
147163
color3 += sample_3_channels(src_data + src_read_address) * weight;
148164
else
149165
color4 += sample_4_channels(src_data + src_read_address) * weight;
150166
}
151167

152168
uint64 scratch_write_address = (y * target_width + x) * num_channels;
153169

154-
if (num_channels == 3)
170+
if (num_channels == 1)
171+
clean_and_write_1_channel(color1, scratch_space + scratch_write_address);
172+
else if (num_channels == 2)
173+
clean_and_write_2_channels(color2, scratch_space + scratch_write_address); // color2 is float<2>: use the 2-channel writer so only two bytes are written
174+
else if (num_channels == 3)
155175
clean_and_write_3_channels(color3, scratch_space + scratch_write_address);
156176
else
157177
clean_and_write_4_channels(color4, scratch_space + scratch_write_address);
@@ -173,15 +193,24 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
173193
uniform uint8<3>* varying scratch_pixel_ptr = (uniform uint8<3>* varying)(scratch_space + scratch_read_address);
174194
uint8<3> scratch_color = *scratch_pixel_ptr;
175195

176-
if (num_channels == 3)
196+
if (num_channels == 1)
197+
color1 += sample_1_channel(scratch_space + scratch_read_address) * weight;
198+
else if (num_channels == 2)
199+
color2 += sample_2_channels(scratch_space + scratch_read_address) * weight;
200+
else if (num_channels == 3)
177201
color3 += sample_3_channels(scratch_space + scratch_read_address) * weight;
178202
else
179203
color4 += sample_4_channels(scratch_space + scratch_read_address) * weight;
180204
}
181205

182206
uint64 out_write_address = (y * target_width + x) * num_channels;
183207
assert(out_write_address < target_height * target_width * num_channels);
184-
if (num_channels == 3)
208+
209+
if (num_channels == 1)
210+
clean_and_write_1_channel(color1, out_data + out_write_address);
211+
else if (num_channels == 2)
212+
clean_and_write_2_channels(color2, out_data + out_write_address);
213+
else if (num_channels == 3)
185214
clean_and_write_3_channels(color3, out_data + out_write_address);
186215
else
187216
clean_and_write_4_channels(color4, out_data + out_write_address);

0 commit comments

Comments
 (0)