@@ -90,22 +90,32 @@ struct SampleWeights {
9090 uniform const WeightCollection* horizontal_weights;
9191};
9292
93- uint8 < 3 > sample_3_channels (const uniform uint8 * varying pixel_ptr) {
// Reads the single 8-bit channel of the pixel located at pixel_ptr and returns it
// as a one-element short vector. Every program instance passes its own address.
uint8<1> sample_1_channel(const uniform uint8 * varying pixel_ptr) {
    // Reinterpret the byte pointer as a pointer to a 1-wide vector, the same
    // reinterpreting read the multi-channel samplers in this file perform.
    const uniform uint8<1> * varying texel_ptr = (const uniform uint8<1> * varying)pixel_ptr;
    varying uint8<1> texel = *texel_ptr;
    return texel;
}
99+
// Stores a 1-channel floating-point color as a single byte at pixel_ptr.
// The value is clamped to [0, 255] before being converted to uint8.
void clean_and_write_1_channel(varying float<1> color, uniform uint8 * varying pixel_ptr) {
    const varying float bounded = clamp(color[0], 0.0f, 255.0f);
    pixel_ptr[0] = bounded;
}
103+
// Reads the two 8-bit channels of the pixel located at pixel_ptr and returns them
// as a 2-element short vector. Every program instance passes its own address.
uint8<2> sample_2_channels(const uniform uint8 * varying pixel_ptr) {
    // Both channels are fetched through one reinterpreted vector pointer instead of
    // two separate byte reads; per the original note, this measured faster in testing.
    const uniform uint8<2> * varying texel_ptr = (const uniform uint8<2> * varying)pixel_ptr;
    varying uint8<2> texel = *texel_ptr;
    return texel;
}
101112
102- void clean_and_write_3_channels (varying float < 3 > color, uniform uint8 * varying pixel_ptr) {
// Stores a 2-channel floating-point color as two consecutive bytes at pixel_ptr.
void clean_and_write_2_channels(varying float<2> color, uniform uint8 * varying pixel_ptr) {
    // The color is a sum of samples scaled by their weights, so floating-point
    // rounding can push a channel marginally below 0 or above 255. Clamping first
    // keeps the conversion to uint8 from underflowing or overflowing.
    const varying float first = clamp(color[0], 0.0f, 255.0f);
    const varying float second = clamp(color[1], 0.0f, 255.0f);

    pixel_ptr[0] = first;
    pixel_ptr[1] = second;
}
110120
111121uint8 < 4 > sample_4_channels (const uniform uint8 * varying pixel_ptr) {
@@ -136,22 +146,32 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
136146 uint32 num_horizontal_weights = horizontal_weight_collection-> weight_counts[x];
137147 float * horizontal_weights = horizontal_weight_collection-> values[x];
138148
149+ float < 1 > color1 = {0.0f };
150+ float < 2 > color2 = {0.0f , 0.0f };
139151 float < 3 > color3 = {0.0f , 0.0f , 0.0f };
140152 float < 4 > color4 = {0.0f , 0.0f , 0.0f , 0.0f };
141153 for (uint32 i = 0 ; i < num_horizontal_weights; i++ ) {
142154 float weight = horizontal_weights[i];
143155 uint32 src_x = src_width_start + i;
144156 uint64 src_read_address = (y * src_width + src_x) * num_channels;
145157
146- if (num_channels == 3 )
158+ if (num_channels == 1 )
159+ color1 += sample_1_channel (src_data + src_read_address) * weight;
160+ else if (num_channels == 2 )
161+ color2 += sample_2_channels (src_data + src_read_address) * weight;
162+ else if (num_channels == 3 )
147163 color3 += sample_3_channels (src_data + src_read_address) * weight;
148164 else
149165 color4 += sample_4_channels (src_data + src_read_address) * weight;
150166 }
151167
152168 uint64 scratch_write_address = (y * target_width + x) * num_channels;
153169
154- if (num_channels == 3 )
// Store the horizontally filtered color in the scratch buffer using the writer
// whose vector width matches the accumulator for this channel count. The
// 2-channel accumulator (float<2>) must go through the 2-channel writer; the
// 3-channel writer takes a float<3> and stores a third byte.
// Any channel count other than 1, 2 or 3 falls through to the 4-channel writer.
if (num_channels == 1)
    clean_and_write_1_channel(color1, scratch_space + scratch_write_address);
else if (num_channels == 2)
    clean_and_write_2_channels(color2, scratch_space + scratch_write_address);
else if (num_channels == 3)
    clean_and_write_3_channels(color3, scratch_space + scratch_write_address);
else
    clean_and_write_4_channels(color4, scratch_space + scratch_write_address);
@@ -173,15 +193,24 @@ void resample_with_cached_weights(uniform uint32 num_channels, uniform uint32 sr
// Accumulate the weighted scratch-space sample into the accumulator that matches
// the channel count. Memory is read only through the matching sampler: an
// unconditional 3-byte read of the scratch pixel up front would go unused, and for
// 1- and 2-channel data it would span more bytes than a single pixel occupies.
// Any channel count other than 1, 2 or 3 falls through to the 4-channel sampler.
if (num_channels == 1)
    color1 += sample_1_channel(scratch_space + scratch_read_address) * weight;
else if (num_channels == 2)
    color2 += sample_2_channels(scratch_space + scratch_read_address) * weight;
else if (num_channels == 3)
    color3 += sample_3_channels(scratch_space + scratch_read_address) * weight;
else
    color4 += sample_4_channels(scratch_space + scratch_read_address) * weight;
180204 }
181205
182206 uint64 out_write_address = (y * target_width + x) * num_channels;
183207 assert (out_write_address < target_height * target_width * num_channels);
184- if (num_channels == 3 )
208+
209+ if (num_channels == 1 )
210+ clean_and_write_1_channel (color1, out_data + out_write_address);
211+ else if (num_channels == 2 )
212+ clean_and_write_2_channels (color2, out_data + out_write_address);
213+ else if (num_channels == 3 )
185214 clean_and_write_3_channels (color3, out_data + out_write_address);
186215 else
187216 clean_and_write_4_channels (color4, out_data + out_write_address);
0 commit comments