@@ -36,28 +36,26 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram {
3636 this . params = [ stride , zeroPad ] ;
3737 this . userCode = `
3838 void main() {
39- vec4 coords = getOutputCoords();
40- float wR = coords.x;
41- float wC = coords.y;
42- float d1 = coords.z;
43- float d2 = coords.w;
39+ ivec4 coords = getOutputCoords();
40+ int wR = coords.x;
41+ int wC = coords.y;
42+ int d1 = coords.z;
43+ int d2 = coords.w;
4444
4545 // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
4646 // ? = to be determined. : = across all values in that axis.
4747 float dotProd = 0.0;
48- for (int iyR = 0; iyR < ${ yNumRows } ; iyR++) {
49- float yR = float(iyR);
50- float xR = wR + yR * ${ stride } .0 - ${ zeroPad } .0;
48+ for (int yR = 0; yR < ${ yNumRows } ; yR++) {
49+ int xR = wR + yR * ${ stride } - ${ zeroPad } ;
5150
52- if (xR < 0.0 || xR >= ${ xNumRows } .0 ) {
51+ if (xR < 0 || xR >= ${ xNumRows } ) {
5352 continue;
5453 }
5554
56- for (int iyC = 0; iyC < ${ yNumCols } ; iyC++) {
57- float yC = float(iyC);
58- float xC = wC + yC * ${ stride } .0 - ${ zeroPad } .0;
55+ for (int yC = 0; yC < ${ yNumCols } ; yC++) {
56+ int xC = wC + yC * ${ stride } - ${ zeroPad } ;
5957
60- if (xC < 0.0 || xC >= ${ xNumCols } .0 ) {
58+ if (xC < 0 || xC >= ${ xNumCols } ) {
6159 continue;
6260 }
6361
@@ -94,42 +92,41 @@ export class Conv2DTransposeProgram implements GPGPUProgram {
9492 this . params = [ pad , fSize , origStride , hasBias ] ;
9593
9694 this . userCode = `
95+ const ivec2 pads = ivec2(${ pad } , ${ pad } );
96+
9797 void main() {
98- vec3 coords = getOutputCoords();
99- float yR = coords.x;
100- float yC = coords.y;
101- float d2 = coords.z;
98+ ivec3 coords = getOutputCoords();
99+ int d2 = coords.z;
102100
103- vec2 xRCCorner = vec2(yR, yC) - vec2( ${ pad } .0, ${ pad } .0) ;
104- float xRCorner = xRCCorner.x;
105- float xCCorner = xRCCorner.y;
101+ ivec2 xRCCorner = coords.xy - pads ;
102+ int xRCorner = xRCCorner.x;
103+ int xCCorner = xRCCorner.y;
106104
107105 // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2).
108106 // ? = to be determined. : = across all values in that axis.
109107 float dotProd = 0.0;
110- for (int iwR = 0; iwR < ${ fSize } ; iwR++) {
111- float wR = float(iwR);
112- float xR = (xRCorner + wR) / ${ origStride } .0;
108+ for (int wR = 0; wR < ${ fSize } ; wR++) {
109+ float xR = float(xRCorner + wR) / ${ origStride } .0;
113110
114111 if (xR < 0.0 || xR >= ${ xRows } .0 || fract(xR) > 0.0) {
115112 continue;
116113 }
114+ int ixR = int(xR);
117115
118- float wRPerm = ${ fSize } .0 - 1.0 - wR;
116+ int wRPerm = ${ fSize } - 1 - wR;
119117
120- for (int iwC = 0; iwC < ${ fSize } ; iwC++) {
121- float wC = float(iwC);
122- float xC = (xCCorner + wC) / ${ origStride } .0;
118+ for (int wC = 0; wC < ${ fSize } ; wC++) {
119+ float xC = float(xCCorner + wC) / ${ origStride } .0;
123120
124121 if (xC < 0.0 || xC >= ${ xCols } .0 || fract(xC) > 0.0) {
125122 continue;
126123 }
124+ int ixC = int(xC);
127125
128- float wCPerm = ${ fSize } .0 - 1.0 - wC;
126+ int wCPerm = ${ fSize } - 1 - wC;
129127
130- for (int id1 = 0; id1 < ${ origOutputDepth } ; id1++) {
131- float d1 = float(id1);
132- float xValue = getX(xR, xC, d1);
128+ for (int d1 = 0; d1 < ${ origOutputDepth } ; d1++) {
129+ float xValue = getX(ixR, ixC, d1);
133130 float wValue = getW(wRPerm, wCPerm, d2, d1);
134131 dotProd += xValue * wValue;
135132 }
@@ -153,13 +150,11 @@ export class Conv2DDerBiasProgram implements GPGPUProgram {
153150 this . outputShape = [ outputDepth ] ;
154151 this . userCode = `
155152 void main() {
156- float d2 = getOutputCoords();
153+ int d2 = getOutputCoords();
157154
158155 float derBias = 0.0;
159- for (int iyR = 0; iyR < ${ yNumRows } ; iyR++) {
160- float yR = float(iyR);
161- for (int iyC = 0; iyC < ${ yNumCols } ; iyC++) {
162- float yC = float(iyC);
156+ for (int yR = 0; yR < ${ yNumRows } ; yR++) {
157+ for (int yC = 0; yC < ${ yNumCols } ; yC++) {
163158 derBias += getDy(yR, yC, d2);
164159 }
165160 }
0 commit comments