From 0afe61f88ce3d2f445c5f01ae5567cb1b0b7f303 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Wed, 14 Feb 2024 16:33:31 -0800 Subject: Modify convolution operators to improve bias handling Accumulator size moves to an enumerated attribute, out_t for floating-point changes to be the size of the input. Bias for floating-point also becomes the bit width of the input type. Signed-off-by: Eric Kunze Change-Id: I7369417adbb1106ce34a1978e7f511a30272c318 --- pseudocode/operators/CONV2D.tosac | 17 ++++++++------- pseudocode/operators/CONV3D.tosac | 5 +++-- pseudocode/operators/DEPTHWISE_CONV2D.tosac | 5 +++-- pseudocode/operators/TRANSPOSE_CONV2D.tosac | 34 ++++++++++++++++------------- 4 files changed, 34 insertions(+), 27 deletions(-) (limited to 'pseudocode') diff --git a/pseudocode/operators/CONV2D.tosac b/pseudocode/operators/CONV2D.tosac index fe61747..0ae0e81 100644 --- a/pseudocode/operators/CONV2D.tosac +++ b/pseudocode/operators/CONV2D.tosac @@ -17,24 +17,25 @@ ERROR_IF(OW != idiv_check(IW - 1 + pad_left + pad_right - (KW - 1) * dilation_x, ERROR_IF(BC != OC && BC != 1); for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= oc < OC) { - out_t acc = 0; + acc_t acc = 0; index_t iy = oy * stride_y - pad_top; index_t ix = ox * stride_x - pad_left; for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = static_cast(tensor_read(input, + acc_t value = static_cast(tensor_read(input, [N,IH,IW,IC], [n,y,x,ic])); - out_t weight = static_cast(tensor_read(weight, + acc_t weight = static_cast(tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); - value = apply_sub_s(value, static_cast(input_zp)); - weight = apply_sub_s(weight, static_cast(weight_zp)); - acc = apply_add_s(acc, apply_mul_s(value, weight)); + value = apply_sub_s(value, static_cast(input_zp)); + weight = apply_sub_s(weight, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight)); } 
} - acc = apply_add_s(acc, bias[(BC == 1) ? 0 : oc]); - tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); + out_t out = static_cast(acc); + out = apply_add_s(out, bias[(BC == 1) ? 0 : oc]); + tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], out); } diff --git a/pseudocode/operators/CONV3D.tosac b/pseudocode/operators/CONV3D.tosac index 7568564..e53b7eb 100644 --- a/pseudocode/operators/CONV3D.tosac +++ b/pseudocode/operators/CONV3D.tosac @@ -38,6 +38,7 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW, 0 <= oc < OC) { acc = apply_add_s(acc, apply_mul_s(value, weight)); } } - acc = apply_add_s(acc, bias[(BC == 1) ? 0 : oc]); - tensor_write(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); + out_t out = static_cast(acc); + out = apply_add_s(out, bias[(BC == 1) ? 0 : oc]); + tensor_write(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], out); } diff --git a/pseudocode/operators/DEPTHWISE_CONV2D.tosac b/pseudocode/operators/DEPTHWISE_CONV2D.tosac index a473375..419d2eb 100644 --- a/pseudocode/operators/DEPTHWISE_CONV2D.tosac +++ b/pseudocode/operators/DEPTHWISE_CONV2D.tosac @@ -35,6 +35,7 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C, 0 <= m < M) { acc = apply_add_s(acc, apply_mul_s(value, weight)); } } - acc = apply_add_s(acc, bias[(BC == 1) ? 0 : (c * M) + m]); - tensor_write(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); + out_t out = static_cast(acc); + out = apply_add_s(out, bias[(BC == 1) ? 
0 : (c * M) + m]); + tensor_write(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], out); } diff --git a/pseudocode/operators/TRANSPOSE_CONV2D.tosac b/pseudocode/operators/TRANSPOSE_CONV2D.tosac index ab61348..6713b30 100644 --- a/pseudocode/operators/TRANSPOSE_CONV2D.tosac +++ b/pseudocode/operators/TRANSPOSE_CONV2D.tosac @@ -16,20 +16,24 @@ ERROR_IF(OH != (IH - 1) * stride_y + out_pad_top + out_pad_bottom + KH); ERROR_IF(OW != (IW - 1) * stride_x + out_pad_left + out_pad_right + KW); ERROR_IF(BC != OC && BC != 1); -for_each(index in [N, OH, OW, OC]) { - tensor_write(output, [N,OH,OW,OC], index, bias[(BC == 1) ? 0 : index[3]]); -} -for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, - 0 <= ic < IC, 0 <= ky < KH, 0 <= kx < KW) { - index_t oy = iy * stride_y + out_pad_top + ky; - index_t ox = ix * stride_x + out_pad_left + kx; - if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - out_t acc = static_cast(tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc])); - out_t value = static_cast(tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic])); - out_t weight = static_cast(tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); - value = apply_sub_s(value, static_cast(input_zp)); - weight = apply_sub_s(weight, static_cast(weight_zp)); - acc = apply_add_s(acc, apply_mul_s(value, weight)); - tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); +for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= dy < stride_y, 0 <= dx < stride_x, 0 <= oc < OC) { + acc_t acc = 0; + index_t oy = iy * stride_y + dy + out_pad_top; + index_t ox = ix * stride_x + dx + out_pad_left; + + for_each(0 <= sy * stride_y < KH - dy, 0 <= sx * stride_x < KW - dx, 0 <= ic < IC) { + index_t y = iy - sy; + index_t x = ix - sx; + index_t ky = dy + sy * stride_y; + index_t kx = dx + sx * stride_x; + acc_t value = static_cast(tensor_read(input, [N,IH,IW,IC], [n,y,x,ic])); + acc_t weight_value = static_cast(tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); + value = apply_sub_s(value, static_cast(input_zp)); + 
weight_value = apply_sub_s(weight_value, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight_value)); } + + out_t out = static_cast(acc); + out = apply_add_s(out, bias[(BC == 1) ? 0 : oc]); + tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], out); } -- cgit v1.2.1