From 9424cc4d4c5002c1da441f20052342b6b1b6ac7c Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Wed, 4 Nov 2020 13:49:18 -0800 Subject: Fix output argument type for convolutions Was out_t, which wasn't in the data types table. Switch to acc_t, the size of the accumulator, which is the output type for the convolutions. Also added some types into the pseudocode to clarify what types variables are. Signed-off-by: Eric Kunze Change-Id: I210220199c2d39b16938094022339286df040545 --- chapters/tensor_ops.adoc | 60 ++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 9bb6496..dda04db 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -34,13 +34,13 @@ None assert(axis >= 0 && axis < k && k <=4) left_shape = input_shape[0:axis-1] right_shape = input_shape[axis+1:k-1] -assert( concat(left_shape, right_shape) == output_shape ) +assert( concat(left_shape, right_shape) == output_shape ) for_each ( left_index in left_shape, right_index in right_shape ) in_t max_value = minimum_value int32 max_index = 0; for (i=0; i(input, input_shape, index) + in_t value = tensor_read(input, input_shape, index) if (value > max_value) { max_value = value; max_index=i; } } index = concat(left_index, right_index) @@ -98,14 +98,14 @@ for_each ( 0<=n(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad) - acc = apply_add<32>(acc, value) + in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad) + acc = apply_add(acc, value) if (0<=y(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad) - weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) + in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad) + weight_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) acc = apply_add(acc, value * weight) } acc = apply_add(acc, bias[oc]) @@ -198,7 +198,7 @@ Performs a 3D convolution over the given input tensor. |Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x] |Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x] -|Output|out_t*|output|[N,D,H,W,OC]|Output tensor +|Output|acc_t*|output|[N,D,H,W,OC]|Output tensor |=== *Quantization Parameters:* @@ -226,8 +226,8 @@ for_each (0<=n(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad) - weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp) + in_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad) + weight_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp) acc = apply_add(acc, value * weight) } acc = apply_add(acc, bias[oc]) @@ -242,7 +242,7 @@ for_each (0<=n(input, [N,H,W,C], [n,y,x,c], input_zp, pad) - weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp) + in_t value = tensor_read(input, [N,H,W,C], [n,y,x,c], input_zp, pad) + weight_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp) acc = apply_add(acc, value * weight) } acc = apply_add(acc, bias[(c*M) + m]) @@ -320,7 +320,7 @@ Performs a fully connected network. |Input|in_t*|input|[N,IC]|Input tensor |Attribute|weight_t*|weight|[OC,IC]|Weights |Attribute|acc_t*|bias|[OC]|Per output channel bias data. -|Output|out_t*|output|[N,OC]|Output tensor +|Output|acc_t*|output|[N,OC]|Output tensor |=== *Quantization Parameters:* @@ -341,8 +341,8 @@ assert(weight_t == aint8_t || weight_zp == 0) for_each (0<=n(input, [N,IC], [n,ic], input_zp) - weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp) + in_t value = tensor_read(input, [N,IC], [n,ic], input_zp) + weight_t weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp) acc = apply_add(acc, value * weight) } acc = apply_add(acc, bias[oc]) @@ -371,7 +371,7 @@ Performs a two dimensional matrix multiplication. This allows both inputs to be |Input|in_t*|A|[M,K]|Input tensor A |Input|in_t*|B|[K,N]|Input tensor B -|Output|out_t*|C|[M,N]|Output tensor C +|Output|acc_t*|C|[M,N]|Output tensor C |=== *Quantization Parameters:* @@ -391,8 +391,8 @@ assert(in_t==aint8_t || (A_zp==0 && B_zp==0)) // Zero point only for asymmetric for_each (0<=m(A, [M,K], [m,k], A_zp) - value2 = tensor_read(B, [K,N], [k,n], B_zp) + in_t value1 = tensor_read(A, [M,K], [m,k], A_zp) + in_t value2 = tensor_read(B, [K,N], [k,n], B_zp) acc = apply_add(acc, value1 * value2) } tensor_write(C, [M,N], [m,n], acc) @@ -434,13 +434,13 @@ None ---- pad=concat([0,0],pad,[0,0]) for_each ( 0<=n; + in_t acc = minimum_value; iy = oy * stride_y - pad_top ix = ox * stride_x - pad_left for_each ( 0<=ky(input, [N,IH,IW,IC], [n,y,x,c], pad) + in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,c], pad) acc = apply_max(acc, value) } tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc) @@ -469,11 +469,11 @@ Performs a 2D transposed convolution over the given tensor input, using the weig |Input|in_t*|input|[N,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW |Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. -|Attribute|int*|outpad|[2]|[outpad_top, outpad_left] +|Attribute|int*|out_pad|[2]|[out_pad_top, out_pad_left] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Attribute|int*|out_shape|[4]|[N,OH,OW,OC] -|Output|out_t*|output|[N,OH,OW,OC]|Output tensor +|Output|acc_t*|output|[N,OH,OW,OC]|Output tensor |=== *Quantization Parameters:* @@ -495,12 +495,12 @@ for_each (index in out_shape) { tensor_write(output, [N,OH,OW,OC], index, bias[index[3]]) } for_each (0<=n=0 && oy=0 && ox(output, [N,OH,OW,OC], [n,oy,ox,oc]) - value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp) - weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) + acc_t acc = tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc]) + in_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp) + weight_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) acc = apply_add(acc, value * weight) tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc) } -- cgit v1.2.1