From 1e9ba65f263a15f1f9cf9b9484047ea51237187a Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Wed, 17 Feb 2021 19:23:39 -0800 Subject: Consistency cleanup Attempt to get consistent across the pseudocode. Change the data types to all be intN_t instead of some cases of intN. Use float_t as the general floating point data type. Be consistent on use of the term "floating-point" Move general pseudocode helpers to their own section. Change-Id: Ie77666cd3ee438c71f39c62b9c424fe687b0bb51 Signed-off-by: Eric Kunze --- chapters/tensor_ops.adoc | 319 ++++++++++++++++++++++++----------------------- 1 file changed, 160 insertions(+), 159 deletions(-) (limited to 'chapters/tensor_ops.adoc') diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 571b9aa..341f51d 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -18,9 +18,9 @@ This returns the index with the largest value across the given axis of the input |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|input_shape|Input tensor dimension k \<=4 +|Input|in_t*|input|shape1|Input tensor dimension k \<=4 |Attribute|int|axis|-|Axis in range 0 to k-1 -|Output|out_t*|output|output_shape|Output tensor dimension k-1 +|Output|out_t*|output|shape|Output tensor dimension k-1 |=== *Quantization Parameters:* @@ -31,20 +31,20 @@ None [source,c] ---- -assert(axis >= 0 && axis < k && k <=4) -left_shape = input_shape[0:axis-1] -right_shape = input_shape[axis+1:k-1] -assert( concat(left_shape, right_shape) == output_shape ) -for_each ( left_index in left_shape, right_index in right_shape ) - in_t max_value = minimum_value - int32 max_index = 0; - for (i=0; i(input, input_shape, index) +assert(axis >= 0 && axis < k && k <=4); +left_shape = shape1[0:axis-1]; +right_shape = shape1[axis+1:k-1]; +assert(flatten(left_shape, right_shape) == shape); +for_each(left_index in left_shape, right_index in right_shape ) + in_t max_value = minimum_value; + int32_t max_index = 0; + for (i = 0; i < shape[axis]; i++) 
{ + index = flatten(left_index, [i], right_index); + in_t value = tensor_read(input, shape1, index); if (value > max_value) { max_value = value; max_index=i; } } - index = concat(left_index, right_index) - tensor_write(output, output_shape, index, max_index) + index = flatten(left_index, right_index); + tensor_write(output, shape, index, max_index); } ---- @@ -53,9 +53,9 @@ for_each ( left_index in left_shape, right_index in right_shape ) |=== |Profile|Mode|in_t|out_t -|Any|signed 8|int8|int32 -|Any|signed 16|int16|int32 -|MI, MT|float|float|int32 +|Any|signed 8|int8_t|int32_t +|Any|signed 16|int16_t|int32_t +|MI, MT|floating-point|float_t|int32_t |=== ==== AVG_POOL2D @@ -67,11 +67,11 @@ This performs an average pooling over the given input tensor. A sliding window o |=== |Argument|Type|Name|Shape|Description -|Input|in_t *|input|[N,H,W,C]|Input tensor 4D -|Attribute|int *|kernel|[2]|[kernel_y, kernel_x] -|Attribute|int *|stride|[2]|[stride_y, stride_x] -|Attribute|int *|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] -|Output|out_t *|output|[N,H,W,C]|Output tensor 4D +|Input|in_t*|input|[N,H,W,C]|Input tensor 4D +|Attribute|int*|kernel|[2]|[kernel_y, kernel_x] +|Attribute|int*|stride|[2]|[stride_y, stride_x] +|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] +|Output|in_t*|output|[N,H,W,C]|Output tensor 4D |=== *Quantization Parameters:* @@ -80,46 +80,47 @@ This performs an average pooling over the given input tensor. 
A sliding window o |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point -|Attribute|out_t|output_zp|-|Output tensor zero point +|Attribute|in_t|output_zp|-|Output tensor zero point |=== *Operation Function:* [source,c] ---- -assert(in_t == int8_t || input_zp == 0) // Zero point only for int8 -assert(out_t == int8_t || output_zp == 0) // Zero point only for int8 -pad=concat([0,0],pad,[0,0]) -for_each ( 0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { +assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t +assert(in_t == int8_t || output_zp == 0); // Zero point only for int8_t +pad = flatten([0,0], pad, [0,0]); +for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { + in_t output_val; acc_t acc = 0; int count = 0; - iy = oy * stride_y - pad_top - ix = ox * stride_x - pad_left - for_each ( 0 <= ky < kernel_y, 0 <= kx < kernel_x) { - y = iy + ky - x = ix + kx - in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad) - acc = apply_add(acc, value) - if (0<=y<IH and 0<=x<IW) count++ + iy = oy * stride_y - pad_top; + ix = ox * stride_x - pad_left; + for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) { + y = iy + ky; + x = ix + kx; + in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad); + acc = apply_add(acc, value); + if (0 <= y < IH and 0 <= x < IW) count++ } if (is_float(out_t)) { - value = value / (float)count; + output_val = acc / (float)count; } else { - scale_t scale = reciprocal_scale(count) - acc = apply_scale_32(acc, scale.multiplier, scale.shift, false) - acc = apply_clip(acc + output_zp, output_min, output_max) + scale_t scale = reciprocal_scale(count); + acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); + output_val = apply_clip(acc + output_zp, minimum, maximum) } - tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc) + tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], output_val); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|acc_t|out_t +|Profile|Mode|in_t|acc_t -|Any|signed 8|int8|int32_t|int8 -|Any|signed 16|int16|int32_t|int16 -|MI, MT|float|float|float|float +|Any|signed 8|int8_t|int32_t +|Any|signed 16|int16_t|int32_t +|MI, 
MT|floating-point|float_t|float_t |=== ==== CONV2D @@ -153,22 +154,22 @@ Performs a 2D convolution over the given tensor input, using the weight tensor. [source,c] ---- -assert(in_t == int8_t || input_zp == 0) // Zero point only for int8 -assert(weight_t == int8_t || weight_zp == 0) -pad=concat([0,0], pad, [0,0]) -for_each (0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { - acc_t acc = 0 - iy = oy * stride_y - pad_top - ix = ox * stride_x - pad_left - for_each (0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { - y = iy + ky * dilation_y - x = ix + kx * dilation_x - in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad) - weight_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) - acc = apply_add(acc, value * weight) +assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t +assert(weight_t == int8_t || weight_zp == 0); +pad = flatten([0,0], pad, [0,0]); +for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { + acc_t acc = 0; + iy = oy * stride_y - pad_top; + ix = ox * stride_x - pad_left; + for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { + y = iy + ky * dilation_y; + x = ix + kx * dilation_x; + in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad); + weight_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); + acc = apply_add(acc, value * weight); } - acc = apply_add(acc, bias[oc]) - tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc) + acc = apply_add(acc, bias[oc]); + tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc); } ---- @@ -177,10 +178,10 @@ for_each (0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { |=== |Profile|Mode|in_t|weight_t|acc_t -|Any|signed 8x8|int8|int8|int32 -|Any|signed 8x4|int8|int4|int32 -|Any|signed 16x8|int16|int8|int48 -|MI, MT|float|float|float|float +|Any|signed 8x8|int8_t|int8_t|int32_t +|Any|signed 8x4|int8_t|int4_t|int32_t +|Any|signed 16x8|int16_t|int8_t|int48_t +|MI, MT|floating-point|float_t|float_t|float_t |=== ==== 
CONV3D @@ -214,24 +215,24 @@ Performs a 3D convolution over the given input tensor. [source,c] ---- -assert(in_t == int8_t || input_zp == 0) // Zero point only for int8 -assert(weight_t == int8_t || weight_zp == 0) -pad=concat([0,0], pad, [0,0]) -for_each (0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { - acc_t acc = 0 - id = od * stride_d - pad_d0 - iy = oy * stride_y - pad_top - ix = ox * stride_x - pad_left - for_each (0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { - d = id + kd * dilation_d - y = iy + ky * dilation_y - x = ix + kx * dilation_x - in_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad) - weight_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp) - acc = apply_add(acc, value * weight) +assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t +assert(weight_t == int8_t || weight_zp == 0); +pad = flatten([0,0], pad, [0,0]); +for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { + acc_t acc = 0; + id = od * stride_d - pad_d0; + iy = oy * stride_y - pad_top; + ix = ox * stride_x - pad_left; + for_each(0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { + d = id + kd * dilation_d; + y = iy + ky * dilation_y; + x = ix + kx * dilation_x; + in_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad); + weight_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp); + acc = apply_add(acc, value * weight); } - acc = apply_add(acc, bias[oc]) - tensor_write(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc) + acc = apply_add(acc, bias[oc]); + tensor_write(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc); } ---- @@ -240,10 +241,10 @@ for_each (0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { |=== |Profile|Mode|in_t|weight_t|acc_t -|Any|signed 8x8|int8|int8|int32 -|Any|signed 8x4|int8|int4|int32 -|Any|signed 16x8|int16|int8|int48 -|MI, MT|float|float|float|float +|Any|signed 8x8|int8_t|int8_t|int32_t 
+|Any|signed 8x4|int8_t|int4_t|int32_t +|Any|signed 16x8|int16_t|int8_t|int48_t +|MI, MT|floating-point|float_t|float_t|float_t |=== @@ -278,22 +279,22 @@ Performs 2D convolutions separately over each channel of the given tensor input, [source,c] ---- -assert(in_t == int8_t || input_zp == 0) // Zero point only for int8 -assert(weight_t == int8_t || weight_zp == 0) -pad=concat([0,0], pad, [0,0]) -for_each (0 <= n(input, [N,H,W,C], [n,y,x,c], input_zp, pad) - weight_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp) - acc = apply_add(acc, value * weight) +assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t +assert(weight_t == int8_t || weight_zp == 0); +pad = flatten([0,0], pad, [0,0]); +for_each(0 <= n(input, [N,H,W,C], [n,y,x,c], input_zp, pad); + weight_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp); + acc = apply_add(acc, value * weight); } - acc = apply_add(acc, bias[(c*M) + m]) - tensor_write(output, [N,H,W,C*M], [n,oy,ox,c*M+m], acc) + acc = apply_add(acc, bias[(c * M) + m]); + tensor_write(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc); } ---- @@ -302,10 +303,10 @@ for_each (0 <= n(input, [N,IC], [n,ic], input_zp) - weight_t weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp) - acc = apply_add(acc, value * weight) +assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t +assert(weight_t == int8_t || weight_zp == 0); +for_each(0 <= n < N, 0 <= oc < OC) { + acc_t acc = 0; + for_each(0 <= ic < IC) { + in_t value = tensor_read(input, [N,IC], [n,ic], input_zp); + weight_t weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp); + acc = apply_add(acc, value * weight); } - acc = apply_add(acc, bias[oc]) - tensor_write(output, [N,OC], [n,oc], acc) + acc = apply_add(acc, bias[oc]); + tensor_write(output, [N,OC], [n,oc], acc); } ---- @@ -355,10 +356,10 @@ for_each (0 <= n < N, 0 <= oc < OC) { |=== |Profile|Mode|in_t|weight_t|acc_t -|Any|signed 8x8|int8|int8|int32 -|Any|signed 
8x4|int8|int4|int32 -|Any|signed 16x8 |int16|int8|int48 -|MI, MT|float|float|float|float +|Any|signed 8x8|int8_t|int8_t|int32_t +|Any|signed 8x4|int8_t|int4_t|int32_t +|Any|signed 16x8 |int16_t|int8_t|int48_t +|MI, MT|floating-point|float_t|float_t|float_t |=== ==== MATMUL @@ -387,15 +388,15 @@ Performs two dimensional matrix multiplications. This allows both inputs to be a [source,c] ---- -assert(in_t == int8_t || (A_zp == 0 && B_zp == 0)) // Zero point only for int8 -for_each (0 <= n < N, 0 <= h < H, 0 <= w < W) { - acc_t acc = 0 - for_each (0 <= c < C) { - in_t value1 = tensor_read(A, [N,H,C], [n,h,c], A_zp) - in_t value2 = tensor_read(B, [N,C,W], [n,c,w], B_zp) - acc = apply_add(acc, value1 * value2) +assert(in_t == int8_t || (A_zp == 0 && B_zp == 0)); // Zero point only for int8_t +for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { + acc_t acc = 0; + for_each(0 <= c < C) { + in_t value1 = tensor_read(A, [N,H,C], [n,h,c], A_zp); + in_t value2 = tensor_read(B, [N,C,W], [n,c,w], B_zp); + acc = apply_add(acc, value1 * value2); } - tensor_write(output, [N,H,W], [n,h,w], acc) + tensor_write(output, [N,H,W], [n,h,w], acc); } ---- @@ -404,9 +405,9 @@ for_each (0 <= n < N, 0 <= h < H, 0 <= w < W) { |=== |Profile|Mode|in_t|acc_t -|Any|signed 8x8|int8|int32 -|Any|signed 16x16|int16|int48 -|MI, MT|float|float|float +|Any|signed 8x8|int8_t|int32_t +|Any|signed 16x16|int16_t|int48_t +|MI, MT|floating-point|float_t|float_t |=== ==== MAX_POOL2D @@ -421,7 +422,7 @@ This performs a max pooling over the given input tensor. 
A sliding window of siz |Attribute|int*|kernel|[2]|[kernel_y, kernel_x] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] -|Output|out_t*|output|[N,H,W,C]|Output tensor 4D +|Output|in_t*|output|[N,H,W,C]|Output tensor 4D |=== *Quantization Parameters:* @@ -432,29 +433,29 @@ None [source,c] ---- -pad=concat([0,0], pad, [0,0]) -for_each (0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { +pad = flatten([0,0], pad, [0,0]); +for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t acc = minimum_value; - iy = oy * stride_y - pad_top - ix = ox * stride_x - pad_left - for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x ) { - y = iy + ky - x = ix + kx - in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad) - acc = apply_max(acc, value) + iy = oy * stride_y - pad_top; + ix = ox * stride_x - pad_left; + for_each( 0<=ky<kernel_y, 0<=kx<kernel_x ) { + y = iy + ky; + x = ix + kx; + in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad); + acc = apply_max(acc, value); } - tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc) + tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|out_t +|Profile|Mode|in_t -|Any|signed 8|int8|int8 -|Any|16-bit|int16|int16 -|MI, MT|float|float|float +|Any|signed 8|int8_t +|Any|16-bit|int16_t +|MI, MT|floating-point|float_t |=== ==== TRANSPOSE_CONV2D @@ -488,21 +489,21 @@ Performs a 2D transposed convolution over the given tensor input, using the weig [source,c] ---- -assert(in_t == int8_t || input_zp == 0) // Zero point only allowed for int8 -assert(weight_t == int8_t || weight_zp == 0) -for_each (index in out_shape) { +assert(in_t == int8_t || input_zp == 0); // Zero point only allowed for int8_t +assert(weight_t == int8_t || weight_zp == 0); +for_each(index in out_shape) { tensor_write(output, [N,OH,OW,OC], index, bias[index[3]]) } -for_each (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, +for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, 0 <= ic < IC, 0 <= ky < KH, 0 <= kx < KW) { - oy = iy * stride_y - out_pad_top + ky - ox = ix * stride_x - out_pad_left + 
kx - if (oy>=0 && oy<OH && ox>=0 && ox<OW) { - acc_t acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]) - in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp) - weight_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) - acc = apply_add(acc, value * weight) - tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc) + oy = iy * stride_y - out_pad_top + ky; + ox = ix * stride_x - out_pad_left + kx; + if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { + acc_t acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]); + in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp); + weight_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); + acc = apply_add(acc, value * weight); + tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } ---- @@ -512,8 +513,8 @@ for_each (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, |=== |Profile|Mode|in_t|weight_t|acc_t -|Any|signed 8x8|int8|int8|int32 -|Any|signed 8x4|int8|int4|int32 -|Any|signed 16x8|int16|int8|int48 -|MI, MT|float|float|float|float +|Any|signed 8x8|int8_t|int8_t|int32_t +|Any|signed 8x4|int8_t|int4_t|int32_t +|Any|signed 16x8|int16_t|int8_t|int48_t +|MI, MT|floating-point|float_t|float_t|float_t |=== -- cgit v1.2.1