From 82f19e2ad25bcbdde8e7f8b6bd6a6064a207fe36 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Mon, 25 Oct 2021 16:13:22 -0700 Subject: Readability fixes for pseudocode Avoid use of acc for variables when they are not convolution accumulators. Use argument types appropriately. Add missing pseudocode for some MI operators Change-Id: I9113f9228dbcafb85206bcc39310e9599cb12c08 --- chapters/comparison.adoc | 26 +++++++------- chapters/ewise_binary.adoc | 88 ++++++++++++++++++++++++++------------------- chapters/ewise_ternary.adoc | 16 ++++----- chapters/ewise_unary.adoc | 79 ++++++++++++++++++++++++++++++++++++---- chapters/image.adoc | 4 +-- chapters/pseudocode.adoc | 74 ++++++++++++++++++++++++-------------- chapters/reduction.adoc | 76 ++++++++++++++++++++------------------- chapters/tensor_ops.adoc | 8 ++--- 8 files changed, 238 insertions(+), 133 deletions(-) diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc index 43f0787..ad574fb 100644 --- a/chapters/comparison.adoc +++ b/chapters/comparison.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020 ARM Limited +// (C) COPYRIGHT 2020-2021 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -30,10 +30,10 @@ Elementwise comparison operation for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - int32_t value1 = tensor_read(input1, shape1, index1); - int32_t value2 = tensor_read(input2, shape2, index2); - bool_t acc = (value1 == value2) ? True : False; - tensor_write(output, shape, index, acc); + in_t value1 = tensor_read(input1, shape1, index1); + in_t value2 = tensor_read(input2, shape2, index2); + out_t result = (value1 == value2) ? 
True : False; + tensor_write(output, shape, index, result); } ---- @@ -67,10 +67,10 @@ Elementwise greater than comparison operation for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - int32_t value1 = tensor_read(input1, shape1, index1); - int32_t value2 = tensor_read(input2, shape2, index2); - bool_t acc = (value1 > value2) ? True : False; - tensor_write(output, shape, index, acc); + in_t value1 = tensor_read(input1, shape1, index1); + in_t value2 = tensor_read(input2, shape2, index2); + out_t result = (value1 > value2) ? True : False; + tensor_write(output, shape, index, result); } ---- @@ -103,10 +103,10 @@ Elementwise comparison operation for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - int32_t value1 = tensor_read(input1, shape1, index1); - int32_t value2 = tensor_read(input2, shape2, index2); - bool_t acc = (value1 >= value2) ? True : False; - tensor_write(output, shape, index, acc); + in_t value1 = tensor_read(input1, shape1, index1); + in_t value2 = tensor_read(input2, shape2, index2); + out_t result = (value1 >= value2) ? 
True : False; + tensor_write(output, shape, index, result); } ---- diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index f44f7f5..4173aab 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -33,8 +33,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = apply_add(value1, value2); - tensor_write(output, shape, index, acc); + in_t result = apply_add(value1, value2); + tensor_write(output, shape, index, result); ---- *Supported Data Types:* @@ -77,12 +77,12 @@ for_each(index in shape) { (in_t == int16_t && 0 <= value2 && value2 <= 15) || (in_t == int8_t && 0 <= value2 && value2 <= 7)); - in_t acc = value1 >> value2; + in_t result = value1 >> value2; if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) { - acc = acc + 1; + result = result + 1; } - acc = apply_clip(acc, minimum, maximum) - tensor_write(output, shape, index, acc) + result = apply_clip(result, minimum, maximum); + tensor_write(output, shape, index, result); } ---- @@ -120,8 +120,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = value1 & value2; - tensor_write(output, shape, index, acc); + in_t result = value1 & value2; + tensor_write(output, shape, index, result); } ---- @@ -159,8 +159,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = value1 | value2; - tensor_write(output, shape, index, acc); + in_t result = value1 | value2; + tensor_write(output, shape, index, result); } ---- @@ -198,8 +198,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); 
in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = value1 ^ value2; - tensor_write(output, shape, index, acc); + in_t result = value1 ^ value2; + tensor_write(output, shape, index, result); } ---- @@ -244,8 +244,8 @@ for_each(index in shape) { // This catches the case where we divide minimum by -1 // which is not representable in two's complement REQUIRE((int64_t)value1 / value2 <= maximum); - in_t acc = value1 / value2; - tensor_write(output, shape, index, acc); + in_t result = value1 / value2; + tensor_write(output, shape, index, result); } ---- @@ -280,8 +280,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = value1 && value2; - tensor_write(output, shape, index, acc); + in_t result = value1 && value2; + tensor_write(output, shape, index, result); } ---- @@ -318,8 +318,8 @@ for_each(index in shape) { in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); REQUIRE(0 <= value2 && value2 <= 31); - in_t acc = value1 << value2; - tensor_write(output, shape, index, acc); + in_t result = value1 << value2; + tensor_write(output, shape, index, result); } ---- @@ -358,8 +358,8 @@ for_each(index in shape) { in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); REQUIRE(0 <= value2 && value2 <= 31); - in_t acc = (unsigned in_t)value1 >> value2; - tensor_write(output, shape, index, acc); + in_t result = (in_t)((unsigned in_t)value1 >> value2); + tensor_write(output, shape, index, result); } ---- @@ -397,8 +397,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = value1 || value2; - tensor_write(output, shape, index, acc); + in_t result = value1 || value2; + tensor_write(output, shape, 
index, result); } ---- @@ -434,8 +434,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = value1 != value2; - tensor_write(output, shape, index, acc); + in_t result = value1 != value2; + tensor_write(output, shape, index, result); } ---- @@ -471,8 +471,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = apply_max(value1, value2); - tensor_write(output, shape, index, acc); + in_t result = apply_max(value1, value2); + tensor_write(output, shape, index, result); } ---- @@ -509,8 +509,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = apply_min(value1, value2); - tensor_write(output, shape, index, acc); + in_t result = apply_min(value1, value2); + tensor_write(output, shape, index, result); } ---- @@ -548,12 +548,13 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); + out_t result; if (in_t == int32_t && shift > 0) { - out_t acc = apply_scale_32(value1, value2, shift); + result = apply_scale_32(value1, value2, shift); } else { - out_t acc = value1 * value2; // low 32-bits of result for int32_t + result = value1 * value2; // low 32-bits of result for int32_t } - tensor_write(output, shape, index, acc); + tensor_write(output, shape, index, result); } ---- @@ -582,6 +583,20 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match |Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + index1 = apply_broadcast(shape, shape1, index); + index2 = apply_broadcast(shape, shape2, index); + in_t value1 = tensor_read(input1, shape1, index1); + in_t value2 = tensor_read(input2, shape2, index2); + in_t result = apply_pow(value1, value2); + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== @@ -614,8 +629,8 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - in_t acc = apply_sub(value1, value2); - tensor_write(output, shape, index, acc); + in_t result = apply_sub(value1, value2); + tensor_write(output, shape, index, result); } ---- @@ -661,13 +676,14 @@ An int16_t to int16_t table lookup can be constructed in TOSA as follows: REQUIRE(length(table) == TABLE_SIZE); for_each(index in shape) { in_t value = tensor_read(input, shape, index); + out_t result; if (in_t == int8_t) { // value is a signed int, convert to a 0 based index - out_t acc = table[value + 128]; + result = table[value + 128]; } else { - out_t acc = apply_lookup(table, value); + result = apply_lookup(table, value); } - tensor_write(output, shape, index, acc); + tensor_write(output, shape, index, result); } ---- diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc index ecf40d1..c6babbc 100644 --- a/chapters/ewise_ternary.adoc +++ b/chapters/ewise_ternary.adoc @@ -18,7 +18,7 @@ Elementwise select of the output based on a condition. 
|=== |Argument|Type|Name|Shape|Description -|Input|bool_t|input1|shape1|Input selector tensor +|Input|cmp_t|input1|shape1|Input selector tensor |Input|in_t*|input2|shape2|Input value tensor if input1 is True |Input|in_t*|input3|shape3|Input value tensor if input1 is False |Output|in_t*|output|shape|Output tensor of same type as input2 and input3, with broadcast shape if necessary |=== @@ -32,22 +32,22 @@ for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); index3 = apply_broadcast(shape, shape3, index); - bool_t value1 = tensor_read(input1, shape1, index1); + cmp_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); in_t value3 = tensor_read(input3, shape3, index3); - in_t acc = 0; - if (value1 == True) { - acc = value2; + in_t result; + if (value1) { + result = value2; } else { - acc = value3; + result = value3; } - tensor_write(output, shape, index, acc); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|bool_t|in_t +|Profile|Mode|cmp_t|in_t |Any|Boolean|bool_t|bool_t |Any|signed 8|bool_t|int8_t diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index e2b754a..633b8ac 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -62,8 +62,8 @@ Elementwise bitwise NOT of input tensor.
---- for_each(index in shape) { in_t value1 = tensor_read(input1, shape, index); - in_t acc = ~value1; - tensor_write(output, shape, index, acc); + in_t result = ~value1; + tensor_write(output, shape, index, result); } ---- @@ -90,6 +90,17 @@ Elementwise ceiling operation |Output|in_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + in_t value1 = tensor_read(input1, shape, index); + in_t result = apply_ceil(value1); + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== @@ -116,10 +127,9 @@ Elementwise count leading zeros operation [source,c++] ---- for_each(index in shape) { - in_t acc = 0; in_t value1 = tensor_read(input1, shape, index); - acc = count_leading_zeros(value1); - tensor_write(output, shape, index, acc); + in_t result = count_leading_zeros(value1); + tensor_write(output, shape, index, result); } ---- @@ -143,6 +153,17 @@ Elementwise e to the x operation |Output|in_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + in_t value1 = tensor_read(input1, shape, index); + in_t result = apply_exp(value1); + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== @@ -164,6 +185,17 @@ Elementwise floor operation |Output|in_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + in_t value1 = tensor_read(input1, shape, index); + in_t result = apply_floor(value1); + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== @@ -185,6 +217,17 @@ Elementwise natural logarithm operation |Output|in_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + in_t value1 = tensor_read(input1, shape, index); + in_t 
result = apply_log(value1); + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== @@ -212,8 +255,8 @@ Elementwise logical NOT of input. ---- for_each(index in shape) { in_t value1 = tensor_read(input1, shape1, index); - in_t acc = !value1; - tensor_write(output, shape, index, acc); + in_t result = !value1; + tensor_write(output, shape, index, result); } ---- @@ -279,6 +322,17 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used |Output|in_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + in_t value1 = tensor_read(input1, shape1, index); + in_t result = 1.0 / value1; + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== @@ -300,6 +354,17 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho |Output|in_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Operation Function:* + +[source,c++] +---- +for_each(index in shape) { + in_t value1 = tensor_read(input1, shape1, index); + in_t result = 1.0 / apply_sqrt(value1); + tensor_write(output, shape, index, result); +} +---- + *Supported Data Types:* |=== diff --git a/chapters/image.adoc b/chapters/image.adoc index 2491ea5..7476d8a 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -74,8 +74,8 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { y = oy * stride_y + offset_y; x = ox * stride_x + offset_x; if (resize_t == float_t) { - iy = (int)floor(y); dy = y - (float_t)iy; - ix = (int)floor(x); dx = x - (float_t)ix; + iy = (int)apply_floor(y); dy = y - (float_t)iy; + ix = (int)apply_floor(x); dx = x - (float_t)ix; } else { iy = y >> shift; dy = y - (iy<> shift; dx = x - (ix<(acc_t a, acc_t b) { - if (acc_t == float_t) return a + b; +in_t apply_add(in_t a, in_t b) { + if ( == float_t) return a + b; int64_t c = (int64_t)a + (int64_t)b; - REQUIRE(c >= minimum 
&& c <= maximum); - return (acc_t)c; + REQUIRE(c >= minimum && c <= maximum); + return (in_t)c; } -acc_t apply_sub(acc_t a, acc_t b) { - if (acc_t == float_t) return a - b; - int64_t c = (int64_t)a - (int64_t)b; - REQUIRE(c >= minimum && c <= maximum); - return (acc_t)c; +in_t apply_ceil(in_t input) { + return input value rounded up to nearest integer } ----- -The following functions are used in the pseudocode to take maximum, -minimum, clip values to a range, or count leading zeros. -[[count_leading_zeros]] -[source,c++] ----- - apply_max( a, b) { +in_t apply_clip(in_t value, in_t min_val, in_t max_val) { + REQUIRE(min_val <= max_val); + value = apply_max(value, min_val); + value = apply_min(value, max_val); + return value; +} + +in_t apply_exp(in_t input) { + return e to the power input +} + +in_t apply_floor(in_t input) { + return input value rounded down to nearest integer +} + +in_t apply_log(in_t input) { + return the natural logarithm of input +} + +in_t apply_max(in_t a, in_t b) { if (a >= b) return a; else return b; } - apply_min( a, b) { +in_t apply_min(in_t a, in_t b) { if (a < b) return a; else return b; } - apply_clip( value, min_val, max_val) { - REQUIRE(min_val <= max_val); - value = apply_max(value, min_val); - value = apply_min(value, max_val); - return value; +in_t apply_pow(in_t a, in_t b) { + return a ** b; // a raised to the power b +} + +in_t apply_sqrt(in_t input) { + return the square root of input +} + +in_t apply_sub(in_t a, in_t b) { + if (in_t == float_t) return a - b; + int64_t c = (int64_t)a - (int64_t)b; + REQUIRE(c >= minimum && c <= maximum); + return (in_t)c; } int32_t count_leading_zeros(int32_t a) { @@ -146,15 +164,17 @@ Generic helper functions used to keep the pseudocode concise. 
[source,c++] ---- + +int idiv(int input1, int input2) { + return input1 / input2; // Integer divide that truncates towards zero +} + int length(in_t input) return number of elements in input list -int floor(in_t input) - return input value rounded down to nearest integer - int rank(in_t input) return rank of an input tensor int sum(in_t input[]) return the sum of values of an input list ----- \ No newline at end of file +---- diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index b687896..11db960 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -29,16 +29,18 @@ Reduce a tensor along the given axis with a logical AND operation ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); + +// Initialize output state to true for_each(index in shape) { tensor_write(output, shape, index, true); } for_each(index in shape1) { - tmp_index = index; - tmp_index[axis]=0; - value = tensor_read(input, shape1, index); - acc = tensor_read(output, shape, tmp_index); - acc = acc && value; - tensor_write(output, shape, tmp_index, acc); + out_index = index; + out_index[axis] = 0; + in_t value = tensor_read(input, shape1, index); + in_t state = tensor_read(output, shape, out_index); + state = state && value; + tensor_write(output, shape, out_index, state); } ---- @@ -70,16 +72,18 @@ Reduce a tensor along the given axis with a logical OR operation ---- ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); + +// Initialize output state to false for_each(index in shape) { tensor_write(output, shape, index, false); } for_each(index in shape1) { - tmp_index = index; - tmp_index[axis]=0; - value = tensor_read(input, shape1, index); - acc = tensor_read(output, shape, tmp_index); - acc = acc || value; - tensor_write(output, shape, tmp_index, acc); + out_index = index; + out_index[axis] = 0; + in_t value = tensor_read(input, shape1, index); + in_t state = tensor_read(output, shape, out_index); + state = state || value; + 
tensor_write(output, shape, out_index, state); } ---- @@ -115,12 +119,12 @@ for_each(index in shape) { tensor_write(output, shape, index, minimum); } for_each(index in shape1) { - tmp_index = index; - tmp_index[axis]=0; - value = tensor_read(input, shape1, index); - acc = tensor_read(output, shape, tmp_index); - acc = apply_max(acc, value); - tensor_write(output, shape, tmp_index, acc); + out_index = index; + out_index[axis] = 0; + in_t value = tensor_read(input, shape1, index); + in_t state = tensor_read(output, shape, out_index); + state = apply_max(state, value); + tensor_write(output, shape, out_index, state); } ---- @@ -158,12 +162,12 @@ for_each(index in shape) { tensor_write(output, shape, index, maximum); } for_each(index in shape1) { - tmp_index = index; - tmp_index[axis]=0; - value = tensor_read(input, shape1, index); - acc = tensor_read(output, shape, tmp_index); - acc = apply_min(acc, value); - tensor_write(output, shape, tmp_index, acc); + out_index = index; + out_index[axis] = 0; + in_t value = tensor_read(input, shape1, index); + in_t state = tensor_read(output, shape, out_index); + state = apply_min(state, value); + tensor_write(output, shape, out_index, state); } ---- @@ -202,12 +206,12 @@ for_each(index in shape) { tensor_write(output, shape, index, 1.0); } for_each(index in shape1) { - tmp_index = index; - tmp_index[axis]=0; - value = tensor_read(input, shape1, index); - acc = tensor_read(output, shape, tmp_index); - acc = acc * value; - tensor_write(output, shape, tmp_index, acc); + out_index = index; + out_index[axis] = 0; + in_t value = tensor_read(input, shape1, index); + in_t state = tensor_read(output, shape, out_index); + state = state * value; + tensor_write(output, shape, out_index, state); } ---- @@ -243,12 +247,12 @@ for_each(index in shape) { tensor_write(output, shape, index, 0); } for_each(index in shape1) { - tmp_index = index; - tmp_index[axis]=0; - value = tensor_read(input, shape1, index); - acc = tensor_read(output, shape, 
tmp_index); - acc = apply_add(acc, value); - tensor_write(output, shape, tmp_index, acc); + out_index = index; + out_index[axis] = 0; + in_t value = tensor_read(input, shape1, index); + in_t state = tensor_read(output, shape, out_index); + state = apply_add(state, value); + tensor_write(output, shape, out_index, state); } ---- diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index d7ced25..cfab5ba 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -98,8 +98,8 @@ ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x); ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y); // Output shape must match expected shape given the input shape // and arguments provided -ERROR_IF(H != floor((IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y)) -ERROR_IF(W != floor((IW + pad_left + pad_right + stride_x - kernel_x) / stride_x)) +ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y)); +ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x)); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t output_val; @@ -444,8 +444,8 @@ ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x); ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y); // Output shape must match expected shape given the input shape // and arguments provided -ERROR_IF(H != floor((IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y)) -ERROR_IF(W != floor((IW + pad_left + pad_right + stride_x - kernel_x) / stride_x)) +ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y)); +ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x)); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t acc = minimum_value; -- cgit v1.2.1