aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Kunze <eric.kunze@arm.com>2021-10-25 16:13:22 -0700
committerEric Kunze <eric.kunze@arm.com>2021-10-28 09:57:55 -0700
commit82f19e2ad25bcbdde8e7f8b6bd6a6064a207fe36 (patch)
treec7a685d03bf2a9dd5a171b6c7409baf8c687da4f
parent3fe5256340778f586b80ac02b0632b54a39723f1 (diff)
downloadspecification-82f19e2ad25bcbdde8e7f8b6bd6a6064a207fe36.tar.gz
Readability fixes for pseudocode
Avoid use of acc for variables when they are not convolution accumulators. Use argument types appropriately. Add missing pseudocode for some MI operators.

Change-Id: I9113f9228dbcafb85206bcc39310e9599cb12c08
-rw-r--r--chapters/comparison.adoc26
-rw-r--r--chapters/ewise_binary.adoc88
-rw-r--r--chapters/ewise_ternary.adoc16
-rw-r--r--chapters/ewise_unary.adoc79
-rw-r--r--chapters/image.adoc4
-rw-r--r--chapters/pseudocode.adoc74
-rw-r--r--chapters/reduction.adoc76
-rw-r--r--chapters/tensor_ops.adoc8
8 files changed, 238 insertions, 133 deletions
diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc
index 43f0787..ad574fb 100644
--- a/chapters/comparison.adoc
+++ b/chapters/comparison.adoc
@@ -1,7 +1,7 @@
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
@@ -30,10 +30,10 @@ Elementwise comparison operation
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- int32_t value1 = tensor_read<in_t>(input1, shape1, index1);
- int32_t value2 = tensor_read<in_t>(input2, shape2, index2);
- bool_t acc = (value1 == value2) ? True : False;
- tensor_write<out_t>(output, shape, index, acc);
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ out_t result = (value1 == value2) ? True : False;
+ tensor_write<out_t>(output, shape, index, result);
}
----
@@ -67,10 +67,10 @@ Elementwise greater than comparison operation
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- int32_t value1 = tensor_read<in_t>(input1, shape1, index1);
- int32_t value2 = tensor_read<in_t>(input2, shape2, index2);
- bool_t acc = (value1 > value2) ? True : False;
- tensor_write<out_t>(output, shape, index, acc);
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ out_t result = (value1 > value2) ? True : False;
+ tensor_write<out_t>(output, shape, index, result);
}
----
@@ -103,10 +103,10 @@ Elementwise comparison operation
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- int32_t value1 = tensor_read<in_t>(input1, shape1, index1);
- int32_t value2 = tensor_read<in_t>(input2, shape2, index2);
- bool_t acc = (value1 >= value2) ? True : False;
- tensor_write<out_t>(output, shape, index, acc);
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ out_t result = (value1 >= value2) ? True : False;
+ tensor_write<out_t>(output, shape, index, result);
}
----
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index f44f7f5..4173aab 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -33,8 +33,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = apply_add<in_t>(value1, value2);
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = apply_add<in_t>(value1, value2);
+ tensor_write<in_t>(output, shape, index, result);
----
*Supported Data Types:*
@@ -77,12 +77,12 @@ for_each(index in shape) {
(in_t == int16_t && 0 <= value2 && value2 <= 15) ||
(in_t == int8_t && 0 <= value2 && value2 <= 7));
- in_t acc = value1 >> value2;
+ in_t result = value1 >> value2;
if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) {
- acc = acc + 1;
+ result = result + 1;
}
- acc = apply_clip<in_t>(acc, minimum<in_t>, maximum<in_t>)
- tensor_write<in_t>(output, shape, index, acc)
+ result = apply_clip<in_t>(result, minimum<in_t>, maximum<in_t>);
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -120,8 +120,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = value1 & value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 & value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -159,8 +159,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = value1 | value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 | value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -198,8 +198,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = value1 ^ value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 ^ value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -244,8 +244,8 @@ for_each(index in shape) {
// This catches the case where we divide minimum<in_t> by -1
// which is not representable in two's complement
REQUIRE((int64_t)value1 / value2 <= maximum<in_t>);
- in_t acc = value1 / value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 / value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -280,8 +280,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = value1 && value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 && value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -318,8 +318,8 @@ for_each(index in shape) {
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
REQUIRE(0 <= value2 && value2 <= 31);
- in_t acc = value1 << value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 << value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -358,8 +358,8 @@ for_each(index in shape) {
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
REQUIRE(0 <= value2 && value2 <= 31);
- in_t acc = (unsigned in_t)value1 >> value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = (in_t)((unsigned in_t)value1 >> value2);
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -397,8 +397,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = value1 || value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 || value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -434,8 +434,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = value1 != value2;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = value1 != value2;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -471,8 +471,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = apply_max(value1, value2);
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = apply_max(value1, value2);
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -509,8 +509,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = apply_min(value1, value2);
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = apply_min(value1, value2);
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -548,12 +548,13 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ out_t result;
if (in_t == int32_t && shift > 0) {
- out_t acc = apply_scale_32(value1, value2, shift);
+ result = apply_scale_32(value1, value2, shift);
} else {
- out_t acc = value1 * value2; // low 32-bits of result for int32_t
+ result = value1 * value2; // low 32-bits of result for int32_t
}
- tensor_write<out_t>(output, shape, index, acc);
+ tensor_write<out_t>(output, shape, index, result);
}
----
@@ -582,6 +583,20 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ index1 = apply_broadcast(shape, shape1, index);
+ index2 = apply_broadcast(shape, shape2, index);
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ in_t result = apply_pow<in_t>(value1, value2);
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
@@ -614,8 +629,8 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
in_t value1 = tensor_read<in_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t acc = apply_sub<in_t>(value1, value2);
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = apply_sub<in_t>(value1, value2);
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -661,13 +676,14 @@ An int16_t to int16_t table lookup can be constructed in TOSA as follows:
REQUIRE(length(table) == TABLE_SIZE);
for_each(index in shape) {
in_t value = tensor_read<in_t>(input, shape, index);
+ out_t result;
if (in_t == int8_t) {
// value is a signed int, convert to a 0 based index
- out_t acc = table[value + 128];
+ result = table[value + 128];
} else {
- out_t acc = apply_lookup(table, value);
+ result = apply_lookup(table, value);
}
- tensor_write<out_t>(output, shape, index, acc);
+ tensor_write<out_t>(output, shape, index, result);
}
----
diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc
index ecf40d1..c6babbc 100644
--- a/chapters/ewise_ternary.adoc
+++ b/chapters/ewise_ternary.adoc
@@ -18,7 +18,7 @@ Elementwise select of the output based on a condition.
|===
|Argument|Type|Name|Shape|Description
-|Input|bool_t|input1|shape1|Input selector tensor
+|Input|cmp_t|input1|shape1|Input selector tensor
|Input|in_t*|input2|shape2|Input value tensor if input1 is True
|Input|in_t*|input3|shape3|Input value tensor if input1 is False
|Output|in_t*|output|shape|Output tensor of same type as input2 and input3, with broadcast shape if necessary
@@ -32,22 +32,22 @@ for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
    index3 = apply_broadcast(shape, shape3, index);
- bool_t value1 = tensor_read<in_t>(input1, shape1, index1);
+ cmp_t value1 = tensor_read<cmp_t>(input1, shape1, index1);
in_t value2 = tensor_read<in_t>(input2, shape2, index2);
in_t value3 = tensor_read<in_t>(input3, shape3, index3);
- in_t acc = 0;
- if (value1 == True) {
- acc = value2;
+ in_t result;
+ if (value1) {
+ result = value2;
} else {
- acc = value3;
+ result = value3;
}
- tensor_write<in_t>(output, shape, index, acc);
+ tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|bool_t|in_t
+|Profile|Mode|cmp_t|in_t
|Any|Boolean|bool_t|bool_t
|Any|signed 8|bool_t|int8_t
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index e2b754a..633b8ac 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -62,8 +62,8 @@ Elementwise bitwise NOT of input tensor.
----
for_each(index in shape) {
in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t acc = ~value1;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = ~value1;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -90,6 +90,17 @@ Elementwise ceiling operation
|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape, index);
+ in_t result = apply_ceil<in_t>(value1);
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
@@ -116,10 +127,9 @@ Elementwise count leading zeros operation
[source,c++]
----
for_each(index in shape) {
- in_t acc = 0;
in_t value1 = tensor_read<in_t>(input1, shape, index);
- acc = count_leading_zeros(value1);
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = count_leading_zeros(value1);
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -143,6 +153,17 @@ Elementwise e to the x operation
|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape, index);
+ in_t result = apply_exp<in_t>(value1);
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
@@ -164,6 +185,17 @@ Elementwise floor operation
|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape, index);
+ in_t result = apply_floor<in_t>(value1);
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
@@ -185,6 +217,17 @@ Elementwise natural logarithm operation
|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape, index);
+ in_t result = apply_log<in_t>(value1);
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
@@ -212,8 +255,8 @@ Elementwise logical NOT of input.
----
for_each(index in shape) {
in_t value1 = tensor_read<in_t>(input1, shape1, index);
- in_t acc = !value1;
- tensor_write<in_t>(output, shape, index, acc);
+ in_t result = !value1;
+ tensor_write<in_t>(output, shape, index, result);
}
----
@@ -279,6 +322,17 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used
|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape1, index);
+ in_t result = 1.0 / value1;
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
@@ -300,6 +354,17 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho
|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
|===
+*Operation Function:*
+
+[source,c++]
+----
+for_each(index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape1, index);
+ in_t result = 1.0 / apply_sqrt<in_t>(value1);
+ tensor_write<in_t>(output, shape, index, result);
+}
+----
+
*Supported Data Types:*
|===
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 2491ea5..7476d8a 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -74,8 +74,8 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
y = oy * stride_y + offset_y;
x = ox * stride_x + offset_x;
if (resize_t == float_t) {
- iy = (int)floor(y); dy = y - (float_t)iy;
- ix = (int)floor(x); dx = x - (float_t)ix;
+ iy = (int)apply_floor(y); dy = y - (float_t)iy;
+ ix = (int)apply_floor(x); dx = x - (float_t)ix;
} else {
iy = y >> shift; dy = y - (iy<<shift);
ix = x >> shift; dx = x - (ix<<shift);
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index 16e7e67..d5f05db 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -54,43 +54,61 @@ void ERROR_IF(condition) {
This section contains general pseudocode utility functions used throughout the specification.
-The following functions provide basic arithmetic while defining requirements such that values stay in the valid range.
+The following functions provide arithmetic while defining requirements such that values stay in the valid range.
[source,c++]
----
-acc_t apply_add<acc_t>(acc_t a, acc_t b) {
- if (acc_t == float_t) return a + b;
+in_t apply_add<in_t>(in_t a, in_t b) {
+ if (<in_t> == float_t) return a + b;
int64_t c = (int64_t)a + (int64_t)b;
- REQUIRE(c >= minimum<acc_t> && c <= maximum<acc_t>);
- return (acc_t)c;
+ REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>);
+ return (in_t)c;
}
-acc_t apply_sub<acc_t>(acc_t a, acc_t b) {
- if (acc_t == float_t) return a - b;
- int64_t c = (int64_t)a - (int64_t)b;
- REQUIRE(c >= minimum<acc_t> && c <= maximum<acc_t>);
- return (acc_t)c;
+in_t apply_ceil<in_t>(in_t input) {
+ return input value rounded up to nearest integer
}
-----
-The following functions are used in the pseudocode to take maximum,
-minimum, clip values to a range, or count leading zeros.
-[[count_leading_zeros]]
-[source,c++]
-----
-<type> apply_max<type>(<type> a, <type> b) {
+in_t apply_clip<in_t>(in_t value, in_t min_val, in_t max_val) {
+ REQUIRE(min_val <= max_val);
+ value = apply_max(value, min_val);
+ value = apply_min(value, max_val);
+ return value;
+}
+
+in_t apply_exp<in_t>(in_t input) {
+ return e to the power input
+}
+
+in_t apply_floor<in_t>(in_t input) {
+ return input value rounded down to nearest integer
+}
+
+in_t apply_log<in_t>(in_t input) {
+ return the natural logarithm of input
+}
+
+in_t apply_max<in_t>(in_t a, in_t b) {
if (a >= b) return a; else return b;
}
-<type> apply_min<type>(<type> a, <type> b) {
+in_t apply_min<in_t>(in_t a, in_t b) {
if (a < b) return a; else return b;
}
-<type> apply_clip<type>(<type> value, <type> min_val, <type> max_val) {
- REQUIRE(min_val <= max_val);
- value = apply_max(value, min_val);
- value = apply_min(value, max_val);
- return value;
+in_t apply_pow<in_t>(in_t a, in_t b) {
+ return a ** b; // a raised to the power b
+}
+
+in_t apply_sqrt<in_t>(in_t input) {
+ return the square root of input
+}
+
+in_t apply_sub<in_t>(in_t a, in_t b) {
+ if (in_t == float_t) return a - b;
+ int64_t c = (int64_t)a - (int64_t)b;
+ REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>);
+ return (in_t)c;
}
int32_t count_leading_zeros(int32_t a) {
@@ -146,15 +164,17 @@ Generic helper functions used to keep the pseudocode concise.
[source,c++]
----
+
+int idiv(int input1, int input2) {
+ return input1 / input2; // Integer divide that truncates towards zero
+}
+
int length(in_t input)
return number of elements in input list
-int floor(in_t input)
- return input value rounded down to nearest integer
-
int rank(in_t input)
return rank of an input tensor
int sum(in_t input[])
return the sum of values of an input list
----- \ No newline at end of file
+----
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
index b687896..11db960 100644
--- a/chapters/reduction.adoc
+++ b/chapters/reduction.adoc
@@ -29,16 +29,18 @@ Reduce a tensor along the given axis with a logical AND operation
----
ERROR_IF(axis < 0 || axis >= rank(shape1));
ERROR_IF(shape[axis] != 1);
+
+// Initialize output state to true
for_each(index in shape) {
tensor_write<in_t>(output, shape, index, true);
}
for_each(index in shape1) {
- tmp_index = index;
- tmp_index[axis]=0;
- value = tensor_read<in_t>(input, shape1, index);
- acc = tensor_read<in_t>(output, shape, tmp_index);
- acc = acc && value;
- tensor_write<in_t>(output, shape, tmp_index, acc);
+ out_index = index;
+ out_index[axis] = 0;
+ in_t value = tensor_read<in_t>(input, shape1, index);
+ in_t state = tensor_read<in_t>(output, shape, out_index);
+ state = state && value;
+ tensor_write<in_t>(output, shape, out_index, state);
}
----
@@ -70,16 +72,18 @@ Reduce a tensor along the given axis with a logical OR operation
----
ERROR_IF(axis < 0 || axis >= rank(shape1));
ERROR_IF(shape[axis] != 1);
+
+// Initialize output state to false
for_each(index in shape) {
tensor_write<in_t>(output, shape, index, false);
}
for_each(index in shape1) {
- tmp_index = index;
- tmp_index[axis]=0;
- value = tensor_read<in_t>(input, shape1, index);
- acc = tensor_read<in_t>(output, shape, tmp_index);
- acc = acc || value;
- tensor_write<in_t>(output, shape, tmp_index, acc);
+ out_index = index;
+ out_index[axis] = 0;
+ in_t value = tensor_read<in_t>(input, shape1, index);
+ in_t state = tensor_read<in_t>(output, shape, out_index);
+ state = state || value;
+ tensor_write<in_t>(output, shape, out_index, state);
}
----
@@ -115,12 +119,12 @@ for_each(index in shape) {
tensor_write<in_t>(output, shape, index, minimum<in_t>);
}
for_each(index in shape1) {
- tmp_index = index;
- tmp_index[axis]=0;
- value = tensor_read<in_t>(input, shape1, index);
- acc = tensor_read<in_t>(output, shape, tmp_index);
- acc = apply_max<in_t>(acc, value);
- tensor_write<in_t>(output, shape, tmp_index, acc);
+ out_index = index;
+ out_index[axis] = 0;
+ in_t value = tensor_read<in_t>(input, shape1, index);
+ in_t state = tensor_read<in_t>(output, shape, out_index);
+ state = apply_max<in_t>(state, value);
+ tensor_write<in_t>(output, shape, out_index, state);
}
----
@@ -158,12 +162,12 @@ for_each(index in shape) {
tensor_write<in_t>(output, shape, index, maximum<in_t>);
}
for_each(index in shape1) {
- tmp_index = index;
- tmp_index[axis]=0;
- value = tensor_read<in_t>(input, shape1, index);
- acc = tensor_read<in_t>(output, shape, tmp_index);
- acc = apply_min<in_t>(acc, value);
- tensor_write<in_t>(output, shape, tmp_index, acc);
+ out_index = index;
+ out_index[axis] = 0;
+ in_t value = tensor_read<in_t>(input, shape1, index);
+ in_t state = tensor_read<in_t>(output, shape, out_index);
+ state = apply_min<in_t>(state, value);
+ tensor_write<in_t>(output, shape, out_index, state);
}
----
@@ -202,12 +206,12 @@ for_each(index in shape) {
tensor_write<in_t>(output, shape, index, 1.0);
}
for_each(index in shape1) {
- tmp_index = index;
- tmp_index[axis]=0;
- value = tensor_read<in_t>(input, shape1, index);
- acc = tensor_read<in_t>(output, shape, tmp_index);
- acc = acc * value;
- tensor_write<in_t>(output, shape, tmp_index, acc);
+ out_index = index;
+ out_index[axis] = 0;
+ in_t value = tensor_read<in_t>(input, shape1, index);
+ in_t state = tensor_read<in_t>(output, shape, out_index);
+ state = state * value;
+ tensor_write<in_t>(output, shape, out_index, state);
}
----
@@ -243,12 +247,12 @@ for_each(index in shape) {
tensor_write<in_t>(output, shape, index, 0);
}
for_each(index in shape1) {
- tmp_index = index;
- tmp_index[axis]=0;
- value = tensor_read<in_t>(input, shape1, index);
- acc = tensor_read<in_t>(output, shape, tmp_index);
- acc = apply_add<in_t>(acc, value);
- tensor_write<in_t>(output, shape, tmp_index, acc);
+ out_index = index;
+ out_index[axis] = 0;
+ in_t value = tensor_read<in_t>(input, shape1, index);
+ in_t state = tensor_read<in_t>(output, shape, out_index);
+ state = apply_add<in_t>(state, value);
+ tensor_write<in_t>(output, shape, out_index, state);
}
----
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index d7ced25..cfab5ba 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -98,8 +98,8 @@ ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x);
ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y);
// Output shape must match expected shape given the input shape
// and arguments provided
-ERROR_IF(H != floor((IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y))
-ERROR_IF(W != floor((IW + pad_left + pad_right + stride_x - kernel_x) / stride_x))
+ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y));
+ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x));
for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
in_t output_val;
@@ -444,8 +444,8 @@ ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x);
ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y);
// Output shape must match expected shape given the input shape
// and arguments provided
-ERROR_IF(H != floor((IH + pad_top + pad_bottom + stride_y - kernel_y) / stride_y))
-ERROR_IF(W != floor((IW + pad_left + pad_right + stride_x - kernel_x) / stride_x))
+ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y));
+ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x));
for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
in_t acc = minimum_value<in_t>;