about | summary | refs | log | tree | commit | diff
path: root/chapters/ewise_binary.adoc
diff options
context:
space:
mode:
author Dominic Symes <dominic.symes@arm.com> 2020-10-22 10:56:36 +0100
committer Dominic Symes <dominic.symes@arm.com> 2020-10-28 16:42:06 +0000
commit f7179b5c3f42fa27835f286c78ca943772c867d6 (patch)
tree 4ba379472f169de93f3bc2f078a1706bff386ada /chapters/ewise_binary.adoc
parent 3da62dfa0da290dbbb39e411ca2187703429f916 (diff)
download specification-f7179b5c3f42fa27835f286c78ca943772c867d6.tar.gz
MUL: Add right shift on 32x32 multiply
The result of 32x32 elementwise multiply exceeds the int32_t result type range. This change adds a right scaling shift argument to shift down the result.

Change-Id: I6ae17e6dc3fe342d052304533158ad2d0e7bb7be
Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Diffstat (limited to 'chapters/ewise_binary.adoc')
-rw-r--r-- chapters/ewise_binary.adoc 8
1 files changed, 7 insertions, 1 deletions
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 4da63de..241ca32 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -479,6 +479,7 @@ Elementwise multiplication (Hadamard product) of input tensor 0 and input tensor
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Attribute|uint6_t|shift|-|Result right shift (int32 data type only)
|Output|out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
@@ -486,12 +487,17 @@ Elementwise multiplication (Hadamard product) of input tensor 0 and input tensor
[source,c]
----
+assert(in_t==int32_t || shift==0);
for_each (index in shape) {
index1 = apply_broadcast(shape, shape1, index)
index2 = apply_broadcast(shape, shape2, index)
in_t value1 = tensor_read<in_t>(input1, shape1, index1)
in_t value2 = tensor_read<in_t>(input2, shape2, index2)
- in_t acc = value1 * value2 // takes low bits for int32_t
+ if (shift>0) {
+ out_t acc = apply_scale_32(value1, value2, shift)
+ } else {
+ out_t acc = value1 * value2; // low 32-bits of result for int32_t
+ }
tensor_write<out_t>(output, shape, index, acc)
}
----