From d4cf0738c55ce73dae5e25e0fbec4d8b3f603e69 Mon Sep 17 00:00:00 2001
From: Dominic Symes <dominic.symes@arm.com>
Date: Wed, 24 Nov 2021 16:59:56 +0000
Subject: MUL: Correct code for the case of value2<0

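The previous call to apply_scale_32() was not
valid for value2<0. Replace it with an explicit
64-bit multiply, rounding add and right shift,
and require the result to fit in int32_t.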

Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Change-Id: I3fbf38eaad964efdb0c4920da1a61aee67c9bba9
---
 chapters/ewise_binary.adoc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index aa1c86c..6fecc2a 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -544,6 +544,7 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
 
 [source,c++]
 ----
+ERROR_IF(in_t != int32_t && shift > 0);
 for_each(index in shape) {
     index1 = apply_broadcast(shape, shape1, index);
     index2 = apply_broadcast(shape, shape2, index);
@@ -551,7 +552,11 @@ for_each(index in shape) {
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
     out_t result;
     if (in_t == int32_t && shift > 0) {
-        result = apply_scale_32(value1, value2, shift);
+        int64_t product = (int64_t)value1 * (int64_t)value2;
+        int64_t round   = (int64_t)1 << (shift-1);
+        product = (product + round) >> shift;
+        REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
+        result = product;
     } else {
         result = value1 * value2;  // low 32-bits of result for int32_t
     }
-- 
cgit v1.2.1