From 0e46364dfea65a7898639dd381250014ccca3efa Mon Sep 17 00:00:00 2001
From: Jeremy Johnson <jeremy.johnson@arm.com>
Date: Tue, 3 May 2022 12:10:23 +0100
Subject: Fix for NEGATE using 32-bit accumulator

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: Ie5d119dc317303a0d2a71d018ac94ce6800ecbf5
---
 reference_model/src/ops/ewise_unary.cc | 32 ++++++++++++++++++--------------
 verif/generator/tosa_arg_gen.py        | 22 +++++-----------------
 2 files changed, 23 insertions(+), 31 deletions(-)
diff --git a/reference_model/src/ops/ewise_unary.cc b/reference_model/src/ops/ewise_unary.cc
index 0f38056..8b83a50 100644
--- a/reference_model/src/ops/ewise_unary.cc
+++ b/reference_model/src/ops/ewise_unary.cc
@@ -231,28 +231,32 @@ int OpNegate<Rank, Dtype>::register_fcn()
         case DType_INT32:
             this->fcn = [this](InEigenType a) -> OutEigenType {
                 int64_t res_in_64 = 0L - a;
-                int64_t max_in_64, min_in_64;
-                if (Dtype == DType_INT16) {
-                    max_in_64 = static_cast<int64_t>(std::numeric_limits<int16_t>::max());
-                    min_in_64 = static_cast<int64_t>(std::numeric_limits<int16_t>::min());
+                int64_t i32_max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
+                int64_t i32_min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
+                REQUIRE(res_in_64 <= i32_max_in_64 && res_in_64 >= i32_min_in_64, "OpNegate: result not in acc type range (int32)");
+
+                int64_t max_clip_in_64, min_clip_in_64;
+                if (Dtype == DType_INT16)
+                {
+                    max_clip_in_64 = static_cast<int64_t>(std::numeric_limits<int16_t>::max());
+                    min_clip_in_64 = static_cast<int64_t>(std::numeric_limits<int16_t>::min());
                 }
                 else
                 {
-                    max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
-                    min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
+                    max_clip_in_64 = i32_max_in_64;
+                    min_clip_in_64 = i32_min_in_64;
                 }
-                REQUIRE(res_in_64 <= max_in_64 && res_in_64 >= min_in_64, "OpNegate: result not in input type range");
-                return static_cast<InEigenType>(res_in_64);
+                return static_cast<InEigenType>(std::min<int64_t>(max_clip_in_64, std::max<int64_t>(min_clip_in_64, res_in_64)));
             };
             break;
         case DType_INT8:
             this->fcn = [this](InEigenType a) -> OutEigenType {
-                int32_t res_in_32 = 0 - (a - this->qinfo->input_zp());
-                int32_t max_in_32 = static_cast<int32_t>(std::numeric_limits<int8_t>::max());
-                int32_t min_in_32 = static_cast<int32_t>(std::numeric_limits<int8_t>::min());
-                REQUIRE(res_in_32 <= max_in_32 && res_in_32 >= min_in_32, "OpNegate: result not in i8 range");
-                res_in_32 += this->qinfo->output_zp();
-                InEigenType result = static_cast<InEigenType>(std::min(std::max(res_in_32, static_cast<int32_t>(QMin)), static_cast<int32_t>(QMax)));
+                int64_t res_in_64 = 0 - (a - this->qinfo->input_zp());
+                int64_t i32_max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
+                int64_t i32_min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
+                REQUIRE(res_in_64 <= i32_max_in_64 && res_in_64 >= i32_min_in_64, "OpNegate: result not in acc type range (int32)");
+                res_in_64 += this->qinfo->output_zp();
+                InEigenType result = static_cast<InEigenType>(std::min(std::max(res_in_64, static_cast<int64_t>(QMin)), static_cast<int64_t>(QMax)));
                 return result;
             };
             break;
diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py
index 9f02489..b1f8942 100644
--- a/verif/generator/tosa_arg_gen.py
+++ b/verif/generator/tosa_arg_gen.py
@@ -563,27 +563,15 @@ class TosaTensorValuesGen:
 
     @staticmethod
     def tvgNegate(testGen, op, dtypeList, shapeList, testArgs, qinfo, error_name=None):
-        if dtypeList[0] != DType.FLOAT and error_name is None:
+        if dtypeList[0] == DType.INT32 and error_name is None:
             pCount, cCount = op["operands"]
             assert (
                 pCount == 1 and cCount == 0
             ), "Op.NEGATE must have 1 placeholders, 0 consts"
-            # Must create tensors with values within negatable ranges
-            if dtypeList[0] == DType.INT8:
-                # Must be within int8, adjustable by input_zp and then negatable
-                # and be within int8
-                # For use: qinfo.ints[0][1] = input_zp, qinfo.ints[1][1] = output_zp
-                max_val = min(127, 127 + qinfo.ints[0][1])
-                min_val = max(-127, -127 + qinfo.ints[0][1])
-            elif dtypeList[0] == DType.INT16:
-                max_val = 32767
-                min_val = -max_val
-            else:
-                assert (
-                    dtypeList[0] == DType.INT32
-                ), "Op.NEGATE found with unsupported input type"
-                max_val = (1 << 31) - 1
-                min_val = -max_val
+            # Generate values whose negation still fits in the int32 accumulator,
+            # i.e. exclude -(1 << 31), whose negation would overflow int32
+            max_val = (1 << 31) - 1
+            min_val = -max_val
             arr = np.int32(
                 testGen.rng.integers(low=min_val, high=(max_val + 1), size=shapeList[0])
             )
-- 
cgit v1.2.1