From 84b00fde6ab324726ac9c90687c4a017a2c7a00b Mon Sep 17 00:00:00 2001
From: Eric Kunze <eric.kunze@arm.com>
Date: Thu, 7 Sep 2023 00:31:54 +0000
Subject: Support new RESCALE attributes

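The input_unsigned and output_unsigned attributes were added to the
RESCALE operator in the specification. Pass them through the C operator
API and, when set, zero-extend 8/16-bit values and zero points in
OpRescale::eval.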

Older TOSA files with uint data types are still supported.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I059484086887c3bd5e1af5b1aac0dacf0703f827
---
 reference_model/include/operators.h        |   4 +-
 reference_model/src/operators.cc           |  13 ++-
 reference_model/src/ops/type_conversion.cc | 179 +++++++++++++++++++++++------
 thirdparty/serialization_lib               |   2 +-
 4 files changed, 156 insertions(+), 42 deletions(-)

diff --git a/reference_model/include/operators.h b/reference_model/include/operators.h
index b0a3227..2c31c30 100644
--- a/reference_model/include/operators.h
+++ b/reference_model/include/operators.h
@@ -329,6 +329,8 @@ extern "C"
                                    const int32_t client_shift[],
                                    const bool client_scale32,
                                    const bool client_double_round,
+                                   const bool client_input_unsigned,
+                                   const bool client_output_unsigned,
                                    const bool client_per_channel);
 
     tosa_status_t tosa_run_identity(tosa_tensor_t client_input1, tosa_tensor_t client_output);
@@ -337,4 +339,4 @@ extern "C"
 }
 #endif /* __cplusplus */
 
-#endif    // OPERATORS_H_
\ No newline at end of file
+#endif    // OPERATORS_H_
diff --git a/reference_model/src/operators.cc b/reference_model/src/operators.cc
index ae5963d..cb71ae8 100644
--- a/reference_model/src/operators.cc
+++ b/reference_model/src/operators.cc
@@ -2339,6 +2339,8 @@ extern "C"
                                    const int32_t client_shift[],
                                    const bool client_scale32,
                                    const bool client_double_round,
+                                   const bool client_input_unsigned,
+                                   const bool client_output_unsigned,
                                    const bool client_per_channel)
     {
         // Create operator attributes
@@ -2346,10 +2348,13 @@ extern "C"
         const int32_t output_zp = client_output_zp;
         const std::vector<int32_t> multiplier(&client_multiplier[0], &client_multiplier[0] + client_multiplier_len);
         const std::vector<int32_t> shift(&client_shift[0], &client_shift[0] + client_shift_len);
-        const bool scale32      = client_scale32;
-        const bool double_round = client_double_round;
-        const bool per_channel  = client_per_channel;
-        TosaRescaleAttribute attr(input_zp, output_zp, multiplier, shift, scale32, double_round, per_channel);
+        const bool scale32         = client_scale32;
+        const bool double_round    = client_double_round;
+        const bool per_channel     = client_per_channel;
+        const bool input_unsigned  = client_input_unsigned;
+        const bool output_unsigned = client_output_unsigned;
+        TosaRescaleAttribute attr(input_zp, output_zp, multiplier, shift, scale32, double_round, per_channel,
+                                  input_unsigned, output_unsigned);
 
         // Create tensors
         tosa::TosaSerializationTensor* input  = translate_client_tensor(client_input, "input");
diff --git a/reference_model/src/ops/type_conversion.cc b/reference_model/src/ops/type_conversion.cc
index 9464fd9..0135d1b 100644
--- a/reference_model/src/ops/type_conversion.cc
+++ b/reference_model/src/ops/type_conversion.cc
@@ -102,6 +102,18 @@ int OpRescale<Rank, InDtype, OutDtype>::checkTensorAttributes()
     return 0;
 }
 
+// Helpers that reinterpret a signed 8/16-bit value's bits as unsigned and widen the result to int64_t (zero-extension).
+static int64_t zero_extend(int8_t val)
+{
+    uint8_t* rval = reinterpret_cast<uint8_t*>(&val);
+    return static_cast<int64_t>(*rval);
+}
+static int64_t zero_extend(int16_t val)
+{
+    uint16_t* rval = reinterpret_cast<uint16_t*>(&val);
+    return static_cast<int64_t>(*rval);
+}
+
 template <int Rank, TOSA_REF_TYPE InDtype, TOSA_REF_TYPE OutDtype>
 int OpRescale<Rank, InDtype, OutDtype>::eval()
 {
@@ -112,6 +124,8 @@ int OpRescale<Rank, InDtype, OutDtype>::eval()
     bool scale32                    = attribute->scale32();
     bool double_round               = attribute->double_round();
     bool per_channel                = attribute->per_channel();
+    bool input_unsigned             = attribute->input_unsigned();
+    bool output_unsigned            = attribute->output_unsigned();
 
     // reshape [d0, d1, ..., dn] into [d0 * d1 ..., dn]
     Eigen::array<Eigen::Index, 2> shape_2d;
@@ -143,22 +157,68 @@ int OpRescale<Rank, InDtype, OutDtype>::eval()
         {
             for (int32_t i = 0; i < shape_2d[1]; i++)
             {
-                begin                        = Eigen::array<Eigen::Index, 2>({ 0, i });
-                curr_channel_slice_prescaled = input_reshaped.slice(begin, size);
-                channel_multiplier           = multiplier[i];
-                channel_shift                = shift[i];
-                curr_channel_slice_postscaled =
-                    curr_channel_slice_prescaled.unaryExpr([input_zp, output_zp, channel_multiplier, channel_shift,
-                                                            double_round, scale32](InEigenType in_val) -> OutEigenType {
-                        InEigenType input_zp_shifted = in_val - (InEigenType)input_zp;
+                begin                         = Eigen::array<Eigen::Index, 2>({ 0, i });
+                curr_channel_slice_prescaled  = input_reshaped.slice(begin, size);
+                channel_multiplier            = multiplier[i];
+                channel_shift                 = shift[i];
+                curr_channel_slice_postscaled = curr_channel_slice_prescaled.unaryExpr(
+                    [input_zp, output_zp, channel_multiplier, channel_shift, double_round, scale32, input_unsigned,
+                     output_unsigned](InEigenType in_val) -> OutEigenType {
+                        int64_t input_zp_shifted;
+                        if (input_unsigned)
+                        {
+                            int64_t in_val64;
+                            int64_t in_zp64;
+                            switch (GetNumBits<InDtype>::value)
+                            {
+                                case 8:
+                                    in_val64 = zero_extend(static_cast<int8_t>(in_val));
+                                    in_zp64  = zero_extend(static_cast<int8_t>(input_zp));
+                                    break;
+                                case 16:
+                                    in_val64 = zero_extend(static_cast<int16_t>(in_val));
+                                    in_zp64  = zero_extend(static_cast<int16_t>(input_zp));
+                                    break;
+                                default:
+                                    in_val64 = static_cast<int64_t>(in_val);
+                                    in_zp64  = static_cast<int64_t>(input_zp);
+                                    break;
+                            }
+                            input_zp_shifted = in_val64 - in_zp64;
+                        }
+                        else
+                        {
+                            input_zp_shifted = in_val - input_zp;
+                        }
                         int32_t scaled;
                         if (scale32)
-                            scaled = TosaReference::QuantUtil::apply_scale_32(input_zp_shifted, channel_multiplier,
-                                                                              channel_shift, double_round);
+                            scaled = TosaReference::QuantUtil::apply_scale_32(static_cast<int32_t>(input_zp_shifted),
+                                                                              channel_multiplier, channel_shift,
+                                                                              double_round);
                         else
                             scaled = TosaReference::QuantUtil::apply_scale_16(input_zp_shifted, channel_multiplier,
                                                                               channel_shift);
-                        int64_t res_in_64     = static_cast<int64_t>(scaled) + output_zp;
+                        int64_t output_zp_extended;
+                        if (output_unsigned)
+                        {
+                            switch (GetNumBits<OutDtype>::value)
+                            {
+                                case 8:
+                                    output_zp_extended = zero_extend(static_cast<int8_t>(output_zp));
+                                    break;
+                                case 16:
+                                    output_zp_extended = zero_extend(static_cast<int16_t>(output_zp));
+                                    break;
+                                default:
+                                    output_zp_extended = static_cast<int64_t>(output_zp);
+                                    break;
+                            }
+                        }
+                        else
+                        {
+                            output_zp_extended = static_cast<int64_t>(output_zp);
+                        }
+                        int64_t res_in_64     = static_cast<int64_t>(scaled) + output_zp_extended;
                         int64_t i32_max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
                         int64_t i32_min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
                         if (res_in_64 > i32_max_in_64 || res_in_64 < i32_min_in_64)
@@ -190,31 +250,78 @@ int OpRescale<Rank, InDtype, OutDtype>::eval()
         int32_t tensor_shift      = shift[0];
         try
         {
-            output_2d = input_reshaped.unaryExpr([input_zp, output_zp, tensor_multiplier, tensor_shift, double_round,
-                                                  scale32](InEigenType in_val) -> OutEigenType {
-                InEigenType input_zp_shifted = in_val - (InEigenType)input_zp;
-                int32_t scaled;
-                if (scale32)
-                    scaled = TosaReference::QuantUtil::apply_scale_32(input_zp_shifted, tensor_multiplier, tensor_shift,
-                                                                      double_round);
-                else
-                    scaled =
-                        TosaReference::QuantUtil::apply_scale_16(input_zp_shifted, tensor_multiplier, tensor_shift);
-                int64_t res_in_64     = static_cast<int64_t>(scaled) + output_zp;
-                int64_t i32_max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
-                int64_t i32_min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
-                if (res_in_64 > i32_max_in_64 || res_in_64 < i32_min_in_64)
-                {
-                    std::string desc = "scaling result [" + std::to_string(scaled) + "] plus output_zp [" +
-                                       std::to_string(output_zp) + "] not in i32 range";
-                    throw desc;
-                }
-
-                OutEigenType out_val = static_cast<OutEigenType>(res_in_64);
-                out_val              = std::max<OutEigenType>(out_val, QMin);
-                out_val              = std::min<OutEigenType>(out_val, QMax);
-                return out_val;
-            });
+            output_2d =
+                input_reshaped.unaryExpr([input_zp, output_zp, tensor_multiplier, tensor_shift, double_round, scale32,
+                                          input_unsigned, output_unsigned](InEigenType in_val) -> OutEigenType {
+                    int64_t input_zp_shifted;
+                    if (input_unsigned)
+                    {
+                        int64_t in_val64;
+                        int64_t in_zp64;
+                        switch (GetNumBits<InDtype>::value)
+                        {
+                            case 8:
+                                in_val64 = zero_extend(static_cast<int8_t>(in_val));
+                                in_zp64  = zero_extend(static_cast<int8_t>(input_zp));
+                                break;
+                            case 16:
+                                in_val64 = zero_extend(static_cast<int16_t>(in_val));
+                                in_zp64  = zero_extend(static_cast<int16_t>(input_zp));
+                                break;
+                            default:
+                                in_val64 = static_cast<int64_t>(in_val);
+                                in_zp64  = static_cast<int64_t>(input_zp);
+                                break;
+                        }
+                        input_zp_shifted = in_val64 - in_zp64;
+                    }
+                    else
+                    {
+                        input_zp_shifted = in_val - input_zp;
+                    }
+                    int32_t scaled;
+                    if (scale32)
+                        scaled = TosaReference::QuantUtil::apply_scale_32(input_zp_shifted, tensor_multiplier,
+                                                                          tensor_shift, double_round);
+                    else
+                        scaled =
+                            TosaReference::QuantUtil::apply_scale_16(input_zp_shifted, tensor_multiplier, tensor_shift);
+
+                    int64_t output_zp_extended;
+                    if (output_unsigned)
+                    {
+                        switch (GetNumBits<OutDtype>::value)
+                        {
+                            case 8:
+                                output_zp_extended = zero_extend(static_cast<int8_t>(output_zp));
+                                break;
+                            case 16:
+                                output_zp_extended = zero_extend(static_cast<int16_t>(output_zp));
+                                break;
+                            default:
+                                output_zp_extended = static_cast<int64_t>(output_zp);
+                                break;
+                        }
+                    }
+                    else
+                    {
+                        output_zp_extended = static_cast<int64_t>(output_zp);
+                    }
+                    int64_t res_in_64     = static_cast<int64_t>(scaled) + output_zp_extended;
+                    int64_t i32_max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
+                    int64_t i32_min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
+                    if (res_in_64 > i32_max_in_64 || res_in_64 < i32_min_in_64)
+                    {
+                        std::string desc = "scaling result [" + std::to_string(scaled) + "] plus output_zp [" +
+                                           std::to_string(output_zp) + "] not in i32 range";
+                        throw desc;
+                    }
+
+                    OutEigenType out_val = static_cast<OutEigenType>(res_in_64);
+                    out_val              = std::max<OutEigenType>(out_val, QMin);
+                    out_val              = std::min<OutEigenType>(out_val, QMax);
+                    return out_val;
+                });
         }
         catch (std::string desc)
         {
diff --git a/thirdparty/serialization_lib b/thirdparty/serialization_lib
index 54fcf85..29c294c 160000
--- a/thirdparty/serialization_lib
+++ b/thirdparty/serialization_lib
@@ -1 +1 @@
-Subproject commit 54fcf8538291b16eb2038097059e517cf704a3ff
+Subproject commit 29c294c4e74b1b48967cd4831f6f508500b1f1d1
-- 
cgit v1.2.1