Fix the wrong QMax and QMin type assignment in rescale op

A signed integer type was used to hold QMax and QMin regardless of the value of `output_unsigned`, but the values of QMax and QMin are unsigned integers when `output_unsigned` is true. Also add a handful of arithmetic helpers to align with the pseudocode. Change-Id: Ie3cd444a290ddae08884186cd4b349a88acad032 Signed-off-by: TatWai Chong <tatwai.chong@arm.com>
author: TatWai Chong <tatwai.chong@arm.com> 2024-04-02 15:45:29 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2024-04-08 17:09:38 +0000
commit: 0dac6c90a1ed753d1018077be83941834f937601 (patch)
tree: a6c59981ac161d12df7a4deb65bca14905465d4d
parent: 129201df8126a16abb0e4fbf7372354021f8a55d (diff)
download: reference_model-0dac6c90a1ed753d1018077be83941834f937601.tar.gz
3 files changed, 74 insertions, 9 deletions
diff --git a/reference_model/src/arith_util.h b/reference_model/src/arith_util.h
index fee9fef..fa6d136 100644
--- a/reference_model/src/arith_util.h
+++ b/reference_model/src/arith_util.h
@@ -317,4 +317,50 @@ int32_t getUnsignedMinimum()
     return 0;
 }
 
+template <typename T>
+T applyMax(T a, T b)
+{
+    if (std::is_floating_point<T>::value)
+    {
+        if (std::isnan(a) || std::isnan(b))
+        {
+            return NAN;
+        }
+    }
+    return (a >= b) ? a : b;
+}
+
+template <typename T>
+T applyMin(T a, T b)
+{
+    if (std::is_floating_point<T>::value)
+    {
+        if (std::isnan(a) || std::isnan(b))
+        {
+            return NAN;
+        }
+    }
+    return (a < b) ? a : b;
+}
+
+// Clip the input value of type T into the range [min, max] of type U, and return the result as type T.
+template <typename T, typename U>
+T applyClip(T value, U min_val, U max_val)
+{
+    assert(min_val <= max_val);
+    assert(sizeof(T) == sizeof(U));
+
+    value = applyMax<T>(value, min_val);
+
+    // Handle values of an unsigned type U that become unrepresentable when cast to the signed type T.
+    if (std::is_signed_v<T> && std::is_unsigned_v<U> && max_val > std::numeric_limits<T>::max())
+    {
+        max_val = std::numeric_limits<T>::max();
+    }
+
+    value = applyMin<T>(value, max_val);
+
+    return value;
+}
+
 #endif /* _ARITH_UTIL_H */
diff --git a/reference_model/src/ops/type_conversion.cc b/reference_model/src/ops/type_conversion.cc
index 85f8c58..7bca697 100644
--- a/reference_model/src/ops/type_conversion.cc
+++ b/reference_model/src/ops/type_conversion.cc
@@ -37,6 +37,11 @@ OpRescale<Rank, InDtype, OutDtype>::OpRescale(SubgraphTraverser* sgt_, TosaAttri
 {
     setRequiredOperands(3, 1);
     INIT_ATTRIBUTE(Rescale);
+
+    QMax_s = getSignedMaximum<OutDtype>();
+    QMin_s = getSignedMinimum<OutDtype>();
+    QMax_u = getUnsignedMaximum<OutDtype>();
+    QMin_u = getUnsignedMinimum<OutDtype>();
 }
 
 template <int Rank, TOSA_REF_TYPE InDtype, TOSA_REF_TYPE OutDtype>
@@ -155,9 +160,6 @@ int OpRescale<Rank, InDtype, OutDtype>::eval()
     bool input_unsigned  = attribute->input_unsigned();
     bool output_unsigned = attribute->output_unsigned();
 
-    int32_t QMin = output_unsigned ? getUnsignedMinimum<OutDtype>() : getSignedMinimum<OutDtype>();
-    int32_t QMax = output_unsigned ? getUnsignedMaximum<OutDtype>() : getSignedMaximum<OutDtype>();
-
     // reshape [d0, d1, ..., dn] into [d0 * d1 ..., dn]
     Eigen::array<Eigen::Index, 2> shape_2d;
     shape_2d[0] = 1;
@@ -270,18 +272,24 @@ int OpRescale<Rank, InDtype, OutDtype>::eval()
                         {
                             output_zp_extended = static_cast<int64_t>(output_zp);
                         }
+
                         int64_t res_in_64     = static_cast<int64_t>(scaled) + output_zp_extended;
                         int64_t i32_max_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::max());
                         int64_t i32_min_in_64 = static_cast<int64_t>(std::numeric_limits<int32_t>::min());
+
                         if (res_in_64 > i32_max_in_64 || res_in_64 < i32_min_in_64)
                         {
                             std::string desc = "scaling result [" + std::to_string(scaled) + "] plus output_zp [" +
                                                std::to_string(output_zp) + "] not in i32 range";
                             throw desc;
                         }
-                        OutEigenType out_val = static_cast<OutEigenType>(res_in_64);
-                        out_val              = std::max<OutEigenType>(out_val, QMin);
-                        out_val              = std::min<OutEigenType>(out_val, QMax);
+
+                        // Treat the output values as unsigned if `output_unsigned` is true.
+                        int32_t clipped_val = (output_unsigned)
+                                                  ? applyClip<int32_t, uint32_t>(res_in_64, QMin_u, QMax_u)
+                                                  : applyClip<int32_t, int32_t>(res_in_64, QMin_s, QMax_s);
+
+                        OutEigenType out_val = static_cast<OutEigenType>(clipped_val);
                         return out_val;
                     });
 
@@ -370,9 +378,11 @@ int OpRescale<Rank, InDtype, OutDtype>::eval()
                     throw desc;
                 }
 
-                OutEigenType out_val = static_cast<OutEigenType>(res_in_64);
-                out_val              = std::max<OutEigenType>(out_val, QMin);
-                out_val              = std::min<OutEigenType>(out_val, QMax);
+                // Treat the output values as unsigned if `output_unsigned` is true.
+                int32_t clipped_val = (output_unsigned) ? applyClip<int32_t, uint32_t>(res_in_64, QMin_u, QMax_u)
+                                                        : applyClip<int32_t, int32_t>(res_in_64, QMin_s, QMax_s);
+
+                OutEigenType out_val = static_cast<OutEigenType>(clipped_val);
                 return out_val;
             });
         }
diff --git a/reference_model/src/ops/type_conversion.h b/reference_model/src/ops/type_conversion.h
index cf95f16..0636357 100644
--- a/reference_model/src/ops/type_conversion.h
+++ b/reference_model/src/ops/type_conversion.h
@@ -60,6 +60,15 @@ protected:
     TosaReference::TensorTemplate<TMultiplierI16>* multiplierI16;
     TosaReference::TensorTemplate<TMultiplierI32>* multiplierI32;
     TosaReference::TensorTemplate<TShift>* shift;
+
+    // The maximum value when interpreting OutDtype as a signed value
+    int32_t QMax_s;
+    // The minimum value when interpreting OutDtype as a signed value
+    int32_t QMin_s;
+    // The maximum value when interpreting OutDtype as an unsigned value
+    uint32_t QMax_u;
+    // The minimum value when interpreting OutDtype as an unsigned value
+    uint32_t QMin_u;
 };
 
 template <TOSA_REF_TYPE InDtype, TOSA_REF_TYPE OutDtype>
author	TatWai Chong <tatwai.chong@arm.com>	2024-04-02 15:45:29 -0700
committer	Eric Kunze <eric.kunze@arm.com>	2024-04-08 17:09:38 +0000
commit	0dac6c90a1ed753d1018077be83941834f937601 (patch)
tree	a6c59981ac161d12df7a4deb65bca14905465d4d
parent	129201df8126a16abb0e4fbf7372354021f8a55d (diff)
download	reference_model-0dac6c90a1ed753d1018077be83941834f937601.tar.gz