From 0dac6c90a1ed753d1018077be83941834f937601 Mon Sep 17 00:00:00 2001 From: TatWai Chong Date: Tue, 2 Apr 2024 15:45:29 -0700 Subject: Fix the wrong QMax and QMin type assignment in rescale op Signed integer type is used to retain QMax and QMin no matter what the value of `output_unsigned` is, but the value of QMax and QMin are unsigned integer when output_unsigned is true. Also add a handful of arithmetic helpers to align the pseudo code. Change-Id: Ie3cd444a290ddae08884186cd4b349a88acad032 Signed-off-by: TatWai Chong --- reference_model/src/arith_util.h | 46 ++++++++++++++++++++++++++++++ reference_model/src/ops/type_conversion.cc | 28 ++++++++++++------ reference_model/src/ops/type_conversion.h | 9 ++++++ 3 files changed, 74 insertions(+), 9 deletions(-) diff --git a/reference_model/src/arith_util.h b/reference_model/src/arith_util.h index fee9fef..fa6d136 100644 --- a/reference_model/src/arith_util.h +++ b/reference_model/src/arith_util.h @@ -317,4 +317,50 @@ int32_t getUnsignedMinimum() return 0; } +template +T applyMax(T a, T b) +{ + if (std::is_floating_point::value) + { + if (std::isnan(a) || std::isnan(b)) + { + return NAN; + } + } + return (a >= b) ? a : b; +} + +template +T applyMin(T a, T b) +{ + if (std::is_floating_point::value) + { + if (std::isnan(a) || std::isnan(b)) + { + return NAN; + } + } + return (a < b) ? a : b; +} + +// Clip the input value of type T into the range [min, max] of type U, and return the result as type T. +template +T applyClip(T value, U min_val, U max_val) +{ + assert(min_val <= max_val); + assert(sizeof(T) == sizeof(U)); + + value = applyMax(value, min_val); + + // Handle the numbers of an unsigned type U that becomes unrepresentable when type casting to signed. + if (std::is_signed_v && std::is_unsigned_v && max_val > std::numeric_limits::max()) + { + max_val = std::numeric_limits::max(); + } + + value = applyMin(value, max_val); + + return value; +} + #endif /* _ARITH_UTIL_H */ diff --git a/reference_model/src/ops/type_conversion.cc b/reference_model/src/ops/type_conversion.cc index 85f8c58..7bca697 100644 --- a/reference_model/src/ops/type_conversion.cc +++ b/reference_model/src/ops/type_conversion.cc @@ -37,6 +37,11 @@ OpRescale::OpRescale(SubgraphTraverser* sgt_, TosaAttri { setRequiredOperands(3, 1); INIT_ATTRIBUTE(Rescale); + + QMax_s = getSignedMaximum(); + QMin_s = getSignedMinimum(); + QMax_u = getUnsignedMaximum(); + QMin_u = getUnsignedMinimum(); } template @@ -155,9 +160,6 @@ int OpRescale::eval() bool input_unsigned = attribute->input_unsigned(); bool output_unsigned = attribute->output_unsigned(); - int32_t QMin = output_unsigned ? getUnsignedMinimum() : getSignedMinimum(); - int32_t QMax = output_unsigned ? getUnsignedMaximum() : getSignedMaximum(); - // reshape [d0, d1, ..., dn] into [d0 * d1 ..., dn] Eigen::array shape_2d; shape_2d[0] = 1; @@ -270,18 +272,24 @@ int OpRescale::eval() { output_zp_extended = static_cast(output_zp); } + int64_t res_in_64 = static_cast(scaled) + output_zp_extended; int64_t i32_max_in_64 = static_cast(std::numeric_limits::max()); int64_t i32_min_in_64 = static_cast(std::numeric_limits::min()); + if (res_in_64 > i32_max_in_64 || res_in_64 < i32_min_in_64) { std::string desc = "scaling result [" + std::to_string(scaled) + "] plus output_zp [" + std::to_string(output_zp) + "] not in i32 range"; throw desc; } - OutEigenType out_val = static_cast(res_in_64); - out_val = std::max(out_val, QMin); - out_val = std::min(out_val, QMax); + + // Treat the output values as unsigned if `output_unsigned` is true. + int32_t clipped_val = (output_unsigned) + ? applyClip(res_in_64, QMin_u, QMax_u) + : applyClip(res_in_64, QMin_s, QMax_s); + + OutEigenType out_val = static_cast(clipped_val); return out_val; }); @@ -370,9 +378,11 @@ int OpRescale::eval() throw desc; } - OutEigenType out_val = static_cast(res_in_64); - out_val = std::max(out_val, QMin); - out_val = std::min(out_val, QMax); + // Treat the output values as unsigned if `output_unsigned` is true. + int32_t clipped_val = (output_unsigned) ? applyClip(res_in_64, QMin_u, QMax_u) + : applyClip(res_in_64, QMin_s, QMax_s); + + OutEigenType out_val = static_cast(clipped_val); return out_val; }); } diff --git a/reference_model/src/ops/type_conversion.h b/reference_model/src/ops/type_conversion.h index cf95f16..0636357 100644 --- a/reference_model/src/ops/type_conversion.h +++ b/reference_model/src/ops/type_conversion.h @@ -60,6 +60,15 @@ protected: TosaReference::TensorTemplate* multiplierI16; TosaReference::TensorTemplate* multiplierI32; TosaReference::TensorTemplate* shift; + + // The maximum value when interpreting OutDtype as a signed value + int32_t QMax_s; + // The minimum value when interpreting OutDtype as a signed value + int32_t QMin_s; + // The maximum value when interpreting OutDtype as an unsigned value + uint32_t QMax_u; + // The minimum value when interpreting OutDtype as an unsigned value + uint32_t QMin_u; }; template -- cgit v1.2.1