From a62129a02397ba87171ebf4477795f628dcec0f6 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do <viet-hoa.do@arm.com>
Date: Wed, 26 Apr 2023 15:38:45 +0100
Subject: Fix fully connected and matmul mismatches

* There is an issue with quantized fully connected and matmul
  when the lower bound of bounded ReLU is negative.
* Use int32_t for the calculation of min/max quantized value
  rather than PixelValue to avoid this issue.

Partially resolves: COMPMID-5996
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I7b22e9d56a2441fc6a4c5c4e627f57d6e00d6ff1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9502
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/utils/quantization/AsymmHelpers.h | 12 ------
 src/core/utils/quantization/AsymmHelpers.cpp       | 25 ++++++-----
 src/core/utils/quantization/AsymmHelpers.h         | 49 ++++++++++++++++++++++
 src/cpu/operators/CpuFullyConnected.cpp            |  9 ++--
 src/cpu/operators/CpuMatMul.cpp                    |  9 ++--
 tests/validation/fixtures/MatMulFixture.h          | 16 +++----
 6 files changed, 80 insertions(+), 40 deletions(-)
 create mode 100644 src/core/utils/quantization/AsymmHelpers.h
diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h
index 9e1e9668fb..a9041762f5 100644
--- a/arm_compute/core/utils/quantization/AsymmHelpers.h
+++ b/arm_compute/core/utils/quantization/AsymmHelpers.h
@@ -82,18 +82,6 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
  */
 std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_type);
 
-/** Get minimum and maximum output of the activation function after quantization.
- *
- * Only ReLU, upper bounded ReLU and lower+upper bounded ReLU are supported.
- *
- * @param[in] q_info    Output quantization info.
- * @param[in] act_info  Activation function information.
- * @param[in] data_type Output data type (either QASYMM8 or QASYMM8_SIGNED).
- *
- * @return The minimum and maximum output of the activation function after quantization.
- */
-std::tuple<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type);
-
 /** Compute quantized per-channel multipliers and shifts. As many multipliers
  * and shifts as output channels are computed. If weights are not quantized
  * per-channel, multipliers and shifts will end up being the same for each
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index ba9a97aef9..f5b69c7a44 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -24,6 +24,7 @@
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/core/Helpers.h"
 #include "support/ToolchainSupport.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
 
 #include <cmath>
 #include <limits>
@@ -177,11 +178,15 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty
     return std::make_pair(min_quant_val, max_quant_val);
 }
 
-std::tuple<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
+std::tuple<int32_t, int32_t> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
 {
-    PixelValue type_min{};
-    PixelValue type_max{};
-    std::tie(type_min, type_max)         = get_min_max(data_type);
+    ARM_COMPUTE_ERROR_ON(data_type != DataType::QASYMM8 && data_type != DataType::QASYMM8_SIGNED);
+
+    const auto min_max = get_min_max(data_type);
+
+    int32_t type_min = std::get<0>(min_max).get<int32_t>();
+    int32_t type_max = std::get<1>(min_max).get<int32_t>();
+
     const UniformQuantizationInfo q_unif = q_info.uniform();
 
     if(act_info.enabled())
@@ -189,15 +194,15 @@ std::tuple<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const
         switch(act_info.activation())
         {
             case ActivationLayerInfo::ActivationFunction::RELU:
-                type_min = PixelValue(q_unif.offset);
+                type_min = q_unif.offset;
                 break;
             case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
-                type_min = PixelValue(q_unif.offset);
-                type_max = PixelValue(act_info.a(), data_type, q_info);
+                type_min = q_unif.offset;
+                type_max = (data_type == DataType::QASYMM8) ? quantize_qasymm8(act_info.a(), q_info) : quantize_qasymm8_signed(act_info.a(), q_info);
                 break;
             case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
-                type_min = PixelValue(act_info.b(), data_type, q_info);
-                type_max = PixelValue(act_info.a(), data_type, q_info);
+                type_min = (data_type == DataType::QASYMM8) ? quantize_qasymm8(act_info.b(), q_info) : quantize_qasymm8_signed(act_info.b(), q_info);
+                type_max = (data_type == DataType::QASYMM8) ? quantize_qasymm8(act_info.a(), q_info) : quantize_qasymm8_signed(act_info.a(), q_info);
                 break;
             default:
                 ARM_COMPUTE_ERROR("Activation function not supported.");
@@ -205,7 +210,7 @@ std::tuple<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const
         }
     }
 
-    return std::make_pair(type_min, type_max);
+    return std::make_tuple(type_min, type_max);
 }
 
 void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
diff --git a/src/core/utils/quantization/AsymmHelpers.h b/src/core/utils/quantization/AsymmHelpers.h
new file mode 100644
index 0000000000..f9701095cb
--- /dev/null
+++ b/src/core/utils/quantization/AsymmHelpers.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_CORE_UTILS_QUANTIZATION_ASYMMHELPERS_H
+#define ACL_SRC_CORE_UTILS_QUANTIZATION_ASYMMHELPERS_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace quantization {
+
+/** Get minimum and maximum output of the activation function after quantization.
+ *
+ * Only ReLU, upper bounded ReLU and lower+upper bounded ReLU are supported.
+ *
+ * @param[in] q_info    Output quantization info.
+ * @param[in] act_info  Activation function information.
+ * @param[in] data_type Output data type (either QASYMM8 or QASYMM8_SIGNED).
+ *
+ * @return The minimum and maximum output of the activation function after quantization.
+ */
+std::tuple<int32_t, int32_t> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type);
+
+} // namespace quantization
+} // namespace arm_compute
+
+#endif // ACL_SRC_CORE_UTILS_QUANTIZATION_ASYMMHELPERS_H
diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp
index 460257902d..395d8d2aa5 100644
--- a/src/cpu/operators/CpuFullyConnected.cpp
+++ b/src/cpu/operators/CpuFullyConnected.cpp
@@ -32,6 +32,7 @@
 #include "src/common/utils/Log.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/MemoryHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
 #include "src/cpu/kernels/CpuTransposeKernel.h"
 #include "src/cpu/operators/CpuConvertFullyConnectedWeights.h"
 #include "src/cpu/operators/CpuFlatten.h"
@@ -63,16 +64,16 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo
 
     ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
 
-    PixelValue type_min{};
-    PixelValue type_max{};
+    int32_t type_min = 0;
+    int32_t type_max = 0;
     std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
 
     gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
     gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
     gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
     gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min.get<int32_t>();
-    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max.get<int32_t>();
+    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min;
+    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max;
 
     return Status{};
 }
diff --git a/src/cpu/operators/CpuMatMul.cpp b/src/cpu/operators/CpuMatMul.cpp
index 369466b669..46858f4659 100644
--- a/src/cpu/operators/CpuMatMul.cpp
+++ b/src/cpu/operators/CpuMatMul.cpp
@@ -34,6 +34,7 @@
 #include "src/core/CPP/Validate.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/MemoryHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
 #include "src/cpu/utils/CpuAuxTensorHandler.h"
 
 using namespace arm_compute::experimental;
@@ -60,16 +61,16 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo
 
     ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
 
-    PixelValue type_min{};
-    PixelValue type_max{};
+    int32_t type_min = 0;
+    int32_t type_max = 0;
     std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
 
     gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
     gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
     gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
     gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min.get<int32_t>();
-    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max.get<int32_t>();
+    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min;
+    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max;
 
     return Status{};
 }
diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h
index f8f038af3f..15719024b1 100644
--- a/tests/validation/fixtures/MatMulFixture.h
+++ b/tests/validation/fixtures/MatMulFixture.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/reference/ActivationLayer.h"
 #include "tests/validation/reference/GEMM.h"
@@ -162,16 +163,16 @@ protected:
 
     template <typename TT>
     typename std::enable_if<!std::is_integral<TT>::value, SimpleTensor<TT>>::type
-    compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo)
+    compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const QuantizationInfo &o_qinfo)
     {
-        ARM_COMPUTE_UNUSED(act_info, o_qinfo);
+        ARM_COMPUTE_UNUSED(o_qinfo);
 
         return reference::gemm(a, b, c, alpha, beta);
     }
 
     template <typename TT>
     typename std::enable_if<std::is_integral<TT>::value, SimpleTensor<TT>>::type
-    compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo)
+    compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const QuantizationInfo &o_qinfo)
     {
         ARM_COMPUTE_UNUSED(alpha, beta);
 
@@ -187,17 +188,12 @@ protected:
         std::vector<int32_t> output_multipliers{ output_multiplier };
         std::vector<int32_t> output_shifts{ output_shift };
 
-        PixelValue output_min{};
-        PixelValue output_max{};
-        std::tie(output_min, output_max) = quantization::get_quantized_asymmetric_output_min_max(
-            o_qinfo, act_info, a.data_type());
-
         const auto tmp = reference::gemmlowp_matrix_multiply_core<int32_t>(
             a, b, c.shape(), aq.offset, bq.offset);
 
         auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TT>(
             tmp, output_multipliers, output_shifts, oq.offset,
-            output_min.get<int32_t>(), output_max.get<int32_t>());
+            std::numeric_limits<int32_t>::lowest(), std::numeric_limits<int32_t>::max());
         output.quantization_info(o_qinfo);
 
         return output;
@@ -253,7 +249,7 @@ protected:
         // Setting beta to 0 will effectively disable C for the
         // computation of the reference: alpha * A * B + 0 * C
         // Use transposed tensors if boolean enabled else use original tensors
-        auto result = compute_reference_gemm<T>((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, act_info, o_qinfo);
+        auto result = compute_reference_gemm<T>((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, o_qinfo);
 
         result = reference::activation_layer<T>(result, act_info, o_qinfo);
 
-- 
cgit v1.2.1