From a62129a02397ba87171ebf4477795f628dcec0f6 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 26 Apr 2023 15:38:45 +0100 Subject: Fix fully connected and matmul mismatches * There is an issue with quantized fully connected and matmul when the lower bound of bounded ReLU is negative. * Use int32_t for the calculation of min/max quantized value rather than PixelValue to avoid this issue. Partially resolves: COMPMID-5996 Signed-off-by: Viet-Hoa Do Change-Id: I7b22e9d56a2441fc6a4c5c4e627f57d6e00d6ff1 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9502 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/core/utils/quantization/AsymmHelpers.h | 12 ------ src/core/utils/quantization/AsymmHelpers.cpp | 25 ++++++----- src/core/utils/quantization/AsymmHelpers.h | 49 ++++++++++++++++++++++ src/cpu/operators/CpuFullyConnected.cpp | 9 ++-- src/cpu/operators/CpuMatMul.cpp | 9 ++-- tests/validation/fixtures/MatMulFixture.h | 16 +++---- 6 files changed, 80 insertions(+), 40 deletions(-) create mode 100644 src/core/utils/quantization/AsymmHelpers.h diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h index 9e1e9668fb..a9041762f5 100644 --- a/arm_compute/core/utils/quantization/AsymmHelpers.h +++ b/arm_compute/core/utils/quantization/AsymmHelpers.h @@ -82,18 +82,6 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, */ std::pair get_min_max_values_from_quantized_data_type(DataType data_type); -/** Get minimum and maximum output of the activation function after quantization. - * - * Only ReLU, upper bounded ReLU and lower+upper bounded ReLU are supported. - * - * @param[in] q_info Output quantization info. - * @param[in] act_info Activation function information. - * @param[in] data_type Output data type (either QASYMM8 or QASYMM8_SIGNED). - * - * @return The minimum and maximum output of the activation function after quantization. - */ -std::tuple get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type); - /** Compute quantized per-channel multipliers and shifts. As many multipliers * and shifts as output channels are computed. If weights are not quantized * per-channel, multipliers and shifts will end up being the same for each diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp index ba9a97aef9..f5b69c7a44 100644 --- a/src/core/utils/quantization/AsymmHelpers.cpp +++ b/src/core/utils/quantization/AsymmHelpers.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/core/Helpers.h" #include "support/ToolchainSupport.h" +#include "src/core/utils/quantization/AsymmHelpers.h" #include #include @@ -177,11 +178,15 @@ std::pair get_min_max_values_from_quantized_data_type(DataType data_ty return std::make_pair(min_quant_val, max_quant_val); } -std::tuple get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type) +std::tuple get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type) { - PixelValue type_min{}; - PixelValue type_max{}; - std::tie(type_min, type_max) = get_min_max(data_type); + ARM_COMPUTE_ERROR_ON(data_type != DataType::QASYMM8 && data_type != DataType::QASYMM8_SIGNED); + + const auto min_max = get_min_max(data_type); + + int32_t type_min = std::get<0>(min_max).get(); + int32_t type_max = std::get<1>(min_max).get(); + const UniformQuantizationInfo q_unif = q_info.uniform(); if(act_info.enabled()) @@ -189,15 +194,15 @@ std::tuple get_quantized_asymmetric_output_min_max(const switch(act_info.activation()) { case ActivationLayerInfo::ActivationFunction::RELU: - type_min = PixelValue(q_unif.offset); + type_min = q_unif.offset; break; case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: - type_min = PixelValue(q_unif.offset); - type_max = PixelValue(act_info.a(), data_type, q_info); + type_min = q_unif.offset; + type_max = (data_type == DataType::QASYMM8) ? quantize_qasymm8(act_info.a(), q_info) : quantize_qasymm8_signed(act_info.a(), q_info); break; case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: - type_min = PixelValue(act_info.b(), data_type, q_info); - type_max = PixelValue(act_info.a(), data_type, q_info); + type_min = (data_type == DataType::QASYMM8) ? quantize_qasymm8(act_info.b(), q_info) : quantize_qasymm8_signed(act_info.b(), q_info); + type_max = (data_type == DataType::QASYMM8) ? quantize_qasymm8(act_info.a(), q_info) : quantize_qasymm8_signed(act_info.a(), q_info); break; default: ARM_COMPUTE_ERROR("Activation function not supported."); @@ -205,7 +210,7 @@ std::tuple get_quantized_asymmetric_output_min_max(const } } - return std::make_pair(type_min, type_max); + return std::make_tuple(type_min, type_max); } void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, diff --git a/src/core/utils/quantization/AsymmHelpers.h b/src/core/utils/quantization/AsymmHelpers.h new file mode 100644 index 0000000000..f9701095cb --- /dev/null +++ b/src/core/utils/quantization/AsymmHelpers.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_SRC_CORE_UTILS_QUANTIZATION_ASYMMHELPERS_H +#define ACL_SRC_CORE_UTILS_QUANTIZATION_ASYMMHELPERS_H + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace quantization { + +/** Get minimum and maximum output of the activation function after quantization. + * + * Only ReLU, upper bounded ReLU and lower+upper bounded ReLU are supported. + * + * @param[in] q_info Output quantization info. + * @param[in] act_info Activation function information. + * @param[in] data_type Output data type (either QASYMM8 or QASYMM8_SIGNED). + * + * @return The minimum and maximum output of the activation function after quantization. + */ +std::tuple get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type); + +} // namespace quantization +} // namespace arm_compute + +#endif // ACL_SRC_CORE_UTILS_QUANTIZATION_ASYMMHELPERS_H diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp index 460257902d..395d8d2aa5 100644 --- a/src/cpu/operators/CpuFullyConnected.cpp +++ b/src/cpu/operators/CpuFullyConnected.cpp @@ -32,6 +32,7 @@ #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/MemoryHelpers.h" +#include "src/core/utils/quantization/AsymmHelpers.h" #include "src/cpu/kernels/CpuTransposeKernel.h" #include "src/cpu/operators/CpuConvertFullyConnectedWeights.h" #include "src/cpu/operators/CpuFlatten.h" @@ -63,16 +64,16 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); - PixelValue type_min{}; - PixelValue type_max{}; + int32_t type_min = 0; + int32_t type_max = 0; std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type); gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier; gemmlowp_output_stage_info.gemmlowp_shift = output_shift; gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset; gemmlowp_output_stage_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - gemmlowp_output_stage_info.gemmlowp_min_bound = type_min.get(); - gemmlowp_output_stage_info.gemmlowp_max_bound = type_max.get(); + gemmlowp_output_stage_info.gemmlowp_min_bound = type_min; + gemmlowp_output_stage_info.gemmlowp_max_bound = type_max; return Status{}; } diff --git a/src/cpu/operators/CpuMatMul.cpp b/src/cpu/operators/CpuMatMul.cpp index 369466b669..46858f4659 100644 --- a/src/cpu/operators/CpuMatMul.cpp +++ b/src/cpu/operators/CpuMatMul.cpp @@ -34,6 +34,7 @@ #include "src/core/CPP/Validate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/MemoryHelpers.h" +#include "src/core/utils/quantization/AsymmHelpers.h" #include "src/cpu/utils/CpuAuxTensorHandler.h" using namespace arm_compute::experimental; @@ -60,16 +61,16 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); - PixelValue type_min{}; - PixelValue type_max{}; + int32_t type_min = 0; + int32_t type_max = 0; std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type); gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier; gemmlowp_output_stage_info.gemmlowp_shift = output_shift; gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset; gemmlowp_output_stage_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - gemmlowp_output_stage_info.gemmlowp_min_bound = type_min.get(); - gemmlowp_output_stage_info.gemmlowp_max_bound = type_max.get(); + gemmlowp_output_stage_info.gemmlowp_min_bound = type_min; + gemmlowp_output_stage_info.gemmlowp_max_bound = type_max; return Status{}; } diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h index f8f038af3f..15719024b1 100644 --- a/tests/validation/fixtures/MatMulFixture.h +++ b/tests/validation/fixtures/MatMulFixture.h @@ -27,6 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/utils/quantization/AsymmHelpers.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/GEMM.h" @@ -162,16 +163,16 @@ protected: template typename std::enable_if::value, SimpleTensor>::type - compute_reference_gemm(const SimpleTensor &a, const SimpleTensor &b, const SimpleTensor &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo) + compute_reference_gemm(const SimpleTensor &a, const SimpleTensor &b, const SimpleTensor &c, float alpha, float beta, const QuantizationInfo &o_qinfo) { - ARM_COMPUTE_UNUSED(act_info, o_qinfo); + ARM_COMPUTE_UNUSED(o_qinfo); return reference::gemm(a, b, c, alpha, beta); } template typename std::enable_if::value, SimpleTensor>::type - compute_reference_gemm(const SimpleTensor &a, const SimpleTensor &b, const SimpleTensor &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo) + compute_reference_gemm(const SimpleTensor &a, const SimpleTensor &b, const SimpleTensor &c, float alpha, float beta, const QuantizationInfo &o_qinfo) { ARM_COMPUTE_UNUSED(alpha, beta); @@ -187,17 +188,12 @@ protected: std::vector output_multipliers{ output_multiplier }; std::vector output_shifts{ output_shift }; - PixelValue output_min{}; - PixelValue output_max{}; - std::tie(output_min, output_max) = quantization::get_quantized_asymmetric_output_min_max( - o_qinfo, act_info, a.data_type()); - const auto tmp = reference::gemmlowp_matrix_multiply_core( a, b, c.shape(), aq.offset, bq.offset); auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint( tmp, output_multipliers, output_shifts, oq.offset, - output_min.get(), output_max.get()); + std::numeric_limits::lowest(), std::numeric_limits::max()); output.quantization_info(o_qinfo); return output; @@ -253,7 +249,7 @@ protected: // Setting beta to 0 will effectively disable C for the // computation of the reference: alpha * A * B + 0 * C // Use transposed tensors if boolean enabled else use original tensors - auto result = compute_reference_gemm((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, act_info, o_qinfo); + auto result = compute_reference_gemm((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, o_qinfo); result = reference::activation_layer(result, act_info, o_qinfo); -- cgit v1.2.1