From 433ea4981675b64c44c8f47f2f4aac6bfcbfc911 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 26 May 2021 15:32:50 +0100 Subject: Optimize int8 arithmetic addition on CPU Avoid accessing quantization info from TensorInfo in leftover loop. Use the already available UniformQuantizationInfo instead. Create another version of the quantize utility function which assumes RoundingPolicy::TO_NEAREST_UP. This allows us to call std::lround() and avoid some overhead. Resolve COMPMID-4546 Signed-off-by: Giorgio Arena Change-Id: Ib481a586f879b7e937e3d54ba11100d0a37ef277 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5722 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- arm_compute/core/QuantizationInfo.h | 28 +++++++++++++++++++++++---- src/core/cpu/kernels/add/neon/qasymm8.cpp | 2 +- support/ToolchainSupport.h | 32 ++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index af7b8c66d8..b331f7d923 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,10 @@ #ifndef ARM_COMPUTE_QUANTIZATION_INFO_H #define ARM_COMPUTE_QUANTIZATION_INFO_H +#include "arm_compute/core/Error.h" #include "arm_compute/core/Rounding.h" +#include "support/ToolchainSupport.h" #include "utils/misc/Utility.h" -#include "arm_compute/core/Error.h" #include #include @@ -215,15 +216,34 @@ struct Qasymm8QuantizationHelper "quantized type should be either uint8_t or int8_t."); /** Quantize a value given a 8-bit asymmetric quantization scheme + * + * @param[in] value Value to quantize + * @param[in] qinfo Quantization information to use for quantizing + * + * @return Quantized value + */ + static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo) + { + ARM_COMPUTE_ERROR_ON(qinfo.scale == 0); + const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset; + return static_cast(arm_compute::utility::clamp(quantized)); + } + + /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy * * @param[in] value Value to quantize * @param[in] qinfo Quantization information to use for quantizing - * @param[in] rounding_policy (Optional) Rounding policy to use. 
Default: nearest up + * @param[in] rounding_policy Rounding policy to use * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) { + if(rounding_policy == RoundingPolicy::TO_NEAREST_UP) + { + return quantize(value, qinfo); + } + ARM_COMPUTE_ERROR_ON(qinfo.scale == 0); const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; return static_cast(arm_compute::utility::clamp(quantized)); diff --git a/src/core/cpu/kernels/add/neon/qasymm8.cpp b/src/core/cpu/kernels/add/neon/qasymm8.cpp index cc97f0067c..e357a7ef7f 100644 --- a/src/core/cpu/kernels/add/neon/qasymm8.cpp +++ b/src/core/cpu/kernels/add/neon/qasymm8.cpp @@ -199,7 +199,7 @@ void add_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, co { const float afs = static_cast((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; const float bfs = static_cast((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; - *(output_ptr + x) = quantize_qasymm8((afs + bfs), dst->info()->quantization_info()); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, input1, input2, output); diff --git a/support/ToolchainSupport.h b/support/ToolchainSupport.h index e79084a629..d8c14411e8 100644 --- a/support/ToolchainSupport.h +++ b/support/ToolchainSupport.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -72,6 +72,21 @@ inline T round(T value) return ::round(value); } +/** Round floating-point value with half value rounding away from zero and cast to long + * + * @note This function implements the same behaviour as std::lround except that it doesn't + * support Integral type. 
The latter is not in the namespace std in some Android toolchains. + * + * @param[in] value floating-point value to be rounded. + * + * @return Rounded @p value cast to long + */ +template ::value>::type> +inline long lround(T value) +{ + return ::lround(value); +} + /** Truncate floating-point value. * * @note This function implements the same behaviour as std::truncate except that it doesn't @@ -172,6 +187,21 @@ inline T round(T value) return (value < 0.f) ? static_cast(value - 0.5f) : static_cast(value + 0.5f); } +/** Round floating-point value with half value rounding away from zero and cast to long + * + * @note This function implements the same behaviour as std::lround except that it doesn't + * support Integral type. The latter is not in the namespace std in some Android toolchains. + * + * @param[in] value floating-point value to be rounded. + * + * @return Rounded @p value cast to long + */ +template ::value>::type> +inline long lround(T value) +{ + return std::lround(value); +} + /** Truncate floating-point value. * * @note This function implements the same behaviour as std::truncate except that it doesn't -- cgit v1.2.1