aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-05-26 15:32:50 +0100
committerGiorgio Arena <giorgio.arena@arm.com>2021-06-01 11:27:17 +0000
commit433ea4981675b64c44c8f47f2f4aac6bfcbfc911 (patch)
tree7f73bdb5f7c068bd2d6849ae615447e8ea93843f
parentb3be45759bdd0749ae3a16fe470820f0d9830ea9 (diff)
downloadComputeLibrary-433ea4981675b64c44c8f47f2f4aac6bfcbfc911.tar.gz
Optimize int8 arithmetic addition on CPU
Avoid accessing quantization info from TensorInfo in leftover loop. Use the already available UniformQuantizationInfo instead. Create another version of the quantize utility function which assumes RoundingPolicy::TO_NEAREST_UP. This allows us to call std::lround() and avoid some overhead. Resolve COMPMID-4546. Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Change-Id: Ib481a586f879b7e937e3d54ba11100d0a37ef277 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5722 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/QuantizationInfo.h28
-rw-r--r--src/core/cpu/kernels/add/neon/qasymm8.cpp2
-rw-r--r--support/ToolchainSupport.h32
3 files changed, 56 insertions, 6 deletions
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index af7b8c66d8..b331f7d923 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
#define ARM_COMPUTE_QUANTIZATION_INFO_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Rounding.h"
+#include "support/ToolchainSupport.h"
#include "utils/misc/Utility.h"
-#include "arm_compute/core/Error.h"
#include <cstddef>
#include <type_traits>
@@ -216,14 +217,33 @@ struct Qasymm8QuantizationHelper
/** Quantize a value given a 8-bit asymmetric quantization scheme
*
+ * @param[in] value Value to quantize
+ * @param[in] qinfo Quantization information to use for quantizing (scale must be non-zero)
+ *
+ * @return Quantized value, rounded to nearest (halfway cases away from zero) and passed through utility::clamp for QUANTIZED_TYPE
+ */
+ static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
+ {
+ ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
+ const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset;
+ return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
+ }
+
+ /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy
+ *
* @param[in] value Value to quantize
* @param[in] qinfo Quantization information to use for quantizing
- * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
+ * @param[in] rounding_policy Rounding policy to use
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+ static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
{
+ if(rounding_policy == RoundingPolicy::TO_NEAREST_UP)
+ {
+ return quantize(value, qinfo);
+ }
+
ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
diff --git a/src/core/cpu/kernels/add/neon/qasymm8.cpp b/src/core/cpu/kernels/add/neon/qasymm8.cpp
index cc97f0067c..e357a7ef7f 100644
--- a/src/core/cpu/kernels/add/neon/qasymm8.cpp
+++ b/src/core/cpu/kernels/add/neon/qasymm8.cpp
@@ -199,7 +199,7 @@ void add_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, co
{
const float afs = static_cast<int32_t>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale;
const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale;
- *(output_ptr + x) = quantize_qasymm8((afs + bfs), dst->info()->quantization_info());
+ *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info);
}
},
input1, input2, output);
diff --git a/support/ToolchainSupport.h b/support/ToolchainSupport.h
index e79084a629..d8c14411e8 100644
--- a/support/ToolchainSupport.h
+++ b/support/ToolchainSupport.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,6 +72,21 @@ inline T round(T value)
return ::round(value);
}
+/** Round floating-point value with half value rounding away from zero and cast to long
+ *
+ * @note This function implements the same behaviour as std::lround except that it doesn't
+ * support integral types. std::lround is not in the namespace std in some Android toolchains.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return @p value rounded to the nearest integer (halfway cases away from zero), as a long
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline long lround(T value)
+{
+ return ::lround(value);
+}
+
/** Truncate floating-point value.
*
* @note This function implements the same behaviour as std::truncate except that it doesn't
@@ -172,6 +187,21 @@ inline T round(T value)
return (value < 0.f) ? static_cast<int>(value - 0.5f) : static_cast<int>(value + 0.5f);
}
+/** Round floating-point value with half value rounding away from zero and cast to long
+ *
+ * @note This function implements the same behaviour as std::lround except that it doesn't
+ * support integral types. std::lround is not in the namespace std in some Android toolchains.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return @p value rounded to the nearest integer (halfway cases away from zero), as a long
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline long lround(T value)
+{
+ return std::lround(value);
+}
+
/** Truncate floating-point value.
*
* @note This function implements the same behaviour as std::truncate except that it doesn't