author     Viet-Hoa Do <viet-hoa.do@arm.com>    2022-08-10 11:56:49 +0100
committer  Viet-Hoa Do <viet-hoa.do@arm.com>    2022-08-17 11:35:03 +0000
commit     29db3d293302254b80e82651895d997ca145142a
tree       606d1193e7f92623e0af97aea513106ec468bff0 /src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp
parent     e54d8c07e75d70baeb80fecbb43088027ea45658
download   ComputeLibrary-29db3d293302254b80e82651895d997ca145142a.tar.gz
Add LUT for quantized sigmoid function
* Move LUT implementation to a separate file. It will be used for both
  QASYMM8 and QASYMM8_SIGNED.
* Fix wrong constant value related to QASYMM8_SIGNED leaky ReLU in
  32-bit build.

Resolves: COMPMID-5464

Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I2b24d52409a38f1b66fd532f431eff8a9e4547b6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8066
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
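For illustration, here is a minimal sketch of the LUT technique this commit introduces. It is not the library's actual implementation (the real table construction lives in the separate LUT file this patch factors out, which is not part of this diff), and QuantInfo, quantize_s8, dequantize_s8 and make_sigmoid_lut are hypothetical names. The idea: a QASYMM8_SIGNED input can take only 256 values, so the sigmoid can be precomputed once per quantization-info pair by dequantizing each candidate byte, applying the float sigmoid, and requantizing.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>

struct QuantInfo   // stand-in for a uniform scale/offset quantization info
{
    float   scale;
    int32_t offset;
};

static int8_t quantize_s8(float v, const QuantInfo &qi)
{
    const int32_t q = static_cast<int32_t>(std::lround(v / qi.scale)) + qi.offset;
    return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

static float dequantize_s8(int8_t v, const QuantInfo &qi)
{
    return qi.scale * static_cast<float>(static_cast<int32_t>(v) - qi.offset);
}

// Build a 256-entry table indexed by (input byte + 128).
static std::array<int8_t, 256> make_sigmoid_lut(const QuantInfo &qi_in, const QuantInfo &qi_out)
{
    std::array<int8_t, 256> lut{};
    for(int i = 0; i < 256; ++i)
    {
        const int8_t in  = static_cast<int8_t>(i - 128); // candidate quantized input
        const float  x   = dequantize_s8(in, qi_in);     // de-quantize
        const float  sig = 1.f / (1.f + std::exp(-x));   // float sigmoid
        lut[i]           = quantize_s8(sig, qi_out);     // re-quantize to output space
    }
    return lut;
}

// The per-element kernel then reduces to a single lookup: dst[j] = lut[src[j] + 128];

This is why the guards added below compile the exp-based LOGISTIC branches out of the aarch64 build: the lookup replaces the whole dequantize/exp/requantize chain.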
Diffstat (limited to 'src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp')
-rw-r--r--  src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp
index 4dca1ba794..d7c982e414 100644
--- a/src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp
+++ b/src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,9 +56,9 @@ void neon_qasymm8_signed_activation(const ITensor *src, ITensor *dst, const Acti
const qasymm8_signed_t b = quantize_qasymm8_signed(act_info.b(), qi_in);
const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
#ifndef __aarch64__
- const auto vconst_0_f32 = vdupq_n_f32(1.f);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const auto vconst_0_f32 = vdupq_n_f32(0.f);
#endif // __aarch64__
const float32x4_t va_f32 = vdupq_n_f32(act_info.a());
const float32x4_t vb_f32 = vdupq_n_f32(act_info.b());
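The hunk above is the constant fix named in the commit message: in the non-aarch64 path, the zero-comparison vector vconst_0_f32 was mistakenly initialized to 1.f (and vconst_1 is now only defined where it is used). A minimal NEON sketch, not the library's actual leaky ReLU kernel, of why the value must be 0.f:

#include <arm_neon.h>

// Leaky ReLU picks x when x > 0 and a*x otherwise, so the comparison
// vector must hold 0.f. Comparing against 1.f instead would wrongly
// route inputs in (0, 1] through the a*x branch.
float32x4_t leaky_relu_f32(float32x4_t x, float32x4_t va /* slope a */)
{
    const float32x4_t vconst_0_f32 = vdupq_n_f32(0.f);           // the fixed constant
    const uint32x4_t  positive     = vcgtq_f32(x, vconst_0_f32); // lane-wise x > 0
    return vbslq_f32(positive, x, vmulq_f32(va, x));             // select x or a*x
}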
@@ -108,6 +108,7 @@ void neon_qasymm8_signed_activation(const ITensor *src, ITensor *dst, const Acti
// Re-quantize to new output space
tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
}
+#ifndef __aarch64__ // LUT-based implementation is used for aarch64 instead.
else if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
{
// De-quantize
@@ -125,6 +126,7 @@ void neon_qasymm8_signed_activation(const ITensor *src, ITensor *dst, const Acti
// Re-quantize to new output space
tmp = vquantize_signed(tmp_dep, qi_out);
}
+#endif // __aarch64__
else if(act == ActivationLayerInfo::ActivationFunction::TANH)
{
// De-quantize
@@ -224,12 +226,14 @@ void neon_qasymm8_signed_activation(const ITensor *src, ITensor *dst, const Acti
tmp = std::min(a, std::max(b, in));
tmp = utility::clamp<int32_t, qasymm8_signed_t>(tmp * s + o);
}
+#ifndef __aarch64__ // LUT-based implementation is used for aarch64 instead.
else if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
{
float tmp_f = dequantize_qasymm8_signed(in, qi_in);
tmp_f = 1.f / (1.f + std::exp(-tmp_f));
tmp = quantize_qasymm8_signed(tmp_f, qi_out);
}
+#endif // __aarch64__
else if(act == ActivationLayerInfo::ActivationFunction::TANH)
{
float tmp_f = dequantize_qasymm8_signed(in, qi_in);
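As a closing illustration (hypothetical, reusing QuantInfo, dequantize_s8, quantize_s8 and make_sigmoid_lut from the sketch above): the guarded scalar LOGISTIC path and the aarch64 LUT must agree for every possible input byte, which can be checked exhaustively since there are only 256 cases.

#include <cassert>
#include <cmath>

void check_lut_matches_scalar(const QuantInfo &qi_in, const QuantInfo &qi_out)
{
    const auto lut = make_sigmoid_lut(qi_in, qi_out);
    for(int i = -128; i <= 127; ++i)
    {
        const int8_t in    = static_cast<int8_t>(i);
        const float  tmp_f = dequantize_s8(in, qi_in);                             // de-quantize
        const int8_t ref   = quantize_s8(1.f / (1.f + std::exp(-tmp_f)), qi_out);  // scalar path
        assert(lut[i + 128] == ref);                                               // LUT lookup
    }
}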