aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2020-11-25 11:46:03 +0000
committerSang-Hoon Park <sang-hoon.park@arm.com>2020-12-02 10:18:46 +0000
commitadd8e815ea94c8f8e6b1c9faf18527695f1332ec (patch)
tree5941064344b426d12bc76b2fba3d0c631e796088 /src/core/NEON/kernels
parent4ffc42afafc8e6eee9917ac27b4bc510973335bf (diff)
downloadComputeLibrary-add8e815ea94c8f8e6b1c9faf18527695f1332ec.tar.gz
COMPMID-3862: Add support QASYMM8 LEAKY RELU activation
- LEAKY RELU activation is supported for QASYMM8 data type - vquantize on NEON side has been modified to match with other backends (OpenCL and reference) Change-Id: I194631225c8d4f3cc96027d64812ec2be2b4328a Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4593 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-by: Manuel Bottini <manuel.bottini@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp5
-rw-r--r--src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp81
-rw-r--r--src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp81
3 files changed, 131 insertions, 36 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 51257cb96b..d969fd8e38 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -110,7 +110,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
ActivationLayerInfo::ActivationFunction::LOGISTIC,
ActivationLayerInfo::ActivationFunction::TANH,
- ActivationLayerInfo::ActivationFunction::HARD_SWISH
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH,
+ ActivationLayerInfo::ActivationFunction::LEAKY_RELU,
};
const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
{
@@ -123,7 +124,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
- "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
+ "For QASYMM8 only hard swish, leaky relu, tanh, logistic, relu and lower/upper bounded relu are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
"For QSYMM16 only tanh and logistic are supported");
diff --git a/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp
index 8a398fb531..7b26441824 100644
--- a/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp
@@ -51,23 +51,26 @@ void qasymm8_neon_activation(const ITensor *src, ITensor *dst, const ActivationL
Iterator input(src, win_collapsed);
Iterator output(dst, win_collapsed);
- const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
- const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(act_info.a(), qi_in));
- const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(act_info.b(), qi_in));
- const qasymm8_t a = quantize_qasymm8(act_info.a(), qi_in);
- const qasymm8_t b = quantize_qasymm8(act_info.b(), qi_in);
- const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
- const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(act_info.b());
- const float a_f32 = act_info.a();
- const float b_f32 = act_info.b();
- const auto const_6_f32 = vdupq_n_f32(6.f);
- const auto const_0_f32 = vdupq_n_f32(0.f);
- const auto const_3_f32 = vdupq_n_f32(3.f);
- const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(act_info.a(), qi_in));
+ const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(act_info.b(), qi_in));
+ const qasymm8_t a = quantize_qasymm8(act_info.a(), qi_in);
+ const qasymm8_t b = quantize_qasymm8(act_info.b(), qi_in);
+ const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
+ const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+#ifndef __aarch64__
+ const auto vconst_0_f32 = vdupq_n_f32(0);
+#endif // __aarch64__
+ const float32x4_t va_f32 = vdupq_n_f32(act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(act_info.b());
+ const float a_f32 = act_info.a();
+ const float b_f32 = act_info.b();
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -159,6 +162,44 @@ void qasymm8_neon_activation(const ITensor *src, ITensor *dst, const ActivationL
// Re-quantize to new output space
tmp = vquantize(tmp_dep, qi_out);
}
+ else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ {
+ const auto vin_deq = vdequantize(vin, qi_in);
+
+#ifdef __aarch64__
+ const uint32x4x4_t pos_mask =
+ {
+ {
+ wrapper::vcgtz(vin_deq.val[0]),
+ wrapper::vcgtz(vin_deq.val[1]),
+ wrapper::vcgtz(vin_deq.val[2]),
+ wrapper::vcgtz(vin_deq.val[3]),
+ }
+ };
+#else // __aarch64__
+ const uint32x4x4_t pos_mask =
+ {
+ {
+ wrapper::vcgt(vin_deq.val[0], vconst_0_f32),
+ wrapper::vcgt(vin_deq.val[1], vconst_0_f32),
+ wrapper::vcgt(vin_deq.val[2], vconst_0_f32),
+ wrapper::vcgt(vin_deq.val[3], vconst_0_f32),
+ }
+ };
+#endif // __aarch64__
+
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vbsl(pos_mask.val[0], vin_deq.val[0], wrapper::vmul(va_f32, vin_deq.val[0])),
+ wrapper::vbsl(pos_mask.val[1], vin_deq.val[1], wrapper::vmul(va_f32, vin_deq.val[1])),
+ wrapper::vbsl(pos_mask.val[2], vin_deq.val[2], wrapper::vmul(va_f32, vin_deq.val[2])),
+ wrapper::vbsl(pos_mask.val[3], vin_deq.val[3], wrapper::vmul(va_f32, vin_deq.val[3])),
+ }
+ };
+
+ tmp = vquantize(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -204,6 +245,12 @@ void qasymm8_neon_activation(const ITensor *src, ITensor *dst, const ActivationL
tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+ else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = tmp_f > 0 ? tmp_f : tmp_f * a_f32;
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
diff --git a/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp
index bfab07c8e3..c616c5e27d 100644
--- a/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp
@@ -50,23 +50,26 @@ void qasymm8_signed_neon_activation(const ITensor *src, ITensor *dst, const Acti
Iterator input(src, win_collapsed);
Iterator output(dst, win_collapsed);
- const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
- const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(act_info.a(), qi_in));
- const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(act_info.b(), qi_in));
- const qasymm8_signed_t a = quantize_qasymm8_signed(act_info.a(), qi_in);
- const qasymm8_signed_t b = quantize_qasymm8_signed(act_info.b(), qi_in);
- const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
- const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(act_info.b());
- const float a_f32 = act_info.a();
- const float b_f32 = act_info.b();
- const auto const_6_f32 = vdupq_n_f32(6.f);
- const auto const_0_f32 = vdupq_n_f32(0.f);
- const auto const_3_f32 = vdupq_n_f32(3.f);
- const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(act_info.a(), qi_in));
+ const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(act_info.b(), qi_in));
+ const qasymm8_signed_t a = quantize_qasymm8_signed(act_info.a(), qi_in);
+ const qasymm8_signed_t b = quantize_qasymm8_signed(act_info.b(), qi_in);
+ const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
+ const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+#ifndef __aarch64__
+ const auto vconst_0_f32 = vdupq_n_f32(0);
+#endif // __aarch64__
+ const float32x4_t va_f32 = vdupq_n_f32(act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(act_info.b());
+ const float a_f32 = act_info.a();
+ const float b_f32 = act_info.b();
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -158,6 +161,44 @@ void qasymm8_signed_neon_activation(const ITensor *src, ITensor *dst, const Acti
// Re-quantize to new output space
tmp = vquantize_signed(tmp_dep, qi_out);
}
+ else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ {
+ const auto vin_deq = vdequantize(vin, qi_in);
+
+#ifdef __aarch64__
+ const uint32x4x4_t pos_mask =
+ {
+ {
+ wrapper::vcgtz(vin_deq.val[0]),
+ wrapper::vcgtz(vin_deq.val[1]),
+ wrapper::vcgtz(vin_deq.val[2]),
+ wrapper::vcgtz(vin_deq.val[3]),
+ }
+ };
+#else // __aarch64__
+ const uint32x4x4_t pos_mask =
+ {
+ {
+ wrapper::vcgt(vin_deq.val[0], vconst_0_f32),
+ wrapper::vcgt(vin_deq.val[1], vconst_0_f32),
+ wrapper::vcgt(vin_deq.val[2], vconst_0_f32),
+ wrapper::vcgt(vin_deq.val[3], vconst_0_f32),
+ }
+ };
+#endif // __aarch64__
+
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vbsl(pos_mask.val[0], vin_deq.val[0], wrapper::vmul(va_f32, vin_deq.val[0])),
+ wrapper::vbsl(pos_mask.val[1], vin_deq.val[1], wrapper::vmul(va_f32, vin_deq.val[1])),
+ wrapper::vbsl(pos_mask.val[2], vin_deq.val[2], wrapper::vmul(va_f32, vin_deq.val[2])),
+ wrapper::vbsl(pos_mask.val[3], vin_deq.val[3], wrapper::vmul(va_f32, vin_deq.val[3])),
+ }
+ };
+
+ tmp = vquantize_signed(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -203,6 +244,12 @@ void qasymm8_signed_neon_activation(const ITensor *src, ITensor *dst, const Acti
tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
tmp = quantize_qasymm8_signed(tmp_f, qi_out);
}
+ else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ {
+ float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+ tmp_f = tmp_f > 0 ? tmp_f : tmp_f * a_f32;
+ tmp = quantize_qasymm8_signed(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");