aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cpu/kernels/CpuActivationKernel.cpp10
-rw-r--r--src/cpu/kernels/activation/generic/sve2/qasymm8.cpp18
2 files changed, 6 insertions, 22 deletions
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 2ff54cd6e7..330de1ece2 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -45,6 +45,11 @@ namespace
{
static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels =
{
+ { // neon LUT implementantion of HARD_SWISH takes precedence
+ "neon_qu8_activation_hardswish_lut",
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f == ActivationLayerInfo::ActivationFunction::HARD_SWISH; },
+ REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_hardswish_lut)
+ },
{
"sve2_qu8_activation",
[](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; },
@@ -86,11 +91,6 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation)
},
{
- "neon_qu8_activation_hardswish",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f == ActivationLayerInfo::ActivationFunction::HARD_SWISH; },
- REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_hardswish_lut)
- },
- {
"neon_qs8_activation",
[](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_activation)
diff --git a/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp b/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp
index 2fa8dee5f1..928a414fb0 100644
--- a/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp
+++ b/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp
@@ -57,10 +57,7 @@ void sve2_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
const auto vconst_1 = svdup_n_f32(1.f);
const auto va_f32 = svdup_n_f32(act_info.a());
const auto vb_f32 = svdup_n_f32(act_info.b());
- const auto const_6_f32 = svdup_n_f32(6.f);
- const auto const_0_f32 = svdup_n_f32(0.f);
- const auto const_3_f32 = svdup_n_f32(3.f);
- const auto const_inv_6_f32 = svdup_n_f32(0.166666667f);
+
// Initialise scale/offset for re-quantization
bool requant = true;
@@ -146,19 +143,6 @@ void sve2_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
// Re-quantize to new output space
tmp = svquantize_z(pg, tmp_dep, qi_out);
}
- else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
- {
- // De-quantize
- const auto vin_deq = svdequantize_z(pg, vin, qi_in);
- // Perform activation
- const svfloat32x4_t tmp_dep = svcreate4_f32(svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg,
- svget4_f32(vin_deq, 0), const_3_f32))))),
- svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))),
- svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))),
- svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32))))));
- // Re-quantize to new output space
- tmp = svquantize_z(pg, tmp_dep, qi_out);
- }
else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
{
svbool_t p0, p1, p2, p3;