From 700b913ed9257c44147372378bc8a0dadcfd2ac2 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Wed, 22 Jun 2022 11:23:53 +0100 Subject: Select neon LUT Hard-Swish kernel on all devices * Resolves COMPMID-5211 Change-Id: I560ab2992c6089774c7ebee3538847905521607d Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7840 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Viet-Hoa Do --- src/cpu/kernels/CpuActivationKernel.cpp | 10 +++++----- src/cpu/kernels/activation/generic/sve2/qasymm8.cpp | 18 +----------------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp index 2ff54cd6e7..330de1ece2 100644 --- a/src/cpu/kernels/CpuActivationKernel.cpp +++ b/src/cpu/kernels/CpuActivationKernel.cpp @@ -45,6 +45,11 @@ namespace { static const std::vector available_kernels = { + { // neon LUT implementantion of HARD_SWISH takes precedence + "neon_qu8_activation_hardswish_lut", + [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f == ActivationLayerInfo::ActivationFunction::HARD_SWISH; }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_hardswish_lut) + }, { "sve2_qu8_activation", [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; }, @@ -85,11 +90,6 @@ static const std::vector available_kernel [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f != ActivationLayerInfo::ActivationFunction::HARD_SWISH; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation) }, - { - "neon_qu8_activation_hardswish", - [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f == ActivationLayerInfo::ActivationFunction::HARD_SWISH; }, - REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_hardswish_lut) - }, { "neon_qs8_activation", [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, diff --git a/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp b/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp index 2fa8dee5f1..928a414fb0 100644 --- a/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp +++ b/src/cpu/kernels/activation/generic/sve2/qasymm8.cpp @@ -57,10 +57,7 @@ void sve2_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL const auto vconst_1 = svdup_n_f32(1.f); const auto va_f32 = svdup_n_f32(act_info.a()); const auto vb_f32 = svdup_n_f32(act_info.b()); - const auto const_6_f32 = svdup_n_f32(6.f); - const auto const_0_f32 = svdup_n_f32(0.f); - const auto const_3_f32 = svdup_n_f32(3.f); - const auto const_inv_6_f32 = svdup_n_f32(0.166666667f); + // Initialise scale/offset for re-quantization bool requant = true; @@ -146,19 +143,6 @@ void sve2_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL // Re-quantize to new output space tmp = svquantize_z(pg, tmp_dep, qi_out); } - else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH) - { - // De-quantize - const auto vin_deq = svdequantize_z(pg, vin, qi_in); - // Perform activation - const svfloat32x4_t tmp_dep = svcreate4_f32(svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, - svget4_f32(vin_deq, 0), const_3_f32))))), - svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))), - svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))), - svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32)))))); - // Re-quantize to new output space - tmp = svquantize_z(pg, tmp_dep, qi_out); - } else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU) { svbool_t p0, p1, p2, p3; -- cgit v1.2.1