From aa85cdf22802cb892d7fa422ca505a43d84adb38 Mon Sep 17 00:00:00 2001
From: morgolock
Date: Fri, 28 Feb 2020 15:38:28 +0000
Subject: COMPMID-3079: Implement Hard-Swish in NEON

* Support for quantized tensors

Change-Id: I47c011a4a0b92dbadd733eff7ce283a19c5d0e94
Signed-off-by: morgolock
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2823
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 src/core/NEON/kernels/NEActivationLayerKernel.cpp | 121 ++++++++++++++++------
 1 file changed, 91 insertions(+), 30 deletions(-)

diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index a0bf9e8010..5251209463 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -50,18 +50,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
 
-    static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
+    const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
     {
         ActivationLayerInfo::ActivationFunction::RELU,
         ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
         ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
         ActivationLayerInfo::ActivationFunction::LOGISTIC,
-        ActivationLayerInfo::ActivationFunction::TANH
+        ActivationLayerInfo::ActivationFunction::TANH,
+        ActivationLayerInfo::ActivationFunction::HARD_SWISH
     };
-    static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
+    const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
     {
         ActivationLayerInfo::ActivationFunction::LOGISTIC,
-        ActivationLayerInfo::ActivationFunction::TANH
+        ActivationLayerInfo::ActivationFunction::TANH,
+        ActivationLayerInfo::ActivationFunction::HARD_SWISH
     };
     const DataType          data_type = input->data_type();
     const QuantizationInfo &oq_info   = (output != nullptr) ? output->quantization_info() : input->quantization_info();
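For orientation before the implementation hunks: hard-swish is defined as h-swish(x) = x * ReLU6(x + 3) / 6, where ReLU6 clamps to [0, 6]. A minimal scalar sketch of the formula the kernel hunks below implement (the helper name is ours, not part of the patch or the library):

    #include <algorithm>

    // Reference hard-swish, with 1/6 written as 0.166666667f to match
    // the constant the kernel uses below.
    inline float hard_swish_ref(float x)
    {
        return x * std::min(std::max(x + 3.0f, 0.0f), 6.0f) * 0.166666667f;
    }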
@@ -191,6 +193,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
         { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
+        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_signed_t> },
+
     };
 
     // Activation functions : QASYMM8
@@ -202,6 +206,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
         { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
+        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_t> },
+
     };
 
     // Activation functions : QSYMM16
@@ -209,6 +215,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
     {
         { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
+
     };
 
     switch(input->info()->data_type())
@@ -403,19 +410,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
     Iterator input(_input, win_collapsed);
     Iterator output(_output, win_collapsed);
 
-    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
-    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
-    const qasymm8x16_t            va       = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
-    const qasymm8x16_t            vb       = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
-    const qasymm8_t               a        = quantize_qasymm8(_act_info.a(), qi_in);
-    const qasymm8_t               b        = quantize_qasymm8(_act_info.b(), qi_in);
-    const qasymm8_t               const_0  = quantize_qasymm8(0.f, qi_in);
-    const qasymm8x16_t            vconst_0 = vdupq_n_u8(const_0);
-    const auto                    vconst_1 = vdupq_n_f32(1.f);
-    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
-    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
-    const float                   a_f32    = _act_info.a();
-    const float                   b_f32    = _act_info.b();
+    const UniformQuantizationInfo qi_in           = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qi_out          = _output->info()->quantization_info().uniform();
+    const qasymm8x16_t            va              = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+    const qasymm8x16_t            vb              = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+    const qasymm8_t               a               = quantize_qasymm8(_act_info.a(), qi_in);
+    const qasymm8_t               b               = quantize_qasymm8(_act_info.b(), qi_in);
+    const qasymm8_t               const_0         = quantize_qasymm8(0.f, qi_in);
+    const qasymm8x16_t            vconst_0        = vdupq_n_u8(const_0);
+    const auto                    vconst_1        = vdupq_n_f32(1.f);
+    const float32x4_t             va_f32          = vdupq_n_f32(_act_info.a());
+    const float32x4_t             vb_f32          = vdupq_n_f32(_act_info.b());
+    const float                   a_f32           = _act_info.a();
+    const float                   b_f32           = _act_info.b();
+    const auto                    const_6_f32     = vdupq_n_f32(6.f);
+    const auto                    const_0_f32     = vdupq_n_f32(0.f);
+    const auto                    const_3_f32     = vdupq_n_f32(3.f);
+    const auto                    const_inv_6_f32 = vdupq_n_f32(0.166666667f);
 
     // Initialise scale/offset for re-quantization
     float s = qi_in.scale / qi_out.scale;
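The four new broadcast constants feed the vectorized path that follows. Multiplying by const_inv_6_f32 rather than dividing by 6 is the usual NEON idiom: 32-bit NEON has no float vector divide, and a multiply is cheaper even where one exists. A raw-intrinsics sketch of the same computation, outside the library's wrapper:: layer (the function name is hypothetical, not from the patch):

    #include <arm_neon.h>

    // Hard-swish on one float32x4 lane group:
    // x * min(6, max(0, x + 3)) * (1/6), multiplying by the reciprocal.
    inline float32x4_t hard_swish_f32x4(float32x4_t x)
    {
        const float32x4_t three   = vdupq_n_f32(3.f);
        const float32x4_t six     = vdupq_n_f32(6.f);
        const float32x4_t zero    = vdupq_n_f32(0.f);
        const float32x4_t inv_six = vdupq_n_f32(0.166666667f);
        return vmulq_f32(x, vmulq_f32(inv_six, vminq_f32(six, vmaxq_f32(zero, vaddq_f32(x, three)))));
    }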
@@ -490,6 +501,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
             // Re-quantize to new output space
             tmp = vquantize(tmp_dep, qi_out);
         }
+        else if(act == ActivationFunction::HARD_SWISH)
+        {
+            // De-quantize
+            const auto vin_deq = vdequantize(vin, qi_in);
+            // Perform activation
+            const float32x4x4_t tmp_dep =
+            {
+                {
+                    wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+                    wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+                    wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+                    wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+                }
+            };
+            // Re-quantize to new output space
+            tmp = vquantize(tmp_dep, qi_out);
+        }
         else
         {
             ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -529,6 +557,12 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
                 tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                 tmp         = quantize_qasymm8(tmp_f, qi_out);
             }
+            else if(act == ActivationFunction::HARD_SWISH)
+            {
+                float tmp_f = dequantize_qasymm8(in, qi_in);
+                tmp_f       = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+                tmp         = quantize_qasymm8(tmp_f, qi_out);
+            }
             else
             {
                 ARM_COMPUTE_ERROR("Unsupported activation function");
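Both QASYMM8 hunks above follow the same shape: a vector path that dequantizes 16 bytes at a time, applies hard-swish in float, and re-quantizes, plus a scalar leftover loop for elements past the last full 16-byte block. A hypothetical standalone scalar version of that round trip, under the assumption that the library's quantize helpers round to nearest (the exact rounding of quantize_qasymm8() may differ, so this is only a sketch):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    uint8_t hard_swish_qasymm8(uint8_t in, float in_scale, int in_offset,
                               float out_scale, int out_offset)
    {
        const float x = (static_cast<int>(in) - in_offset) * in_scale;         // dequantize
        const float y = x * std::min(std::max(x + 3.f, 0.f), 6.f) * 0.166666667f; // activation
        const int   q = static_cast<int>(std::lround(y / out_scale)) + out_offset; // re-quantize
        return static_cast<uint8_t>(std::min(std::max(q, 0), 255));            // saturate
    }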
@@ -553,19 +587,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
     Iterator input(_input, win_collapsed);
     Iterator output(_output, win_collapsed);
 
-    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
-    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
-    const qasymm8x16_signed_t     va       = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
-    const qasymm8x16_signed_t     vb       = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
-    const qasymm8_signed_t        a        = quantize_qasymm8_signed(_act_info.a(), qi_in);
-    const qasymm8_signed_t        b        = quantize_qasymm8_signed(_act_info.b(), qi_in);
-    const qasymm8_signed_t        const_0  = quantize_qasymm8_signed(0.f, qi_in);
-    const qasymm8x16_signed_t     vconst_0 = vdupq_n_s8(const_0);
-    const auto                    vconst_1 = vdupq_n_f32(1.f);
-    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
-    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
-    const float                   a_f32    = _act_info.a();
-    const float                   b_f32    = _act_info.b();
+    const UniformQuantizationInfo qi_in           = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qi_out          = _output->info()->quantization_info().uniform();
+    const qasymm8x16_signed_t     va              = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
+    const qasymm8x16_signed_t     vb              = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
+    const qasymm8_signed_t        a               = quantize_qasymm8_signed(_act_info.a(), qi_in);
+    const qasymm8_signed_t        b               = quantize_qasymm8_signed(_act_info.b(), qi_in);
+    const qasymm8_signed_t        const_0         = quantize_qasymm8_signed(0.f, qi_in);
+    const qasymm8x16_signed_t     vconst_0        = vdupq_n_s8(const_0);
+    const auto                    vconst_1        = vdupq_n_f32(1.f);
+    const float32x4_t             va_f32          = vdupq_n_f32(_act_info.a());
+    const float32x4_t             vb_f32          = vdupq_n_f32(_act_info.b());
+    const float                   a_f32           = _act_info.a();
+    const float                   b_f32           = _act_info.b();
+    const auto                    const_6_f32     = vdupq_n_f32(6.f);
+    const auto                    const_0_f32     = vdupq_n_f32(0.f);
+    const auto                    const_3_f32     = vdupq_n_f32(3.f);
+    const auto                    const_inv_6_f32 = vdupq_n_f32(0.166666667f);
 
     // Initialise scale/offset for re-quantization
     float s = qi_in.scale / qi_out.scale;
@@ -640,6 +678,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
             // Re-quantize to new output space
             tmp = vquantize_signed(tmp_dep, qi_out);
         }
+        else if(act == ActivationFunction::HARD_SWISH)
+        {
+            // De-quantize
+            const auto vin_deq = vdequantize(vin, qi_in);
+            // Perform activation
+            const float32x4x4_t tmp_dep =
+            {
+                {
+                    wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+                    wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+                    wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+                    wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+                }
+            };
+            // Re-quantize to new output space
+            tmp = vquantize_signed(tmp_dep, qi_out);
+        }
         else
         {
             ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -679,6 +734,12 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
                 tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                 tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
             }
+            else if(act == ActivationFunction::HARD_SWISH)
+            {
+                float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+                tmp_f       = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+                tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
+            }
             else
             {
                 ARM_COMPUTE_ERROR("Unsupported activation function");
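With the kernel wired up, a caller would reach the new paths through the public runtime function. A hedged usage sketch, assuming the NEActivationLayer::configure(ITensor *, ITensor *, ActivationLayerInfo) signature of this era of the library (the function run_hard_swish is ours, not from the patch):

    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"

    using namespace arm_compute;

    // src and dst are QASYMM8/QASYMM8_SIGNED tensors whose quantization
    // info has already been set; HARD_SWISH needs no a/b parameters.
    void run_hard_swish(ITensor *src, ITensor *dst)
    {
        NEActivationLayer act;
        act.configure(src, dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH));
        act.run();
    }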