diff options
author | morgolock <pablo.tello@arm.com> | 2020-02-28 15:38:28 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-03-05 12:29:45 +0000 |
commit | aa85cdf22802cb892d7fa422ca505a43d84adb38 (patch) | |
tree | cf98d5720e1b1cb15bec77e1329afe635c5c3c94 /src/core | |
parent | 740872e076ccecc24151e170be1ef065cee598e4 (diff) | |
download | ComputeLibrary-aa85cdf22802cb892d7fa422ca505a43d84adb38.tar.gz |
COMPMID-3079: Implement Hard-Swish in NEON
* Support for quantized tensors
Change-Id: I47c011a4a0b92dbadd733eff7ce283a19c5d0e94
Signed-off-by: morgolock <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2823
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/kernels/NEActivationLayerKernel.cpp | 121 |
1 files changed, 91 insertions, 30 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index a0bf9e8010..5251209463 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -50,18 +50,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); - static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = + const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = { ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH + ActivationLayerInfo::ActivationFunction::TANH, + ActivationLayerInfo::ActivationFunction::HARD_SWISH }; - static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = + const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = { ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH + ActivationLayerInfo::ActivationFunction::TANH, + ActivationLayerInfo::ActivationFunction::HARD_SWISH }; const DataType data_type = input->data_type(); const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info(); @@ -191,6 +193,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> }, { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> }, + { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_signed_t> }, + }; // Activation functions : QASYMM8 @@ -202,6 +206,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> }, { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> }, + { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_t> }, + }; // Activation functions : QSYMM16 @@ -209,6 +215,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> }, + }; switch(input->info()->data_type()) @@ -403,19 +410,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat Iterator input(_input, win_collapsed); Iterator output(_output, win_collapsed); - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); - const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); - const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); - const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); - const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); - const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); - const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); - const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); - const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); - const float a_f32 = _act_info.a(); - const float b_f32 = _act_info.b(); + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); + const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); + const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); + const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); + const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); + const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); + const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); + const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); + const float a_f32 = _act_info.a(); + const float b_f32 = _act_info.b(); + const auto const_6_f32 = vdupq_n_f32(6.f); + const auto const_0_f32 = vdupq_n_f32(0.f); + const auto const_3_f32 = vdupq_n_f32(3.f); + const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f); // Initialise scale/offset for re-quantization float s = qi_in.scale / qi_out.scale; @@ -490,6 +501,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat // Re-quantize to new output space tmp = vquantize(tmp_dep, qi_out); } + else if(act == ActivationFunction::HARD_SWISH) + { + // De-quantize + const auto vin_deq = vdequantize(vin, qi_in); + // Perform activation + const float32x4x4_t tmp_dep = + { + { + wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))), + wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))), + wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))), + wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))), + } + }; + // Re-quantize to new output space + tmp = vquantize(tmp_dep, qi_out); + } else { ARM_COMPUTE_ERROR("Unsupported activation function"); @@ -529,6 +557,12 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); tmp = quantize_qasymm8(tmp_f, qi_out); } + else if(act == ActivationFunction::HARD_SWISH) + { + float tmp_f = dequantize_qasymm8(in, qi_in); + tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f); + tmp = quantize_qasymm8(tmp_f, qi_out); + } else { ARM_COMPUTE_ERROR("Unsupported activation function"); @@ -553,19 +587,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE Iterator input(_input, win_collapsed); Iterator output(_output, win_collapsed); - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); - const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in)); - const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in)); - const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in); - const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in); - const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in); - const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); - const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); - const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); - const float a_f32 = _act_info.a(); - const float b_f32 = _act_info.b(); + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in)); + const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in)); + const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in); + const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in); + const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in); + const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); + const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); + const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); + const float a_f32 = _act_info.a(); + const float b_f32 = _act_info.b(); + const auto const_6_f32 = vdupq_n_f32(6.f); + const auto const_0_f32 = vdupq_n_f32(0.f); + const auto const_3_f32 = vdupq_n_f32(3.f); + const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f); // Initialise scale/offset for re-quantization float s = qi_in.scale / qi_out.scale; @@ -640,6 +678,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE // Re-quantize to new output space tmp = vquantize_signed(tmp_dep, qi_out); } + else if(act == ActivationFunction::HARD_SWISH) + { + // De-quantize + const auto vin_deq = vdequantize(vin, qi_in); + // Perform activation + const float32x4x4_t tmp_dep = + { + { + wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))), + wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))), + wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))), + wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))), + } + }; + // Re-quantize to new output space + tmp = vquantize_signed(tmp_dep, qi_out); + } else { ARM_COMPUTE_ERROR("Unsupported activation function"); @@ -679,6 +734,12 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); tmp = quantize_qasymm8_signed(tmp_f, qi_out); } + else if(act == ActivationFunction::HARD_SWISH) + { + float tmp_f = dequantize_qasymm8_signed(in, qi_in); + tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f); + tmp = quantize_qasymm8_signed(tmp_f, qi_out); + } else { ARM_COMPUTE_ERROR("Unsupported activation function"); |