-rw-r--r--   src/core/NEON/kernels/NEActivationLayerKernel.cpp   121
-rw-r--r--   tests/validation/NEON/ActivationLayer.cpp              7
2 files changed, 96 insertions, 32 deletions
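
All of the new quantized paths evaluate, in float, the hard swish function

    h-swish(x) = x * ReLU6(x + 3) / 6 = x * min(max(x + 3, 0), 6) / 6

so the constant 0.166666667f that recurs in the kernel changes below is 1/6, and the vector constants 0, 3 and 6 are the clamp bounds. Each quantized input is de-quantized to float, passed through this expression, and re-quantized into the output space.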
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index a0bf9e8010..5251209463 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -50,18 +50,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
 
-    static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
+    const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
     {
         ActivationLayerInfo::ActivationFunction::RELU,
         ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
         ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
         ActivationLayerInfo::ActivationFunction::LOGISTIC,
-        ActivationLayerInfo::ActivationFunction::TANH
+        ActivationLayerInfo::ActivationFunction::TANH,
+        ActivationLayerInfo::ActivationFunction::HARD_SWISH
     };
-    static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
+    const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
     {
         ActivationLayerInfo::ActivationFunction::LOGISTIC,
-        ActivationLayerInfo::ActivationFunction::TANH
+        ActivationLayerInfo::ActivationFunction::TANH,
+        ActivationLayerInfo::ActivationFunction::HARD_SWISH
     };
     const DataType data_type = input->data_type();
     const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
@@ -191,6 +193,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
         { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
+        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_signed_t> },
+
     };
 
     // Activation functions : QASYMM8
@@ -202,6 +206,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
         { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
+        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_t> },
+
     };
 
     // Activation functions : QSYMM16
@@ -209,6 +215,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
     {
         { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
+
     };
 
     switch(input->info()->data_type())
@@ -403,19 +410,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
     Iterator input(_input, win_collapsed);
     Iterator output(_output, win_collapsed);
 
-    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
-    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
-    const qasymm8x16_t            va       = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
-    const qasymm8x16_t            vb       = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
-    const qasymm8_t               a        = quantize_qasymm8(_act_info.a(), qi_in);
-    const qasymm8_t               b        = quantize_qasymm8(_act_info.b(), qi_in);
-    const qasymm8_t               const_0  = quantize_qasymm8(0.f, qi_in);
-    const qasymm8x16_t            vconst_0 = vdupq_n_u8(const_0);
-    const auto                    vconst_1 = vdupq_n_f32(1.f);
-    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
-    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
-    const float                   a_f32    = _act_info.a();
-    const float                   b_f32    = _act_info.b();
+    const UniformQuantizationInfo qi_in           = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qi_out          = _output->info()->quantization_info().uniform();
+    const qasymm8x16_t            va              = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+    const qasymm8x16_t            vb              = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+    const qasymm8_t               a               = quantize_qasymm8(_act_info.a(), qi_in);
+    const qasymm8_t               b               = quantize_qasymm8(_act_info.b(), qi_in);
+    const qasymm8_t               const_0         = quantize_qasymm8(0.f, qi_in);
+    const qasymm8x16_t            vconst_0        = vdupq_n_u8(const_0);
+    const auto                    vconst_1        = vdupq_n_f32(1.f);
+    const float32x4_t             va_f32          = vdupq_n_f32(_act_info.a());
+    const float32x4_t             vb_f32          = vdupq_n_f32(_act_info.b());
+    const float                   a_f32           = _act_info.a();
+    const float                   b_f32           = _act_info.b();
+    const auto                    const_6_f32     = vdupq_n_f32(6.f);
+    const auto                    const_0_f32     = vdupq_n_f32(0.f);
+    const auto                    const_3_f32     = vdupq_n_f32(3.f);
+    const auto                    const_inv_6_f32 = vdupq_n_f32(0.166666667f);
 
     // Initialise scale/offset for re-quantization
     float s = qi_in.scale / qi_out.scale;
@@ -490,6 +501,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
                 // Re-quantize to new output space
                 tmp = vquantize(tmp_dep, qi_out);
             }
+            else if(act == ActivationFunction::HARD_SWISH)
+            {
+                // De-quantize
+                const auto vin_deq = vdequantize(vin, qi_in);
+                // Perform activation
+                const float32x4x4_t tmp_dep =
+                {
+                    {
+                        wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+                        wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+                        wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+                        wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+                    }
+                };
+                // Re-quantize to new output space
+                tmp = vquantize(tmp_dep, qi_out);
+            }
             else
             {
                 ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -529,6 +557,12 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
                 tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                 tmp         = quantize_qasymm8(tmp_f, qi_out);
             }
+            else if(act == ActivationFunction::HARD_SWISH)
+            {
+                float tmp_f = dequantize_qasymm8(in, qi_in);
+                tmp_f       = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+                tmp         = quantize_qasymm8(tmp_f, qi_out);
+            }
             else
             {
                 ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -553,19 +587,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
     Iterator input(_input, win_collapsed);
     Iterator output(_output, win_collapsed);
 
-    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
-    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
-    const qasymm8x16_signed_t     va       = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
-    const qasymm8x16_signed_t     vb       = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
-    const qasymm8_signed_t        a        = quantize_qasymm8_signed(_act_info.a(), qi_in);
-    const qasymm8_signed_t        b        = quantize_qasymm8_signed(_act_info.b(), qi_in);
-    const qasymm8_signed_t        const_0  = quantize_qasymm8_signed(0.f, qi_in);
-    const qasymm8x16_signed_t     vconst_0 = vdupq_n_s8(const_0);
-    const auto                    vconst_1 = vdupq_n_f32(1.f);
-    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
-    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
-    const float                   a_f32    = _act_info.a();
-    const float                   b_f32    = _act_info.b();
+    const UniformQuantizationInfo qi_in           = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qi_out          = _output->info()->quantization_info().uniform();
+    const qasymm8x16_signed_t     va              = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
+    const qasymm8x16_signed_t     vb              = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
+    const qasymm8_signed_t        a               = quantize_qasymm8_signed(_act_info.a(), qi_in);
+    const qasymm8_signed_t        b               = quantize_qasymm8_signed(_act_info.b(), qi_in);
+    const qasymm8_signed_t        const_0         = quantize_qasymm8_signed(0.f, qi_in);
+    const qasymm8x16_signed_t     vconst_0        = vdupq_n_s8(const_0);
+    const auto                    vconst_1        = vdupq_n_f32(1.f);
+    const float32x4_t             va_f32          = vdupq_n_f32(_act_info.a());
+    const float32x4_t             vb_f32          = vdupq_n_f32(_act_info.b());
+    const float                   a_f32           = _act_info.a();
+    const float                   b_f32           = _act_info.b();
+    const auto                    const_6_f32     = vdupq_n_f32(6.f);
+    const auto                    const_0_f32     = vdupq_n_f32(0.f);
+    const auto                    const_3_f32     = vdupq_n_f32(3.f);
+    const auto                    const_inv_6_f32 = vdupq_n_f32(0.166666667f);
 
     // Initialise scale/offset for re-quantization
     float s = qi_in.scale / qi_out.scale;
@@ -640,6 +678,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
                 // Re-quantize to new output space
                 tmp = vquantize_signed(tmp_dep, qi_out);
             }
+            else if(act == ActivationFunction::HARD_SWISH)
+            {
+                // De-quantize
+                const auto vin_deq = vdequantize(vin, qi_in);
+                // Perform activation
+                const float32x4x4_t tmp_dep =
+                {
+                    {
+                        wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+                        wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+                        wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+                        wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+                    }
+                };
+                // Re-quantize to new output space
+                tmp = vquantize_signed(tmp_dep, qi_out);
+            }
             else
             {
                 ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -679,6 +734,12 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
                 tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                 tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
             }
+            else if(act == ActivationFunction::HARD_SWISH)
+            {
+                float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+                tmp_f       = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+                tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
+            }
             else
             {
                 ARM_COMPUTE_ERROR("Unsupported activation function");
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index e3082cb5dc..f9f7451ed7 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -117,7 +117,7 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
                                                    DataType::F32,
 });
 
-const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH) );
+const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH));
 
 /** Input data sets. */
 const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
@@ -242,7 +242,10 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
                                                                            ActivationLayerInfo::ActivationFunction::LOGISTIC,
                                                                            ActivationLayerInfo::ActivationFunction::TANH
                                                                          });
-const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), QuantizedActivationFunctionsDataset),
+const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH));
+
+const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }),
+                                                        concat(QuantizedActivationFunctionsDataset, framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH))),
                                                 framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
 
 TEST_SUITE(Quantized)
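
For reference, the quantized HARD_SWISH path added above amounts to a de-quantize, h-swish, re-quantize round trip. The minimal standalone C++ sketch below models the scalar leftover loop for QASYMM8; the helper names and the example scale/offset values are illustrative stand-ins, not Compute Library APIs:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    // QASYMM8 affine de-quantization: x = (q - offset) * scale
    static float dequantize_u8(uint8_t q, float scale, int32_t offset)
    {
        return static_cast<float>(static_cast<int32_t>(q) - offset) * scale;
    }

    // QASYMM8 affine quantization: round to nearest, then clamp to [0, 255]
    static uint8_t quantize_u8(float x, float scale, int32_t offset)
    {
        const int32_t q = static_cast<int32_t>(std::lround(x / scale)) + offset;
        return static_cast<uint8_t>(std::min(std::max(q, 0), 255));
    }

    // h-swish(x) = x * min(max(x + 3, 0), 6) / 6, matching the scalar loop above
    static float hard_swish(float x)
    {
        return x * (std::min(std::max(x + 3.f, 0.f), 6.f) * 0.166666667f);
    }

    int main()
    {
        // Example quantization parameters (hypothetical): scale 1/16, offset 128
        const float   scale  = 1.f / 16.f;
        const int32_t offset = 128;

        for(uint8_t in : { uint8_t{ 64 }, uint8_t{ 128 }, uint8_t{ 192 } })
        {
            const float   x   = dequantize_u8(in, scale, offset);
            const uint8_t out = quantize_u8(hard_swish(x), scale, offset);
            std::printf("q_in=%3u  x=%+.3f  h-swish(x)=%+.3f  q_out=%3u\n",
                        static_cast<unsigned>(in), x, hard_swish(x), static_cast<unsigned>(out));
        }
        return 0;
    }

De-quantizing to float and sharing the float expression keeps the scalar leftovers numerically consistent with the vectorized path, which amortizes the conversions by processing sixteen elements per iteration (qasymm8x16_t in, float32x4x4_t intermediate).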