aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp121
1 files changed, 91 insertions, 30 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index a0bf9e8010..5251209463 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -50,18 +50,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
- static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
+ const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
{
ActivationLayerInfo::ActivationFunction::RELU,
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH
};
- static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
+ const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
{
ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH
};
const DataType data_type = input->data_type();
const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
@@ -191,6 +193,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
{ ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
+ { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_signed_t> },
+
};
// Activation functions : QASYMM8
@@ -202,6 +206,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
{ ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
+ { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_t> },
+
};
// Activation functions : QSYMM16
@@ -209,6 +215,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{
{ ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
+
};
switch(input->info()->data_type())
@@ -403,19 +410,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
Iterator input(_input, win_collapsed);
Iterator output(_output, win_collapsed);
- const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
- const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
- const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
- const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
- const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
- const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
- const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
- const float a_f32 = _act_info.a();
- const float b_f32 = _act_info.b();
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+ const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+ const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
+ const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
+ const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
+ const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -490,6 +501,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
// Re-quantize to new output space
tmp = vquantize(tmp_dep, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -529,6 +557,12 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -553,19 +587,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
Iterator input(_input, win_collapsed);
Iterator output(_output, win_collapsed);
- const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
- const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
- const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
- const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in);
- const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in);
- const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
- const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
- const float a_f32 = _act_info.a();
- const float b_f32 = _act_info.b();
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
+ const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
+ const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in);
+ const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in);
+ const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
+ const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -640,6 +678,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
// Re-quantize to new output space
tmp = vquantize_signed(tmp_dep, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize_signed(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -679,6 +734,12 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
tmp = quantize_qasymm8_signed(tmp_f, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+ tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+ tmp = quantize_qasymm8_signed(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");