about summary refs log tree commit diff
path: root/src/core/NEON/kernels/NEActivationLayerKernel.cpp
diff options
context:
space:
mode:
authormorgolock <pablo.tello@arm.com>2020-02-28 15:38:28 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-03-05 12:29:45 +0000
commitaa85cdf22802cb892d7fa422ca505a43d84adb38 (patch)
treecf98d5720e1b1cb15bec77e1329afe635c5c3c94 /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent740872e076ccecc24151e170be1ef065cee598e4 (diff)
downloadComputeLibrary-aa85cdf22802cb892d7fa422ca505a43d84adb38.tar.gz
COMPMID-3079: Implement Hard-Swish in NEON
* Support for quantized tensors

Change-Id: I47c011a4a0b92dbadd733eff7ce283a19c5d0e94
Signed-off-by: morgolock <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2823
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEActivationLayerKernel.cpp | 121
1 file changed, 91 insertions(+), 30 deletions(-)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index a0bf9e8010..5251209463 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -50,18 +50,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
- static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
+ const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
{
ActivationLayerInfo::ActivationFunction::RELU,
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH
};
- static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
+ const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
{
ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH
};
const DataType data_type = input->data_type();
const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
@@ -191,6 +193,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
{ ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
+ { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_signed_t> },
+
};
// Activation functions : QASYMM8
@@ -202,6 +206,8 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
{ ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
+ { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_t> },
+
};
// Activation functions : QSYMM16
@@ -209,6 +215,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{
{ ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
+
};
switch(input->info()->data_type())
@@ -403,19 +410,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
Iterator input(_input, win_collapsed);
Iterator output(_output, win_collapsed);
- const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
- const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
- const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
- const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
- const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
- const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
- const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
- const float a_f32 = _act_info.a();
- const float b_f32 = _act_info.b();
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+ const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+ const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
+ const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
+ const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
+ const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -490,6 +501,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
// Re-quantize to new output space
tmp = vquantize(tmp_dep, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -529,6 +557,12 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -553,19 +587,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
Iterator input(_input, win_collapsed);
Iterator output(_output, win_collapsed);
- const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
- const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
- const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
- const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in);
- const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in);
- const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
- const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
- const float a_f32 = _act_info.a();
- const float b_f32 = _act_info.b();
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
+ const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
+ const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in);
+ const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in);
+ const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
+ const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -640,6 +678,23 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
// Re-quantize to new output space
tmp = vquantize_signed(tmp_dep, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize_signed(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -679,6 +734,12 @@ typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NE
tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
tmp = quantize_qasymm8_signed(tmp_f, qi_out);
}
+ else if(act == ActivationFunction::HARD_SWISH)
+ {
+ float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+ tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+ tmp = quantize_qasymm8_signed(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");