aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEActivationLayerKernel.cpp
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2019-11-28 11:31:23 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2019-12-05 11:58:51 +0000
commit8d4d1b85bc57d5f76f3939bb422e44df68dc2342 (patch)
tree8de9dd3c7bec7ea59caa4d6e70b3bbeac877c8b8 /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent25a6b67cd8188e5a968c0c89adf99f874c7eecb4 (diff)
downloadComputeLibrary-8d4d1b85bc57d5f76f3939bb422e44df68dc2342.tar.gz
COMPMID-2796: Add support for QASYMM8_SIGNED in NEActivationLayer and NEPReluLayer
Change-Id: I089fd19a6beab7779d690bc9ace327f661c2753d Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/2407 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp175
1 files changed, 172 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index c338ef09c7..44f76f6e22 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -48,7 +48,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
{
@@ -72,8 +72,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
"For QSYMM16 only tanh and logistic are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH)
+ && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
+ ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
@@ -173,6 +178,17 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
};
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+ // Activation functions : QASYMM8_SIGNED
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8_signed =
+ {
+ { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_signed_t> },
+ { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_signed_t> },
+ { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_signed_t> },
+ { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
+ { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
+ { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
+ };
+
// Activation functions : QASYMM8
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
{
@@ -193,6 +209,9 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
switch(input->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ _func = act_map_qasymm8_signed[activation_info.activation()];
+ break;
case DataType::QASYMM8:
_func = act_map_qasymm8[activation_info.activation()];
break;
@@ -508,6 +527,156 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
}
template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+{
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationFunction act = F;
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
+ const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
+ const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in);
+ const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in);
+ const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
+ const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+
+ // Initialise scale/offset for re-quantization
+ float s = qi_in.scale / qi_out.scale;
+ float o = -qi_in.offset * s + qi_out.offset;
+ float32x4_t vs = vdupq_n_f32(s);
+ float32x4_t vo = vdupq_n_f32(o);
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+ wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ if(act == ActivationFunction::RELU)
+ {
+ // Perform activation
+ tmp = vmaxq_s8(vconst_0, vin);
+ // Re-quantize to new output space
+ tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
+ }
+ else if(act == ActivationFunction::BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = vminq_s8(va, vmaxq_s8(vconst_0, vin));
+ // Re-quantize to new output space
+ tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
+ }
+ else if(act == ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = vminq_s8(va, vmaxq_s8(vb, vin));
+ // Re-quantize to new output space
+ tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
+ }
+ else if(act == ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize_signed(tmp_dep, qi_out);
+ }
+ else if(act == ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize_signed(tmp_dep, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
+ }
+
+ // Compute left-over elements
+ for(; x < window_end_x; ++x)
+ {
+ T in = *(reinterpret_cast<const T *>(input_ptr + x));
+ T tmp;
+ if(act == ActivationFunction::RELU)
+ {
+ tmp = std::max(const_0, in);
+ tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+ }
+ else if(act == ActivationFunction::BOUNDED_RELU)
+ {
+ tmp = std::min(a, std::max(const_0, in));
+ tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+ }
+ else if(act == ActivationFunction::LU_BOUNDED_RELU)
+ {
+ tmp = std::min(a, std::max(b, in));
+ tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+ }
+ else if(act == ActivationFunction::LOGISTIC)
+ {
+ float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+ tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+ tmp = quantize_qasymm8_signed(tmp_f, qi_out);
+ }
+ else if(act == ActivationFunction::TANH)
+ {
+ float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+ tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+ tmp = quantize_qasymm8_signed(tmp_f, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
+
+template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
const int window_step_x = 16 / sizeof(T);