author     giuros01 <giuseppe.rossini@arm.com>      2019-06-20 10:30:17 +0100
committer  Manuel Bottini <manuel.bottini@arm.com>  2019-07-01 10:20:51 +0000
commit     c9573f35c4267aa55648df4a134ebec82c5af93b (patch)
tree       aa17eabf8c1ecec995d22aa8a3479b9f15becaf7 /src/core/NEON/kernels
parent     5a1320b923ec4db1cde00f9fc1be590022178b7f (diff)
download   ComputeLibrary-c9573f35c4267aa55648df4a134ebec82c5af93b.tar.gz
COMPMID-2407: Add (logistic and tanh) activation support for QSYMM16 for NEON
Change-Id: Ib89c9cfe12975e51d1710af736c73ce79e667363
Signed-off-by: giuros01 <giuseppe.rossini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1412
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r--  src/core/NEON/kernels/NEActivationLayerKernel.cpp | 127
1 file changed, 124 insertions(+), 3 deletions(-)
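
For context, the minimal sketch below shows how the new QSYMM16 path might be exercised through the public NEActivationLayer API. The tensor shape and input scale are illustrative assumptions; the output QuantizationInfo(1.f / 32768.f, 0) matches the check added to validate_arguments in this patch.

// Hypothetical usage sketch (assumed shape and input scale), not part of this patch.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    // Assumed input scale; QSYMM16 is symmetric, so the offset is always 0.
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 4096.f)));
    // The new validation requires logistic QSYMM16 output to use QuantizationInfo(1.f / 32768.f, 0).
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f, 0)));

    NEActivationLayer act;
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    act.run();
    return 0;
}
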
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 64342512a0..3953305996 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
+#include "arm_compute/core/NEON/NESymm.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -47,9 +48,9 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
- static std::set<ActivationLayerInfo::ActivationFunction> qs8_supported_activations =
+ static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
{
ActivationLayerInfo::ActivationFunction::RELU,
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
@@ -57,15 +58,26 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ActivationLayerInfo::ActivationFunction::LOGISTIC,
ActivationLayerInfo::ActivationFunction::TANH
};
+ static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
+ {
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH
+ };
const DataType data_type = input->data_type();
const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qs8_supported_activations.count(f_act) == 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
"For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
+ "For QSYMM16 only tanh and logistic are supported");
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
{
@@ -163,11 +175,21 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
};
+ // Activation functions : QSYMM16
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 =
+ {
+ { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
+ { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
+ };
+
switch(input->info()->data_type())
{
case DataType::QASYMM8:
_func = act_map_qasymm8[activation_info.activation()];
break;
+ case DataType::QSYMM16:
+ _func = act_map_qsymm16[activation_info.activation()];
+ break;
case DataType::F32:
_func = act_map_f32[activation_info.activation()];
break;
@@ -469,6 +491,105 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
input, output);
}
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+{
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationFunction act = F;
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+ wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+ ARM_COMPUTE_UNUSED(tmp);
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ if(act == ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep =
+ {
+ {
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
+ else if(act == ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep =
+ {
+ {
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
+ }
+
+ // Compute left-over elements
+ for(; x < window_end_x; ++x)
+ {
+ T in = *(reinterpret_cast<const T *>(input_ptr + x));
+ T tmp;
+ if(act == ActivationFunction::LOGISTIC)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else if(act == ActivationFunction::TANH)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
+
Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
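
As a closing note on the left-over (scalar) path added above: the standalone sketch below illustrates the QSYMM16 round trip it performs per element. The helper names and the input scale are assumptions for illustration, not the library's dequantize_qsymm16/quantize_qsymm16 implementations; with the fixed output scale of 1/32768 and zero offset required by the new checks, the int16 range covers logistic's [0, 1] and tanh's [-1, 1] outputs.

// Simplified, standalone sketch (not the library code) of the scalar QSYMM16
// dequantize -> activate -> requantize round trip.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static float dequantize_qsymm16_sketch(int16_t v, float scale)
{
    return static_cast<float>(v) * scale; // symmetric quantization: no zero-point offset
}

static int16_t quantize_qsymm16_sketch(float v, float scale)
{
    const int q = static_cast<int>(std::lround(v / scale));
    return static_cast<int16_t>(std::max(-32768, std::min(q, 32767))); // clamp to int16 range
}

int main()
{
    const float in_scale  = 1.f / 4096.f;  // assumed input scale
    const float out_scale = 1.f / 32768.f; // fixed output scale enforced by validate_arguments

    const int16_t in = 8192;                                 // represents 2.0f at the assumed scale
    const float   x  = dequantize_qsymm16_sketch(in, in_scale);
    const float   y  = 1.f / (1.f + std::exp(-x));           // logistic, as in the left-over loop
    std::printf("logistic(%f) -> q%d\n", x, quantize_qsymm16_sketch(y, out_scale));
    return 0;
}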