author     Georgios Pinitas <georgios.pinitas@arm.com>    2021-01-08 17:25:55 +0000
committer  Georgios Pinitas <georgios.pinitas@arm.com>    2021-01-11 16:48:31 +0000
commit     f8f0442e9a6105be0e32f4defec5fbc10248ea6e (patch)
tree       d4e77c82f57df175dcec6c46ed2f74f4a8b72d7a /src/core
parent     4f77ba9f2dccbae1b46b2d4e17d862560f858050 (diff)
download   ComputeLibrary-f8f0442e9a6105be0e32f4defec5fbc10248ea6e.tar.gz
Make CpuActivation stateless
- Rename NEActivationLayer to CpuActivation
- Add member function to generate execution window

Partially Resolves: COMPMID-3992

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I4e1ae15cf456b860d3080b2fedc4dbcce7d1bb79
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4791
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
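For context, a minimal sketch of the calling pattern the stateless kernel enables. CpuActivationKernel, configure(), validate() and run_op() are taken from the diff below; the driver function, the RELU choice, the include set and the single-threaded ThreadInfo{} are illustrative assumptions, not part of this patch:

    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "src/core/cpu/kernels/CpuActivationKernel.h"

    using namespace arm_compute;

    // Hypothetical driver: configuration consumes tensor metadata (ITensorInfo)
    // only, so the configured kernel holds no tensor pointers and can be reused.
    void run_relu(ITensor *src, ITensor *dst)
    {
        ActivationLayerInfo act_info{ ActivationLayerInfo::ActivationFunction::RELU };

        ARM_COMPUTE_ERROR_THROW_ON(
            cpu::kernels::CpuActivationKernel::validate(src->info(), dst->info(), act_info));

        cpu::kernels::CpuActivationKernel kernel;
        kernel.configure(src->info(), dst->info(), act_info);

        // Actual tensors are bound only at run time, through an ITensorPack.
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, dst);
        kernel.run_op(pack, kernel.window(), ThreadInfo{});
    }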
Diffstat (limited to 'src/core')
-rw-r--r--  src/core/NEON/NEKernels.h | 1
-rw-r--r--  src/core/NEON/SVEMath.inl | 2
-rw-r--r--  src/core/cpu/kernels/CpuActivationKernel.cpp (renamed from src/core/NEON/kernels/NEActivationLayerKernel.cpp) | 69
-rw-r--r--  src/core/cpu/kernels/CpuActivationKernel.h (renamed from src/core/NEON/kernels/NEActivationLayerKernel.h) | 60
-rw-r--r--  src/core/cpu/kernels/CpuFloorKernel.cpp (renamed from src/core/cpu/kernels/floor/CpuFloorKernel.cpp) | 2
-rw-r--r--  src/core/cpu/kernels/CpuFloorKernel.h | 4
-rw-r--r--  src/core/cpu/kernels/activation/NEON/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/fp16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/fp32.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/SVE/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/fp16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/SVE/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/fp32.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/SVE/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp) | 49
-rw-r--r--  src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp) | 49
-rw-r--r--  src/core/cpu/kernels/activation/SVE/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/list.h (renamed from src/core/NEON/kernels/activation/impl/list.h) | 2
-rw-r--r--  src/core/cpu/kernels/floor/NEON/fp16.cpp (renamed from src/core/cpu/kernels/floor/impl/NEON/fp16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/floor/NEON/fp32.cpp (renamed from src/core/cpu/kernels/floor/impl/NEON/fp32.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/floor/list.h (renamed from src/core/cpu/kernels/floor/impl/list.h) | 0
20 files changed, 115 insertions, 123 deletions
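One substantive change visible in the diff below: the QASYMM8/QSYMM16 activation whitelists move from function-local static std::set lookups to namespace-scope std::array tables searched with std::find. A minimal, self-contained sketch of that lookup pattern (the enum and names here are illustrative, not the library's):

    #include <algorithm>
    #include <array>

    enum class Act { RELU, TANH, LOGISTIC, HARD_SWISH };

    // For a tiny fixed whitelist, a constant array plus a linear std::find
    // avoids the node allocations and first-use initialisation that a
    // function-local static std::set would incur on every validate() path.
    static const std::array<Act, 3> supported_acts = { Act::RELU, Act::TANH, Act::LOGISTIC };

    bool is_supported(Act f)
    {
        return std::find(std::begin(supported_acts), std::end(supported_acts), f)
               != std::end(supported_acts);
    }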
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 1e0b1f08d6..0d447de44c 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -27,7 +27,6 @@
/* Header regrouping all the NEON kernels */
#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
#include "src/core/NEON/kernels/NEAccumulateKernel.h"
-#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
index f201e92738..cf7f9f5a95 100644
--- a/src/core/NEON/SVEMath.inl
+++ b/src/core/NEON/SVEMath.inl
@@ -225,7 +225,7 @@ inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x)
inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val)
{
using ScalarType = float;
- using IntType = u32;
+ using IntType = uint32_t;
constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3)
constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp
index f215787bf6..abdba3ae53 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
+#include "src/core/cpu/kernels/CpuActivationKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
@@ -30,13 +30,17 @@
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "src/core/NEON/kernels/activation/impl/list.h"
#include "src/core/common/Registrars.h"
+#include "src/core/cpu/kernels/activation/list.h"
-#include <set>
+#include <array>
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
struct ActivationSelectorData
@@ -127,6 +131,25 @@ const ActivationKernel *get_implementation(const ActivationSelectorData &data)
return nullptr;
}
+/* Supported activation in the 8-bit integer domain */
+static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations =
+{
+ ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH,
+ ActivationLayerInfo::ActivationFunction::LEAKY_RELU,
+};
+/* Supported activation in the 16-bit integer domain */
+static const std::array<ActivationLayerInfo::ActivationFunction, 3> qsymm16_activations =
+{
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH
+};
+
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
@@ -135,30 +158,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
const auto *uk = get_implementation(ActivationSelectorData{ input->data_type() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
- const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
- {
- ActivationLayerInfo::ActivationFunction::RELU,
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH,
- ActivationLayerInfo::ActivationFunction::HARD_SWISH,
- ActivationLayerInfo::ActivationFunction::LEAKY_RELU,
- };
- const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
- {
- ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH,
- ActivationLayerInfo::ActivationFunction::HARD_SWISH
- };
const DataType data_type = input->data_type();
const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (std::find(std::begin(qasymm8_activations), std::end(qasymm8_activations), f_act) == std::end(qasymm8_activations)),
"For QASYMM8 only hard swish, leaky relu, tanh, logistic, relu and lower/upper bounded relu are supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (std::find(std::begin(qsymm16_activations), std::end(qsymm16_activations), f_act) == std::end(qsymm16_activations)),
"For QSYMM16 only tanh and logistic are supported");
ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH)
&& (oq_info != QuantizationInfo(1.f / 128.f, 128)));
@@ -200,12 +207,7 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
}
} // namespace
-NEActivationLayerKernel::NEActivationLayerKernel()
- : _act_info()
-{
-}
-
-void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info)
+void CpuActivationKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -219,7 +221,7 @@ void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *o
ICPPKernel::configure(win_config.second);
}
-Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+Status CpuActivationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
@@ -228,7 +230,7 @@ Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensor
return Status{};
}
-void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
// Early exit on disabled activation
if(!_act_info.enabled())
@@ -249,4 +251,11 @@ void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window,
uk->ukernel(src, dst, _act_info, window);
}
+
+const char *CpuActivationKernel::name() const
+{
+ return "CpuActivationKernel";
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.h b/src/core/cpu/kernels/CpuActivationKernel.h
index 783783c6ab..083915ba9f 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/src/core/cpu/kernels/CpuActivationKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,56 +21,39 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
+#ifndef ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H
+#define ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/INEKernel.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
namespace arm_compute
{
-// Forward declarations
-class ITensor;
-
-/** Interface for the activation layer kernel. */
-class NEActivationLayerKernel : public INEKernel
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the activation kernel */
+class CpuActivationKernel : public ICpuKernel
{
public:
- const char *name() const override
- {
- return "NEActivationLayerKernel";
- }
- /** Constructor */
- NEActivationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel(const NEActivationLayerKernel &) = delete;
- /** Default move constructor */
- NEActivationLayerKernel(NEActivationLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete;
- /** Default move assignment operator */
- NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default;
- /** Default destructor */
- ~NEActivationLayerKernel() = default;
+ CpuActivationKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuActivationKernel);
/** Set the input and output tensor.
*
* @note If the output tensor is a nullptr, the activation function will be performed in-place
*
- * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result
+ * @param[in, out] src Source tensor info. In case of @p dst tensor = nullptr, this tensor will store the result
* of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
+ * @param[out] dst Destination tensor info. Data type supported: same as @p src
* @param[in] activation_info Activation layer information.
*/
- void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info);
+ void configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info);
/** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
*
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+ * @param[in] src Source tensor info. In case of @p dst tensor info = nullptr, this tensor will store the result
* of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] dst Destination tensor info. Data type supported: same as @p src
* @param[in] act_info Activation layer information.
*
* @return a status
@@ -79,9 +62,12 @@ public:
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
private:
- ActivationLayerInfo _act_info;
+ ActivationLayerInfo _act_info{};
};
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H */
diff --git a/src/core/cpu/kernels/floor/CpuFloorKernel.cpp b/src/core/cpu/kernels/CpuFloorKernel.cpp
index 738f04d14a..6115b69907 100644
--- a/src/core/cpu/kernels/floor/CpuFloorKernel.cpp
+++ b/src/core/cpu/kernels/CpuFloorKernel.cpp
@@ -32,7 +32,7 @@
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/common/Registrars.h"
-#include "src/core/cpu/kernels/floor/impl/list.h"
+#include "src/core/cpu/kernels/floor/list.h"
namespace arm_compute
{
diff --git a/src/core/cpu/kernels/CpuFloorKernel.h b/src/core/cpu/kernels/CpuFloorKernel.h
index dc3a9d5ff1..25d78c7870 100644
--- a/src/core/cpu/kernels/CpuFloorKernel.h
+++ b/src/core/cpu/kernels/CpuFloorKernel.h
@@ -63,8 +63,8 @@ public:
Window infer_window(const ITensorInfo *src, const ITensorInfo *dst);
// Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) final;
- const char *name() const final;
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
};
} // namespace kernels
} // namespace cpu
diff --git a/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp b/src/core/cpu/kernels/activation/NEON/fp16.cpp
index 27ae2830cc..27ae2830cc 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp
+++ b/src/core/cpu/kernels/activation/NEON/fp16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp b/src/core/cpu/kernels/activation/NEON/fp32.cpp
index 0687646be7..0687646be7 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp
+++ b/src/core/cpu/kernels/activation/NEON/fp32.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp b/src/core/cpu/kernels/activation/NEON/qasymm8.cpp
index 7506a8294f..7506a8294f 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp
+++ b/src/core/cpu/kernels/activation/NEON/qasymm8.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp
index 8f75abea8e..8f75abea8e 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp b/src/core/cpu/kernels/activation/NEON/qsymm16.cpp
index 9eee360427..9eee360427 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp
+++ b/src/core/cpu/kernels/activation/NEON/qsymm16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp b/src/core/cpu/kernels/activation/SVE/fp16.cpp
index 8208813cd3..8208813cd3 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp
+++ b/src/core/cpu/kernels/activation/SVE/fp16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp b/src/core/cpu/kernels/activation/SVE/fp32.cpp
index 55bdc9999e..55bdc9999e 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp
+++ b/src/core/cpu/kernels/activation/SVE/fp32.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp b/src/core/cpu/kernels/activation/SVE/qasymm8.cpp
index 3e29a68788..9eea3ace9e 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp
+++ b/src/core/cpu/kernels/activation/SVE/qasymm8.cpp
@@ -77,18 +77,18 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
auto vo = svdup_n_f32(o);
// Initialise scale/offset for re-quantization with int32_t
- const auto voffset_in = svdup_n_s32(qi_in.offset);
- int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_s32 = svdup_n_s32(s_s32);
- const auto vo_s32 = svdup_n_s32(o_s32);
+ const auto voffset_in = svdup_n_s32(qi_in.offset);
+ int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_s32 = svdup_n_s32(s_s32);
+ const auto vo_s32 = svdup_n_s32(o_s32);
// Initialise scale/offset for re-quantization for leaky relu
- int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
- arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
- const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
+ int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
+ const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
execute_window_loop(win_collapsed, [&](const Coordinates &)
{
@@ -179,7 +179,7 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
}
else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
{
- svbool_t p0, p1, p2, p3;
+ svbool_t p0, p1, p2, p3;
svint32x4_t tmp_dep;
// Expand to int32
@@ -190,11 +190,12 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(vin))),
svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(vin))),
svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(vin))),
- } }
+ }
+ }
};
// Compare elements to input offset
- if (qi_in.scale >= 0)
+ if(qi_in.scale >= 0)
{
p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in);
p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in);
@@ -210,27 +211,25 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
}
// Multiply negative elements and requantize if necessary
- if (requant)
+ if(requant)
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8)
- );
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8));
}
else
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8)
- );
+ svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8));
}
// Convert uint32 vectors to uint16 vectors (with saturation)
- const auto v_low_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
+ const auto v_low_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
const auto v_high_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3));
// convert uint16 vectors to uint8 vectors (with saturation)
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp
index f21d0657ab..0b3d798942 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp
@@ -76,18 +76,18 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
auto vo = svdup_n_f32(o);
// Initialise scale/offset for re-quantization with int32_t
- const auto voffset_in = svdup_n_s32(qi_in.offset);
- int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_s32 = svdup_n_s32(s_s32);
- const auto vo_s32 = svdup_n_s32(o_s32);
+ const auto voffset_in = svdup_n_s32(qi_in.offset);
+ int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_s32 = svdup_n_s32(s_s32);
+ const auto vo_s32 = svdup_n_s32(o_s32);
// Initialise scale/offset for re-quantization for leaky relu
- int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
- arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
- const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
+ int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
+ const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
execute_window_loop(win_collapsed, [&](const Coordinates &)
{
@@ -178,7 +178,7 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
}
else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
{
- svbool_t p0, p1, p2, p3;
+ svbool_t p0, p1, p2, p3;
svint32x4_t tmp_dep;
// Expand to int32
@@ -189,11 +189,12 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
svmovlt_s32(svmovlb_s16(vin)),
svmovlb_s32(svmovlt_s16(vin)),
svmovlt_s32(svmovlt_s16(vin)),
- } }
+ }
+ }
};
// Compare elements to input offset
- if (qi_in.scale >= 0)
+ if(qi_in.scale >= 0)
{
p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in);
p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in);
@@ -209,27 +210,25 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
}
// Multiply negative elements and requantize if necessary
- if (requant)
+ if(requant)
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8)
- );
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8));
}
else
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8)
- );
+ svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8));
}
// Convert uint32 vectors to uint16 vectors (with saturation)
- const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
+ const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
const auto v_high_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3));
// convert uint16 vectors to uint8 vectors (with saturation)
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp b/src/core/cpu/kernels/activation/SVE/qsymm16.cpp
index dbaf267bf9..dbaf267bf9 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp
+++ b/src/core/cpu/kernels/activation/SVE/qsymm16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/list.h b/src/core/cpu/kernels/activation/list.h
index db6c5b21b8..409d025db0 100644
--- a/src/core/NEON/kernels/activation/impl/list.h
+++ b/src/core/cpu/kernels/activation/list.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/src/core/cpu/kernels/floor/impl/NEON/fp16.cpp b/src/core/cpu/kernels/floor/NEON/fp16.cpp
index 0d31eb77f8..0d31eb77f8 100644
--- a/src/core/cpu/kernels/floor/impl/NEON/fp16.cpp
+++ b/src/core/cpu/kernels/floor/NEON/fp16.cpp
diff --git a/src/core/cpu/kernels/floor/impl/NEON/fp32.cpp b/src/core/cpu/kernels/floor/NEON/fp32.cpp
index dd63f9f9d7..dd63f9f9d7 100644
--- a/src/core/cpu/kernels/floor/impl/NEON/fp32.cpp
+++ b/src/core/cpu/kernels/floor/NEON/fp32.cpp
diff --git a/src/core/cpu/kernels/floor/impl/list.h b/src/core/cpu/kernels/floor/list.h
index 4367e0ffc9..4367e0ffc9 100644
--- a/src/core/cpu/kernels/floor/impl/list.h
+++ b/src/core/cpu/kernels/floor/list.h