author     Georgios Pinitas <georgios.pinitas@arm.com>    2021-01-08 17:25:55 +0000
committer  Georgios Pinitas <georgios.pinitas@arm.com>    2021-01-11 16:48:31 +0000
commit     f8f0442e9a6105be0e32f4defec5fbc10248ea6e (patch)
tree       d4e77c82f57df175dcec6c46ed2f74f4a8b72d7a /src/core
parent     4f77ba9f2dccbae1b46b2d4e17d862560f858050 (diff)
download   ComputeLibrary-f8f0442e9a6105be0e32f4defec5fbc10248ea6e.tar.gz
Make CpuActivation stateless
- Rename NEActivationLayer to CpuActivation
- Add member function to generate execution window

Partially Resolves: COMPMID-3992

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I4e1ae15cf456b860d3080b2fedc4dbcce7d1bb79
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4791
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
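For context, a minimal sketch of the calling pattern the stateless kernel enables. CpuActivationKernel, configure(), validate() and run_op() are taken from the diff below; the driver function, the RELU choice, the include set and the single-threaded ThreadInfo{} are illustrative assumptions, not part of this patch:

    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "src/core/cpu/kernels/CpuActivationKernel.h"

    using namespace arm_compute;

    // Hypothetical driver: configuration consumes tensor metadata (ITensorInfo)
    // only, so the configured kernel holds no tensor pointers and can be reused.
    void run_relu(ITensor *src, ITensor *dst)
    {
        ActivationLayerInfo act_info{ ActivationLayerInfo::ActivationFunction::RELU };

        ARM_COMPUTE_ERROR_THROW_ON(
            cpu::kernels::CpuActivationKernel::validate(src->info(), dst->info(), act_info));

        cpu::kernels::CpuActivationKernel kernel;
        kernel.configure(src->info(), dst->info(), act_info);

        // Actual tensors are bound only at run time, through an ITensorPack.
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, dst);
        kernel.run_op(pack, kernel.window(), ThreadInfo{});
    }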
Diffstat (limited to 'src/core')
-rw-r--r--  src/core/NEON/NEKernels.h | 1
-rw-r--r--  src/core/NEON/SVEMath.inl | 2
-rw-r--r--  src/core/cpu/kernels/CpuActivationKernel.cpp (renamed from src/core/NEON/kernels/NEActivationLayerKernel.cpp) | 69
-rw-r--r--  src/core/cpu/kernels/CpuActivationKernel.h (renamed from src/core/NEON/kernels/NEActivationLayerKernel.h) | 60
-rw-r--r--  src/core/cpu/kernels/CpuFloorKernel.cpp (renamed from src/core/cpu/kernels/floor/CpuFloorKernel.cpp) | 2
-rw-r--r--  src/core/cpu/kernels/CpuFloorKernel.h | 4
-rw-r--r--  src/core/cpu/kernels/activation/NEON/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/fp16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/fp32.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/NEON/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/SVE/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/fp16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/SVE/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/fp32.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/SVE/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp) | 49
-rw-r--r--  src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp) | 49
-rw-r--r--  src/core/cpu/kernels/activation/SVE/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/activation/list.h (renamed from src/core/NEON/kernels/activation/impl/list.h) | 2
-rw-r--r--  src/core/cpu/kernels/floor/NEON/fp16.cpp (renamed from src/core/cpu/kernels/floor/impl/NEON/fp16.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/floor/NEON/fp32.cpp (renamed from src/core/cpu/kernels/floor/impl/NEON/fp32.cpp) | 0
-rw-r--r--  src/core/cpu/kernels/floor/list.h (renamed from src/core/cpu/kernels/floor/impl/list.h) | 0
20 files changed, 115 insertions, 123 deletions
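One substantive change visible in the diff below: the QASYMM8/QSYMM16 activation whitelists move from function-local static std::set lookups to namespace-scope std::array tables searched with std::find. A minimal, self-contained sketch of that lookup pattern (the enum and names here are illustrative, not the library's):

    #include <algorithm>
    #include <array>

    enum class Act { RELU, TANH, LOGISTIC, HARD_SWISH };

    // For a tiny fixed whitelist, a constant array plus a linear std::find
    // avoids the node allocations and first-use initialisation that a
    // function-local static std::set would incur on every validate() path.
    static const std::array<Act, 3> supported_acts = { Act::RELU, Act::TANH, Act::LOGISTIC };

    bool is_supported(Act f)
    {
        return std::find(std::begin(supported_acts), std::end(supported_acts), f)
               != std::end(supported_acts);
    }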
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 1e0b1f08d6..0d447de44c 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -27,7 +27,6 @@
/* Header regrouping all the NEON kernels */
#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
#include "src/core/NEON/kernels/NEAccumulateKernel.h"
-#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
index f201e92738..cf7f9f5a95 100644
--- a/src/core/NEON/SVEMath.inl
+++ b/src/core/NEON/SVEMath.inl
@@ -225,7 +225,7 @@ inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x)
inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val)
{
using ScalarType = float;
- using IntType = u32;
+ using IntType = uint32_t;
constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3)
constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp
index f215787bf6..abdba3ae53 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
+#include "src/core/cpu/kernels/CpuActivationKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
@@ -30,13 +30,17 @@
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "src/core/NEON/kernels/activation/impl/list.h"
#include "src/core/common/Registrars.h"
+#include "src/core/cpu/kernels/activation/list.h"
-#include <set>
+#include <array>
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
struct ActivationSelectorData
@@ -127,6 +131,25 @@ const ActivationKernel *get_implementation(const ActivationSelectorData &data)
return nullptr;
}
+/* Supported activation in the 8-bit integer domain */
+static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations =
+{
+ ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH,
+ ActivationLayerInfo::ActivationFunction::LEAKY_RELU,
+};
+/* Supported activation in the 16-bit integer domain */
+static const std::array<ActivationLayerInfo::ActivationFunction, 3> qsymm16_activations =
+{
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH,
+ ActivationLayerInfo::ActivationFunction::HARD_SWISH
+};
+
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
@@ -135,30 +158,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
const auto *uk = get_implementation(ActivationSelectorData{ input->data_type() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
- const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
- {
- ActivationLayerInfo::ActivationFunction::RELU,
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH,
- ActivationLayerInfo::ActivationFunction::HARD_SWISH,
- ActivationLayerInfo::ActivationFunction::LEAKY_RELU,
- };
- const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
- {
- ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH,
- ActivationLayerInfo::ActivationFunction::HARD_SWISH
- };
const DataType data_type = input->data_type();
const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (std::find(std::begin(qasymm8_activations), std::end(qasymm8_activations), f_act) == std::end(qasymm8_activations)),
"For QASYMM8 only hard swish, leaky relu, tanh, logistic, relu and lower/upper bounded relu are supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (std::find(std::begin(qsymm16_activations), std::end(qsymm16_activations), f_act) == std::end(qsymm16_activations)),
"For QSYMM16 only tanh and logistic are supported");
ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH)
&& (oq_info != QuantizationInfo(1.f / 128.f, 128)));
@@ -200,12 +207,7 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
}
} // namespace
-NEActivationLayerKernel::NEActivationLayerKernel()
- : _act_info()
-{
-}
-
-void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info)
+void CpuActivationKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -219,7 +221,7 @@ void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *o
ICPPKernel::configure(win_config.second);
}
-Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+Status CpuActivationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
@@ -228,7 +230,7 @@ Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensor
return Status{};
}
-void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
// Early exit on disabled activation
if(!_act_info.enabled())
@@ -249,4 +251,11 @@ void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window,
uk->ukernel(src, dst, _act_info, window);
}
+
+const char *CpuActivationKernel::name() const
+{
+ return "CpuActivationKernel";
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.h b/src/core/cpu/kernels/CpuActivationKernel.h
index 783783c6ab..083915ba9f 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/src/core/cpu/kernels/CpuActivationKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,56 +21,39 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
+#ifndef ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H
+#define ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/INEKernel.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
namespace arm_compute
{
-// Forward declarations
-class ITensor;
-
-/** Interface for the activation layer kernel. */
-class NEActivationLayerKernel : public INEKernel
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the activation kernel */
+class CpuActivationKernel : public ICpuKernel
{
public:
- const char *name() const override
- {
- return "NEActivationLayerKernel";
- }
- /** Constructor */
- NEActivationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel(const NEActivationLayerKernel &) = delete;
- /** Default move constructor */
- NEActivationLayerKernel(NEActivationLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete;
- /** Default move assignment operator */
- NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default;
- /** Default destructor */
- ~NEActivationLayerKernel() = default;
+ CpuActivationKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuActivationKernel);
/** Set the input and output tensor.
*
* @note If the output tensor is a nullptr, the activation function will be performed in-place
*
- * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result
+ * @param[in, out] src Source tensor info. In case of @p dst tensor = nullptr, this tensor will store the result
* of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
+ * @param[out] dst Destination tensor info. Data type supported: same as @p src
* @param[in] activation_info Activation layer information.
*/
- void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info);
+ void configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info);
/** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
*
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+ * @param[in] src Source tensor info. In case of @p dst tensor info = nullptr, this tensor will store the result
* of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] dst Destination tensor info. Data type supported: same as @p src
* @param[in] act_info Activation layer information.
*
* @return a status
@@ -79,9 +62,12 @@ public:
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
private:
- ActivationLayerInfo _act_info;
+ ActivationLayerInfo _act_info{};
};
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H */
diff --git a/src/core/cpu/kernels/floor/CpuFloorKernel.cpp b/src/core/cpu/kernels/CpuFloorKernel.cpp
index 738f04d14a..6115b69907 100644
--- a/src/core/cpu/kernels/floor/CpuFloorKernel.cpp
+++ b/src/core/cpu/kernels/CpuFloorKernel.cpp
@@ -32,7 +32,7 @@
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/common/Registrars.h"
-#include "src/core/cpu/kernels/floor/impl/list.h"
+#include "src/core/cpu/kernels/floor/list.h"
namespace arm_compute
{
diff --git a/src/core/cpu/kernels/CpuFloorKernel.h b/src/core/cpu/kernels/CpuFloorKernel.h
index dc3a9d5ff1..25d78c7870 100644
--- a/src/core/cpu/kernels/CpuFloorKernel.h
+++ b/src/core/cpu/kernels/CpuFloorKernel.h
@@ -63,8 +63,8 @@ public:
Window infer_window(const ITensorInfo *src, const ITensorInfo *dst);
// Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) final;
- const char *name() const final;
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
};
} // namespace kernels
} // namespace cpu
diff --git a/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp b/src/core/cpu/kernels/activation/NEON/fp16.cpp
index 27ae2830cc..27ae2830cc 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp
+++ b/src/core/cpu/kernels/activation/NEON/fp16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp b/src/core/cpu/kernels/activation/NEON/fp32.cpp
index 0687646be7..0687646be7 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp
+++ b/src/core/cpu/kernels/activation/NEON/fp32.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp b/src/core/cpu/kernels/activation/NEON/qasymm8.cpp
index 7506a8294f..7506a8294f 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp
+++ b/src/core/cpu/kernels/activation/NEON/qasymm8.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp
index 8f75abea8e..8f75abea8e 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp
diff --git a/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp b/src/core/cpu/kernels/activation/NEON/qsymm16.cpp
index 9eee360427..9eee360427 100644
--- a/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp
+++ b/src/core/cpu/kernels/activation/NEON/qsymm16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp b/src/core/cpu/kernels/activation/SVE/fp16.cpp
index 8208813cd3..8208813cd3 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp
+++ b/src/core/cpu/kernels/activation/SVE/fp16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp b/src/core/cpu/kernels/activation/SVE/fp32.cpp
index 55bdc9999e..55bdc9999e 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp
+++ b/src/core/cpu/kernels/activation/SVE/fp32.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp b/src/core/cpu/kernels/activation/SVE/qasymm8.cpp
index 3e29a68788..9eea3ace9e 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp
+++ b/src/core/cpu/kernels/activation/SVE/qasymm8.cpp
@@ -77,18 +77,18 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
auto vo = svdup_n_f32(o);
// Initialise scale/offset for re-quantization with int32_t
- const auto voffset_in = svdup_n_s32(qi_in.offset);
- int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_s32 = svdup_n_s32(s_s32);
- const auto vo_s32 = svdup_n_s32(o_s32);
+ const auto voffset_in = svdup_n_s32(qi_in.offset);
+ int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_s32 = svdup_n_s32(s_s32);
+ const auto vo_s32 = svdup_n_s32(o_s32);
// Initialise scale/offset for re-quantization for leaky relu
- int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
- arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
- const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
+ int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
+ const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
execute_window_loop(win_collapsed, [&](const Coordinates &)
{
@@ -179,7 +179,7 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
}
else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
{
- svbool_t p0, p1, p2, p3;
+ svbool_t p0, p1, p2, p3;
svint32x4_t tmp_dep;
// Expand to int32
@@ -190,11 +190,12 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(vin))),
svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(vin))),
svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(vin))),
- } }
+ }
+ }
};
// Compare elements to input offset
- if (qi_in.scale >= 0)
+ if(qi_in.scale >= 0)
{
p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in);
p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in);
@@ -210,27 +211,25 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
}
// Multiply negative elements and requantize if necessary
- if (requant)
+ if(requant)
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8)
- );
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8));
}
else
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8)
- );
+ svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8));
}
// Convert uint32 vectors to uint16 vectors (with saturation)
- const auto v_low_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
+ const auto v_low_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
const auto v_high_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3));
// convert uint16 vectors to uint8 vectors (with saturation)
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp
index f21d0657ab..0b3d798942 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp
@@ -76,18 +76,18 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
auto vo = svdup_n_f32(o);
// Initialise scale/offset for re-quantization with int32_t
- const auto voffset_in = svdup_n_s32(qi_in.offset);
- int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_s32 = svdup_n_s32(s_s32);
- const auto vo_s32 = svdup_n_s32(o_s32);
+ const auto voffset_in = svdup_n_s32(qi_in.offset);
+ int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_s32 = svdup_n_s32(s_s32);
+ const auto vo_s32 = svdup_n_s32(o_s32);
// Initialise scale/offset for re-quantization for leaky relu
- int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
- arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
- const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
- const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
+ int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
+ const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
execute_window_loop(win_collapsed, [&](const Coordinates &)
{
@@ -178,7 +178,7 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
}
else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
{
- svbool_t p0, p1, p2, p3;
+ svbool_t p0, p1, p2, p3;
svint32x4_t tmp_dep;
// Expand to int32
@@ -189,11 +189,12 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
svmovlt_s32(svmovlb_s16(vin)),
svmovlb_s32(svmovlt_s16(vin)),
svmovlt_s32(svmovlt_s16(vin)),
- } }
+ }
+ }
};
// Compare elements to input offset
- if (qi_in.scale >= 0)
+ if(qi_in.scale >= 0)
{
p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in);
p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in);
@@ -209,27 +210,25 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
}
// Multiply negative elements and requantize if necessary
- if (requant)
+ if(requant)
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
- svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8)
- );
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
+ svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8));
}
else
{
tmp_dep = svcreate4_s32(
- svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
- svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8)
- );
+ svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
+ svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8));
}
// Convert uint32 vectors to uint16 vectors (with saturation)
- const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
+ const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
const auto v_high_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3));
// convert uint16 vectors to uint8 vectors (with saturation)
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp b/src/core/cpu/kernels/activation/SVE/qsymm16.cpp
index dbaf267bf9..dbaf267bf9 100644
--- a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp
+++ b/src/core/cpu/kernels/activation/SVE/qsymm16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/list.h b/src/core/cpu/kernels/activation/list.h
index db6c5b21b8..409d025db0 100644
--- a/src/core/NEON/kernels/activation/impl/list.h
+++ b/src/core/cpu/kernels/activation/list.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/src/core/cpu/kernels/floor/impl/NEON/fp16.cpp b/src/core/cpu/kernels/floor/NEON/fp16.cpp
index 0d31eb77f8..0d31eb77f8 100644
--- a/src/core/cpu/kernels/floor/impl/NEON/fp16.cpp
+++ b/src/core/cpu/kernels/floor/NEON/fp16.cpp
diff --git a/src/core/cpu/kernels/floor/impl/NEON/fp32.cpp b/src/core/cpu/kernels/floor/NEON/fp32.cpp
index dd63f9f9d7..dd63f9f9d7 100644
--- a/src/core/cpu/kernels/floor/impl/NEON/fp32.cpp
+++ b/src/core/cpu/kernels/floor/NEON/fp32.cpp
diff --git a/src/core/cpu/kernels/floor/impl/list.h b/src/core/cpu/kernels/floor/list.h
index 4367e0ffc9..4367e0ffc9 100644
--- a/src/core/cpu/kernels/floor/impl/list.h
+++ b/src/core/cpu/kernels/floor/list.h