diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/NEON/NEKernels.h | 1 | ||||
-rw-r--r-- | src/core/NEON/SVEMath.inl | 2 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuActivationKernel.cpp (renamed from src/core/NEON/kernels/NEActivationLayerKernel.cpp) | 69 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuActivationKernel.h (renamed from src/core/NEON/kernels/NEActivationLayerKernel.h) | 60 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuFloorKernel.cpp (renamed from src/core/cpu/kernels/floor/CpuFloorKernel.cpp) | 2 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuFloorKernel.h | 4 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/NEON/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/fp16.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/NEON/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/fp32.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/NEON/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/NEON/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/SVE/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/fp16.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/SVE/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/fp32.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/SVE/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp) | 49 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp) | 49 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/SVE/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/activation/list.h (renamed from src/core/NEON/kernels/activation/impl/list.h) | 2 | ||||
-rw-r--r-- | src/core/cpu/kernels/floor/NEON/fp16.cpp (renamed from src/core/cpu/kernels/floor/impl/NEON/fp16.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/floor/NEON/fp32.cpp (renamed from src/core/cpu/kernels/floor/impl/NEON/fp32.cpp) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/floor/list.h (renamed from src/core/cpu/kernels/floor/impl/list.h) | 0 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEActivationLayer.cpp | 47 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEFloor.cpp | 17 | ||||
-rw-r--r-- | src/runtime/cpu/ICpuOperator.h | 2 | ||||
-rw-r--r-- | src/runtime/cpu/operators/CpuActivation.cpp | 44 | ||||
-rw-r--r-- | src/runtime/cpu/operators/CpuActivation.h | 58 |
25 files changed, 240 insertions, 166 deletions
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h index 1e0b1f08d6..0d447de44c 100644 --- a/src/core/NEON/NEKernels.h +++ b/src/core/NEON/NEKernels.h @@ -27,7 +27,6 @@ /* Header regrouping all the NEON kernels */ #include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" #include "src/core/NEON/kernels/NEAccumulateKernel.h" -#include "src/core/NEON/kernels/NEActivationLayerKernel.h" #include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h" #include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl index f201e92738..cf7f9f5a95 100644 --- a/src/core/NEON/SVEMath.inl +++ b/src/core/NEON/SVEMath.inl @@ -225,7 +225,7 @@ inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x) inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val) { using ScalarType = float; - using IntType = u32; + using IntType = uint32_t; constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3) constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5) diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp index f215787bf6..abdba3ae53 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/cpu/kernels/CpuActivationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/NEON/kernels/NEActivationLayerKernel.h" +#include "src/core/cpu/kernels/CpuActivationKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" @@ -30,13 +30,17 @@ #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" -#include "src/core/NEON/kernels/activation/impl/list.h" #include "src/core/common/Registrars.h" +#include "src/core/cpu/kernels/activation/list.h" -#include <set> +#include <array> namespace arm_compute { +namespace cpu +{ +namespace kernels +{ namespace { struct ActivationSelectorData @@ -127,6 +131,25 @@ const ActivationKernel *get_implementation(const ActivationSelectorData &data) return nullptr; } +/* Supported activation in the 8-bit integer domain */ +static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations = +{ + ActivationLayerInfo::ActivationFunction::RELU, + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH, + ActivationLayerInfo::ActivationFunction::HARD_SWISH, + ActivationLayerInfo::ActivationFunction::LEAKY_RELU, +}; +/* Supported activation in the 16-bit integer domain */ +static const std::array<ActivationLayerInfo::ActivationFunction, 3> qsymm16_activations = +{ + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH, + ActivationLayerInfo::ActivationFunction::HARD_SWISH +}; + Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); @@ -135,30 +158,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c const auto *uk = get_implementation(ActivationSelectorData{ input->data_type() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); - const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = - { - ActivationLayerInfo::ActivationFunction::RELU, - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH, - ActivationLayerInfo::ActivationFunction::HARD_SWISH, - ActivationLayerInfo::ActivationFunction::LEAKY_RELU, - }; - const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = - { - ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH, - ActivationLayerInfo::ActivationFunction::HARD_SWISH - }; const DataType data_type = input->data_type(); const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info(); const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation(); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (std::find(std::begin(qasymm8_activations), std::end(qasymm8_activations), f_act) == std::end(qasymm8_activations)), "For QASYMM8 only hard swish, leaky relu, tanh, logistic, relu and lower/upper bounded relu are supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (std::find(std::begin(qsymm16_activations), std::end(qsymm16_activations), f_act) == std::end(qsymm16_activations)), "For QSYMM16 only tanh and logistic are supported"); ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128))); @@ -200,12 +207,7 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input } } // namespace -NEActivationLayerKernel::NEActivationLayerKernel() - : _act_info() -{ -} - -void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info) +void CpuActivationKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -219,7 +221,7 @@ void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *o ICPPKernel::configure(win_config.second); } -Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) +Status CpuActivationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info)); @@ -228,7 +230,7 @@ Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { // Early exit on disabled activation if(!_act_info.enabled()) @@ -249,4 +251,11 @@ void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, uk->ukernel(src, dst, _act_info, window); } + +const char *CpuActivationKernel::name() const +{ + return "CpuActivationKernel"; +} +} // namespace kernels +} // namespace cpu } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.h b/src/core/cpu/kernels/CpuActivationKernel.h index 783783c6ab..083915ba9f 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.h +++ b/src/core/cpu/kernels/CpuActivationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,56 +21,39 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H +#ifndef ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H +#define ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H -#include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/INEKernel.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include <arm_fp16.h> -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#include "src/core/common/Macros.h" +#include "src/core/cpu/ICpuKernel.h" namespace arm_compute { -// Forward declarations -class ITensor; - -/** Interface for the activation layer kernel. */ -class NEActivationLayerKernel : public INEKernel +namespace cpu +{ +namespace kernels +{ +/** Interface for the activation kernel */ +class CpuActivationKernel : public ICpuKernel { public: - const char *name() const override - { - return "NEActivationLayerKernel"; - } - /** Constructor */ - NEActivationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; - /** Default move constructor */ - NEActivationLayerKernel(NEActivationLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; - /** Default move assignment operator */ - NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; - /** Default destructor */ - ~NEActivationLayerKernel() = default; + CpuActivationKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuActivationKernel); /** Set the input and output tensor. * * @note If the output tensor is a nullptr, the activation function will be performed in-place * - * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result + * @param[in, out] src Source tensor info. In case of @p dst tensor = nullptr, this tensor will store the result * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[out] dst Destination tensor info. Data type supported: same as @p src * @param[in] activation_info Activation layer information. */ - void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); + void configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info); /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * @param[in] src Source tensor info. In case of @p dst tensor info = nullptr, this tensor will store the result * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] dst Destination tensor info. Data type supported: same as @p src * @param[in] act_info Activation layer information. * * @return a status @@ -79,9 +62,12 @@ public: // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; private: - ActivationLayerInfo _act_info; + ActivationLayerInfo _act_info{}; }; +} // namespace kernels +} // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */ +#endif /* ARM_COMPUTE_CPU_ACTIVATION_KERNEL_H */ diff --git a/src/core/cpu/kernels/floor/CpuFloorKernel.cpp b/src/core/cpu/kernels/CpuFloorKernel.cpp index 738f04d14a..6115b69907 100644 --- a/src/core/cpu/kernels/floor/CpuFloorKernel.cpp +++ b/src/core/cpu/kernels/CpuFloorKernel.cpp @@ -32,7 +32,7 @@ #include "src/core/helpers/WindowHelpers.h" #include "src/core/common/Registrars.h" -#include "src/core/cpu/kernels/floor/impl/list.h" +#include "src/core/cpu/kernels/floor/list.h" namespace arm_compute { diff --git a/src/core/cpu/kernels/CpuFloorKernel.h b/src/core/cpu/kernels/CpuFloorKernel.h index dc3a9d5ff1..25d78c7870 100644 --- a/src/core/cpu/kernels/CpuFloorKernel.h +++ b/src/core/cpu/kernels/CpuFloorKernel.h @@ -63,8 +63,8 @@ public: Window infer_window(const ITensorInfo *src, const ITensorInfo *dst); // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) final; - const char *name() const final; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; }; } // namespace kernels } // namespace cpu diff --git a/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp b/src/core/cpu/kernels/activation/NEON/fp16.cpp index 27ae2830cc..27ae2830cc 100644 --- a/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp +++ b/src/core/cpu/kernels/activation/NEON/fp16.cpp diff --git a/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp b/src/core/cpu/kernels/activation/NEON/fp32.cpp index 0687646be7..0687646be7 100644 --- a/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp +++ b/src/core/cpu/kernels/activation/NEON/fp32.cpp diff --git a/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp b/src/core/cpu/kernels/activation/NEON/qasymm8.cpp index 7506a8294f..7506a8294f 100644 --- a/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp +++ b/src/core/cpu/kernels/activation/NEON/qasymm8.cpp diff --git a/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp index 8f75abea8e..8f75abea8e 100644 --- a/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp diff --git a/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp b/src/core/cpu/kernels/activation/NEON/qsymm16.cpp index 9eee360427..9eee360427 100644 --- a/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp +++ b/src/core/cpu/kernels/activation/NEON/qsymm16.cpp diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp b/src/core/cpu/kernels/activation/SVE/fp16.cpp index 8208813cd3..8208813cd3 100644 --- a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp +++ b/src/core/cpu/kernels/activation/SVE/fp16.cpp diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp b/src/core/cpu/kernels/activation/SVE/fp32.cpp index 55bdc9999e..55bdc9999e 100644 --- a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp +++ b/src/core/cpu/kernels/activation/SVE/fp32.cpp diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp b/src/core/cpu/kernels/activation/SVE/qasymm8.cpp index 3e29a68788..9eea3ace9e 100644 --- a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp +++ b/src/core/cpu/kernels/activation/SVE/qasymm8.cpp @@ -77,18 +77,18 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa auto vo = svdup_n_f32(o); // Initialise scale/offset for re-quantization with int32_t - const auto voffset_in = svdup_n_s32(qi_in.offset); - int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - const auto vs_s32 = svdup_n_s32(s_s32); - const auto vo_s32 = svdup_n_s32(o_s32); + const auto voffset_in = svdup_n_s32(qi_in.offset); + int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + const auto vs_s32 = svdup_n_s32(s_s32); + const auto vo_s32 = svdup_n_s32(o_s32); // Initialise scale/offset for re-quantization for leaky relu - int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8), - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32); - const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32); + int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8), + arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32); + const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32); execute_window_loop(win_collapsed, [&](const Coordinates &) { @@ -179,7 +179,7 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa } else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU) { - svbool_t p0, p1, p2, p3; + svbool_t p0, p1, p2, p3; svint32x4_t tmp_dep; // Expand to int32 @@ -190,11 +190,12 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(vin))), svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(vin))), svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(vin))), - } } + } + } }; // Compare elements to input offset - if (qi_in.scale >= 0) + if(qi_in.scale >= 0) { p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in); p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in); @@ -210,27 +211,25 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa } // Multiply negative elements and requantize if necessary - if (requant) + if(requant) { tmp_dep = svcreate4_s32( - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8), - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8), - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8), - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8) - ); + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8), + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8), + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8), + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8)); } else { tmp_dep = svcreate4_s32( - svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8), - svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8), - svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8), - svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8) - ); + svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8), + svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8), + svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8), + svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8)); } // Convert uint32 vectors to uint16 vectors (with saturation) - const auto v_low_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1)); + const auto v_low_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1)); const auto v_high_u16 = svqxtunt_s32(svqxtunb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3)); // convert uint16 vectors to uint8 vectors (with saturation) diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp index f21d0657ab..0b3d798942 100644 --- a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp @@ -76,18 +76,18 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ auto vo = svdup_n_f32(o); // Initialise scale/offset for re-quantization with int32_t - const auto voffset_in = svdup_n_s32(qi_in.offset); - int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - const auto vs_s32 = svdup_n_s32(s_s32); - const auto vo_s32 = svdup_n_s32(o_s32); + const auto voffset_in = svdup_n_s32(qi_in.offset); + int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + const auto vs_s32 = svdup_n_s32(s_s32); + const auto vo_s32 = svdup_n_s32(o_s32); // Initialise scale/offset for re-quantization for leaky relu - int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8), - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32); - const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32); + int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8), + arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32); + const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32); execute_window_loop(win_collapsed, [&](const Coordinates &) { @@ -178,7 +178,7 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ } else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU) { - svbool_t p0, p1, p2, p3; + svbool_t p0, p1, p2, p3; svint32x4_t tmp_dep; // Expand to int32 @@ -189,11 +189,12 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ svmovlt_s32(svmovlb_s16(vin)), svmovlb_s32(svmovlt_s16(vin)), svmovlt_s32(svmovlt_s16(vin)), - } } + } + } }; // Compare elements to input offset - if (qi_in.scale >= 0) + if(qi_in.scale >= 0) { p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in); p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in); @@ -209,27 +210,25 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ } // Multiply negative elements and requantize if necessary - if (requant) + if(requant) { tmp_dep = svcreate4_s32( - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8), - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8), - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8), - svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8) - ); + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8), + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8), + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8), + svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8)); } else { tmp_dep = svcreate4_s32( - svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8), - svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8), - svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8), - svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8) - ); + svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8), + svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8), + svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8), + svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8)); } // Convert uint32 vectors to uint16 vectors (with saturation) - const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1)); + const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1)); const auto v_high_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3)); // convert uint16 vectors to uint8 vectors (with saturation) diff --git a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp b/src/core/cpu/kernels/activation/SVE/qsymm16.cpp index dbaf267bf9..dbaf267bf9 100644 --- a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp +++ b/src/core/cpu/kernels/activation/SVE/qsymm16.cpp diff --git a/src/core/NEON/kernels/activation/impl/list.h b/src/core/cpu/kernels/activation/list.h index db6c5b21b8..409d025db0 100644 --- a/src/core/NEON/kernels/activation/impl/list.h +++ b/src/core/cpu/kernels/activation/list.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/core/cpu/kernels/floor/impl/NEON/fp16.cpp b/src/core/cpu/kernels/floor/NEON/fp16.cpp index 0d31eb77f8..0d31eb77f8 100644 --- a/src/core/cpu/kernels/floor/impl/NEON/fp16.cpp +++ b/src/core/cpu/kernels/floor/NEON/fp16.cpp diff --git a/src/core/cpu/kernels/floor/impl/NEON/fp32.cpp b/src/core/cpu/kernels/floor/NEON/fp32.cpp index dd63f9f9d7..dd63f9f9d7 100644 --- a/src/core/cpu/kernels/floor/impl/NEON/fp32.cpp +++ b/src/core/cpu/kernels/floor/NEON/fp32.cpp diff --git a/src/core/cpu/kernels/floor/impl/list.h b/src/core/cpu/kernels/floor/list.h index 4367e0ffc9..4367e0ffc9 100644 --- a/src/core/cpu/kernels/floor/impl/list.h +++ b/src/core/cpu/kernels/floor/list.h diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index 27f01f67ce..2b5c51fa5a 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,37 +23,17 @@ */ #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/runtime/IRuntimeContext.h" -#include "arm_compute/runtime/Tensor.h" -#include "src/core/NEON/kernels/NEActivationLayerKernel.h" +#include "arm_compute/core/Validate.h" +#include "src/runtime/cpu/operators/CpuActivation.h" namespace arm_compute { -namespace experimental -{ -NEActivationLayer::~NEActivationLayer() = default; - -void NEActivationLayer::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info) -{ - auto k = std::make_unique<NEActivationLayerKernel>(); - k->configure(input, output, activation_info); - _kernel = std::move(k); -} - -Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) -{ - return NEActivationLayerKernel::validate(input, output, activation_info); -} -} // namespace experimental - struct NEActivationLayer::Impl { - const ITensor *src{ nullptr }; - ITensor *dst{ nullptr }; - IRuntimeContext *ctx{ nullptr }; - std::unique_ptr<experimental::NEActivationLayer> op{ nullptr }; + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + IRuntimeContext *ctx{ nullptr }; + std::unique_ptr<cpu::CpuActivation> op{ nullptr }; }; NEActivationLayer::NEActivationLayer(IRuntimeContext *ctx) @@ -61,27 +41,24 @@ NEActivationLayer::NEActivationLayer(IRuntimeContext *ctx) { _impl->ctx = ctx; } - NEActivationLayer::NEActivationLayer(NEActivationLayer &&) = default; - NEActivationLayer &NEActivationLayer::operator=(NEActivationLayer &&) = default; - -NEActivationLayer::~NEActivationLayer() = default; +NEActivationLayer::~NEActivationLayer() = default; void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - _impl->src = input; _impl->dst = output == nullptr ? input : output; - _impl->op = std::make_unique<experimental::NEActivationLayer>(); + ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst); + + _impl->op = std::make_unique<cpu::CpuActivation>(); _impl->op->configure(_impl->src->info(), _impl->dst->info(), activation_info); } Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { - return experimental::NEActivationLayer::validate(input, output, act_info); + return cpu::CpuActivation::validate(input, output, act_info); } void NEActivationLayer::run() diff --git a/src/runtime/NEON/functions/NEFloor.cpp b/src/runtime/NEON/functions/NEFloor.cpp index b4ecb1ef38..f8a3c13d6d 100644 --- a/src/runtime/NEON/functions/NEFloor.cpp +++ b/src/runtime/NEON/functions/NEFloor.cpp @@ -23,10 +23,18 @@ */ #include "arm_compute/runtime/NEON/functions/NEFloor.h" +#include "arm_compute/core/Validate.h" #include "src/runtime/cpu/operators/CpuFloor.h" namespace arm_compute { +struct NEFloor::Impl +{ + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<cpu::CpuFloor> op{ nullptr }; +}; + NEFloor::NEFloor() : _impl(std::make_unique<Impl>()) { @@ -35,15 +43,10 @@ NEFloor::NEFloor(NEFloor &&) = default; NEFloor &NEFloor::operator=(NEFloor &&) = default; NEFloor::~NEFloor() = default; -struct NEFloor::Impl -{ - const ITensor *src{ nullptr }; - ITensor *dst{ nullptr }; - std::unique_ptr<cpu::CpuFloor> op{ nullptr }; -}; - void NEFloor::configure(const ITensor *input, ITensor *output) { + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + _impl->src = input; _impl->dst = output; diff --git a/src/runtime/cpu/ICpuOperator.h b/src/runtime/cpu/ICpuOperator.h index bb7de1b69e..70ab4364c7 100644 --- a/src/runtime/cpu/ICpuOperator.h +++ b/src/runtime/cpu/ICpuOperator.h @@ -30,7 +30,7 @@ namespace arm_compute { namespace cpu { -using ICpuOperator = arm_compute::experimental::INEOperator; +using ICpuOperator = experimental::INEOperator; } // namespace cpu } // namespace arm_compute #endif /* ARM_COMPUTE_ICPUOPERATOR_H */ diff --git a/src/runtime/cpu/operators/CpuActivation.cpp b/src/runtime/cpu/operators/CpuActivation.cpp new file mode 100644 index 0000000000..7753c9601f --- /dev/null +++ b/src/runtime/cpu/operators/CpuActivation.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/runtime/cpu/operators/CpuActivation.h" + +#include "src/core/cpu/kernels/CpuActivationKernel.h" + +namespace arm_compute +{ +namespace cpu +{ +void CpuActivation::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info) +{ + auto k = std::make_unique<kernels::CpuActivationKernel>(); + k->configure(input, output, activation_info); + _kernel = std::move(k); +} + +Status CpuActivation::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) +{ + return kernels::CpuActivationKernel::validate(input, output, activation_info); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/runtime/cpu/operators/CpuActivation.h b/src/runtime/cpu/operators/CpuActivation.h new file mode 100644 index 0000000000..25bc9036dc --- /dev/null +++ b/src/runtime/cpu/operators/CpuActivation.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPU_ACTIVATION_H +#define ARM_COMPUTE_CPU_ACTIVATION_H + +#include "src/runtime/cpu/ICpuOperator.h" + +namespace arm_compute +{ +namespace cpu +{ +/** Basic function to run @ref CpuActivationKernel */ +class CpuActivation : public ICpuOperator +{ +public: + /** Constructor */ + CpuActivation() = default; + /** Set the input and output tensor. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p src + * @param[in] activation_info Activation layer parameters. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p src + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); +}; +} // namespace cpu +} // namespace arm_compute +#endif /* ARM_COMPUTE_CPU_ACTIVATION_H */ |