diff options
63 files changed, 673 insertions, 527 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 82a6a6c324..a021bdf5e4 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,11 @@ namespace arm_compute { +namespace cpuinfo +{ +struct CpuIsaInfo; +} // namespace cpuinfo + #define ARM_COMPUTE_CPU_MODEL_LIST \ X(GENERIC) \ X(GENERIC_FP16) \ @@ -134,6 +139,11 @@ public: * @return Current thread's @ref CPUModel */ CPUModel get_cpu_model() const; + /** Gets the current cpu's ISA information + * + * @return Current cpu's ISA information + */ + cpuinfo::CpuIsaInfo get_isa() const; /** Gets the L1 cache size * * @return the size of the L1 cache diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 88cb295c44..b24955d778 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -1200,6 +1200,49 @@ inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0) return vec_size; } +/** Returns the suffix string of CPU kernel implementation names based on the given data type + * + * @param[in] data_type The data type the CPU kernel implementation uses + * + * @return the suffix string of CPU kernel implementations + */ +inline std::string cpu_impl_dt(const DataType &data_type) +{ + std::string ret = ""; + + switch(data_type) + { + case DataType::F32: + ret = "fp32"; + break; + case DataType::F16: + ret = "fp16"; + break; + case DataType::U8: + ret = "u8"; + break; + case DataType::S16: + ret = "s16"; + break; + case DataType::S32: + ret = "s32"; + break; + case DataType::QASYMM8: + ret = "qu8"; + break; + case DataType::QASYMM8_SIGNED: + ret = "qs8"; + break; + case DataType::QSYMM16: + ret = "qs16"; + break; + default: + ARM_COMPUTE_ERROR("Unsupported."); + } + + return ret; +} + #ifdef ARM_COMPUTE_ASSERTS_ENABLED /** Print consecutive elements to an output stream. * diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index 44cd000ada..c197932a13 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "src/common/cpuinfo/CpuInfo.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" namespace arm_compute { @@ -110,6 +111,11 @@ CPUModel CPUInfo::get_cpu_model(unsigned int cpuid) const return _impl->info.cpu_model(cpuid); } +cpuinfo::CpuIsaInfo CPUInfo::get_isa() const +{ + return _impl->info.isa(); +} + unsigned int CPUInfo::get_L1_cache_size() const { return _impl->L1_cache_size; diff --git a/src/cpu/ICpuKernel.h b/src/cpu/ICpuKernel.h index 650b3a7d0b..03aec5c08e 100644 --- a/src/cpu/ICpuKernel.h +++ b/src/cpu/ICpuKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. 
+ * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,12 +25,50 @@ #define ARM_COMPUTE_ICPUKERNEL_H #include "arm_compute/core/CPP/ICPPKernel.h" +#include "src/cpu/kernels/CpuKernelSelectionTypes.h" namespace arm_compute { namespace cpu { +enum class KernelSelectionType +{ + Preferred, /**< Retrieve the best implementation available for the given Cpu ISA, ignoring the build flags */ + Supported /**< Retrieve the best implementation available for the given Cpu ISA that is supported by the current build */ +}; + using ICpuKernel = arm_compute::ICPPKernel; + +template <class Derived> +/* This is a temp name for stage 1 process of adding UT for multi-ISA. +In the next stage NewICpuKernel will be called ICpuKernel again */ +class NewICpuKernel : public ICPPKernel +{ +public: + /** Micro-kernel selector + * + * @param[in] selector Selection struct passed including information to help pick the appropriate micro-kernel + * @param[in] selection_type (Optional) Decides whether to get the best implementation for the given hardware or for the given build + * + * @return A matching micro-kernel else nullptr + */ + + template <typename SelectorType> + static const auto *get_implementation(const SelectorType &selector, KernelSelectionType selection_type = KernelSelectionType::Supported) + { + using kernel_type = typename std::remove_reference<decltype(Derived::get_available_kernels())>::type::value_type; + + for(const auto &uk : Derived::get_available_kernels()) + { + if(uk.is_selected(selector) && (selection_type == KernelSelectionType::Preferred || uk.ukernel != nullptr)) + { + return &uk; + } + } + + return static_cast<kernel_type *>(nullptr); + } +}; } // namespace cpu } // namespace arm_compute #endif /* ARM_COMPUTE_ICPUKERNEL_H */ diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp index 3af379d8af..c048b14a96 100644 --- a/src/cpu/kernels/CpuActivationKernel.cpp +++ 
b/src/cpu/kernels/CpuActivationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,94 +43,60 @@ namespace kernels { namespace { -struct ActivationSelectorData +static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels = { - DataType dt; - const CPUInfo &ci; -}; - -using ActivationSelectorPtr = std::add_pointer<bool(const ActivationSelectorData &data)>::type; -using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type; - -struct ActivationKernel -{ - const char *name; - const ActivationSelectorPtr is_selected; - ActivationKernelPtr ukernel; -}; - -static const ActivationKernel available_kernels[] = -{ -#if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_fp16_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve; }, REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation) }, { "sve_fp32_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; }, REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_activation) }, -#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_fp16_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F16; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_activation) }, { "neon_fp32_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F32; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; }, 
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_activation) }, -#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ -#if defined(ARM_COMPUTE_ENABLE_SVE2) { - "sve_qu8_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, + "sve2_qu8_activation", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; }, REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_qasymm8_activation) }, { - "sve_qs8_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, + "sve2_qs8_activation", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve2; }, REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::sve2_qasymm8_signed_activation) }, { - "sve_qs16_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16 && data.ci.has_sve2(); }, + "sve2_qs16_activation", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16 && data.isa.sve2; }, REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation) }, -#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ { "neon_qu8_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation) }, { "neon_qs8_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_activation) }, { "neon_qs16_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16; }, 
REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qsymm16_activation) }, }; -const ActivationKernel *get_implementation(const ActivationSelectorData &data) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected(data)) - { - return &uk; - } - } - return nullptr; -} - /* Supported activation in the 8-bit integer domain */ static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations = { @@ -155,7 +121,8 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); - const auto *uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() }); + const auto *uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); const DataType data_type = src->data_type(); @@ -208,7 +175,8 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, activation_info)); - const auto uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() }); + const auto uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); _act_info = activation_info; @@ -269,6 +237,11 @@ const char *CpuActivationKernel::name() const { return _name.c_str(); } + +const std::vector<CpuActivationKernel::ActivationKernel> &CpuActivationKernel::get_available_kernels() +{ + return available_kernels; +} } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuActivationKernel.h b/src/cpu/kernels/CpuActivationKernel.h index 
8e78d86016..ac974850aa 100644 --- a/src/cpu/kernels/CpuActivationKernel.h +++ b/src/cpu/kernels/CpuActivationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,8 +34,11 @@ namespace cpu namespace kernels { /** Interface for the activation kernel */ -class CpuActivationKernel : public ICpuKernel +class CpuActivationKernel : public NewICpuKernel<CpuActivationKernel> { +private: + using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type; + public: CpuActivationKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuActivationKernel); @@ -70,8 +73,14 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; -private: - using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type; + struct ActivationKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + ActivationKernelPtr ukernel; + }; + + static const std::vector<ActivationKernel> &get_available_kernels(); private: ActivationLayerInfo _act_info{}; diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp index f3ee032ec5..c27ee9f1bd 100644 --- a/src/cpu/kernels/CpuAddKernel.cpp +++ b/src/cpu/kernels/CpuAddKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -41,159 +41,116 @@ namespace kernels { namespace { -struct AddSelectorData +static const std::vector<CpuAddKernel::AddKernel> available_kernels = { - DataType dt; - const CPUInfo &ci; -}; - -using AddSelectorPtr = std::add_pointer<bool(const AddSelectorData &data)>::type; -using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type; -struct AddKernel -{ - const char *name; - const AddSelectorPtr is_selected; - AddKernelPtr ukernel; -}; - -static const AddKernel available_kernels[] = -{ -#if defined(ARM_COMPUTE_ENABLE_SVE2) { "sve2_qu8_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::QASYMM8) && data.ci.has_sve2(); + return (data.dt == DataType::QASYMM8) && data.isa.sve2; }, REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2) }, { "sve2_qs8_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2(); + return (data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2; }, REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2) }, { "sve2_qs16_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::QSYMM16) && data.ci.has_sve2(); + return (data.dt == DataType::QSYMM16) && data.isa.sve2; }, REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2) }, -#endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ -#if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_fp32_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::F32) && data.ci.has_sve(); + return (data.dt == DataType::F32) && data.isa.sve; }, REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve) }, { "sve_fp16_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == 
DataType::F16) && data.ci.has_sve(); + return (data.dt == DataType::F16) && data.isa.sve; }, REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve) }, { "sve_u8_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::U8) && data.ci.has_sve(); + return (data.dt == DataType::U8) && data.isa.sve; }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve) }, { "sve_s16_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::S16) && data.ci.has_sve(); + return (data.dt == DataType::S16) && data.isa.sve; }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve) }, { "sve_s32_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::S32) && data.ci.has_sve(); + return (data.dt == DataType::S32) && data.isa.sve; }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve) }, -#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_fp32_add", - [](const AddSelectorData & data) { return (data.dt == DataType::F32); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); }, REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon) }, -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_fp16_add", - [](const AddSelectorData & data) + [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::F16) && data.ci.has_fp16(); + return (data.dt == DataType::F16) && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon) }, -#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_u8_add", - [](const AddSelectorData & data) { return (data.dt == DataType::U8); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::U8); }, REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon) }, { "neon_s16_add", - [](const AddSelectorData & data) { return (data.dt == DataType::S16); }, + [](const DataTypeISASelectorData & 
data) { return (data.dt == DataType::S16); }, REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon) }, { "neon_s32_add", - [](const AddSelectorData & data) { return (data.dt == DataType::S32); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S32); }, REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon) }, -#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ -#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) { "neon_qu8_add", - [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::add_qasymm8_neon) }, { "neon_qs8_add", - [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_qasymm8_signed_neon) }, { "neon_qs16_add", - [](const AddSelectorData & data) { return (data.dt == DataType::QSYMM16); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QSYMM16); }, REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon) - }, -#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ -}; - -/** Micro-kernel selector - * - * @param[in] data Selection data passed to help pick the appropriate micro-kernel - * - * @return A matching micro-kernel else nullptr - */ -const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected({ dt, cpuinfo })) - { - return &uk; - } } - return nullptr; -} +}; Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy) { @@ -220,7 +177,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons "Wrong shape for dst"); } - const auto *uk = get_implementation(CPUInfo::get(), 
src0.data_type()); + const auto *uk = CpuAddKernel::get_implementation(DataTypeISASelectorData{ src0.data_type(), CPUInfo::get().get_isa() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; @@ -246,7 +203,8 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy)); - const auto uk = get_implementation(CPUInfo::get(), src0->data_type()); + const auto uk = CpuAddKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); _policy = policy; @@ -256,7 +214,7 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I // Configure kernel window auto win_config = validate_and_configure_window(*src0, *src1, *dst); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICpuKernel::configure(win_config.second); + NewICpuKernel::configure(win_config.second); } Status CpuAddKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy) @@ -273,7 +231,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window); ARM_COMPUTE_ERROR_ON(tensors.empty()); ARM_COMPUTE_ERROR_ON(_run_method == nullptr); @@ -290,6 +248,11 @@ const char *CpuAddKernel::name() const return _name.c_str(); } +const std::vector<CpuAddKernel::AddKernel> &CpuAddKernel::get_available_kernels() +{ + return available_kernels; +} + size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const { ARM_COMPUTE_UNUSED(thread_count); @@ -298,7 +261,7 @@ size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const { 
return 10240; } - else if (platform.get_cpu_model() == CPUModel::A76) + else if(platform.get_cpu_model() == CPUModel::A76) { return 9216; } diff --git a/src/cpu/kernels/CpuAddKernel.h b/src/cpu/kernels/CpuAddKernel.h index a0c7e497dd..93b86de4ae 100644 --- a/src/cpu/kernels/CpuAddKernel.h +++ b/src/cpu/kernels/CpuAddKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,9 +34,19 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform addition between two tensors */ -class CpuAddKernel : public ICpuKernel +class CpuAddKernel : public NewICpuKernel<CpuAddKernel> { +private: + using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type; + public: + struct AddKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + AddKernelPtr ukernel; + }; + CpuAddKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuAddKernel); /** Initialise the kernel's input, dst and border mode. @@ -79,8 +89,7 @@ public: */ size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; -private: - using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type; + static const std::vector<AddKernel> &get_available_kernels(); private: ConvertPolicy _policy{}; diff --git a/src/cpu/kernels/CpuCastKernel.h b/src/cpu/kernels/CpuCastKernel.h index a8ce97230e..9aeb537044 100644 --- a/src/cpu/kernels/CpuCastKernel.h +++ b/src/cpu/kernels/CpuCastKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace kernels * * @note When casting between quantized types the scale and zeroPoint are ignored */ -class CpuCastKernel : public ICpuKernel +class CpuCastKernel : public NewICpuKernel<CpuCastKernel> { public: CpuCastKernel() = default; diff --git a/src/cpu/kernels/CpuCol2ImKernel.h b/src/cpu/kernels/CpuCol2ImKernel.h index 8e09a2b689..43be476b2f 100644 --- a/src/cpu/kernels/CpuCol2ImKernel.h +++ b/src/cpu/kernels/CpuCol2ImKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -52,7 +52,7 @@ namespace kernels * \end{array} \right) * @f] */ -class CpuCol2ImKernel : public ICpuKernel +class CpuCol2ImKernel : public NewICpuKernel<CpuCol2ImKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuConcatenateBatchKernel.h b/src/cpu/kernels/CpuConcatenateBatchKernel.h index 91f2808f81..2b5946571b 100644 --- a/src/cpu/kernels/CpuConcatenateBatchKernel.h +++ b/src/cpu/kernels/CpuConcatenateBatchKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,7 @@ namespace kernels /** Interface for the batch concatenate kernel. * The input tensor will be concatenated into the output tensor. */ -class CpuConcatenateBatchKernel : public ICpuKernel +class CpuConcatenateBatchKernel : public NewICpuKernel<CpuConcatenateBatchKernel> { public: CpuConcatenateBatchKernel() = default; diff --git a/src/cpu/kernels/CpuConcatenateDepthKernel.h b/src/cpu/kernels/CpuConcatenateDepthKernel.h index 063118b33b..90b68d3a06 100644 --- a/src/cpu/kernels/CpuConcatenateDepthKernel.h +++ b/src/cpu/kernels/CpuConcatenateDepthKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ namespace kernels /** Interface for the depth concatenate kernel. * The input tensor will be concatenated into the output tensor. */ -class CpuConcatenateDepthKernel : public ICpuKernel +class CpuConcatenateDepthKernel : public NewICpuKernel<CpuConcatenateDepthKernel> { public: CpuConcatenateDepthKernel() = default; diff --git a/src/cpu/kernels/CpuConcatenateHeightKernel.h b/src/cpu/kernels/CpuConcatenateHeightKernel.h index 883c59a206..8ace9809cc 100644 --- a/src/cpu/kernels/CpuConcatenateHeightKernel.h +++ b/src/cpu/kernels/CpuConcatenateHeightKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,7 @@ namespace kernels /** Interface for the height concatenate kernel. * The source tensor will be concatenated into the destination tensor. */ -class CpuConcatenateHeightKernel : public ICpuKernel +class CpuConcatenateHeightKernel : public NewICpuKernel<CpuConcatenateHeightKernel> { public: CpuConcatenateHeightKernel() = default; diff --git a/src/cpu/kernels/CpuConcatenateWidthKernel.h b/src/cpu/kernels/CpuConcatenateWidthKernel.h index 3b4612ab0d..d5f2ef24d6 100644 --- a/src/cpu/kernels/CpuConcatenateWidthKernel.h +++ b/src/cpu/kernels/CpuConcatenateWidthKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace kernels /** Interface for the width concatenate kernel. * The source tensor will be concatenated into the destination tensor. 
*/ -class CpuConcatenateWidthKernel : public ICPPKernel +class CpuConcatenateWidthKernel : public NewICpuKernel<CpuConcatenateWidthKernel> { public: CpuConcatenateWidthKernel() = default; diff --git a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h index 70f0a742f9..001a6fcab0 100644 --- a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h +++ b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,7 @@ namespace kernels * * @note This function assumes the weights are already reshaped (transposed) */ -class CpuConvertFullyConnectedWeightsKernel : public ICpuKernel +class CpuConvertFullyConnectedWeightsKernel : public NewICpuKernel<CpuConvertFullyConnectedWeightsKernel> { public: CpuConvertFullyConnectedWeightsKernel() = default; diff --git a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h index 8cce1eaf1d..9d5ee39126 100644 --- a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h +++ b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Kernel to convert asymmetric signed to asymmetric signed and vice-versa */ -class CpuConvertQuantizedSignednessKernel : public ICpuKernel +class CpuConvertQuantizedSignednessKernel : public NewICpuKernel<CpuConvertQuantizedSignednessKernel> { public: CpuConvertQuantizedSignednessKernel() = default; diff --git a/src/cpu/kernels/CpuCopyKernel.h b/src/cpu/kernels/CpuCopyKernel.h index 193f38078b..ee4adeb4eb 100644 --- a/src/cpu/kernels/CpuCopyKernel.h +++ b/src/cpu/kernels/CpuCopyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. 
+ * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Kernel to perform a copy between two tensors */ -class CpuCopyKernel : public ICpuKernel +class CpuCopyKernel : public NewICpuKernel<CpuCopyKernel> { public: CpuCopyKernel() = default; diff --git a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h index 1afb6bed4c..eae682bb6d 100644 --- a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h +++ b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ namespace cpu namespace kernels { /** Interface for the kernel to run a depthwise convolution native on a tensor. */ -class CpuDepthwiseConv2dNativeKernel : public ICpuKernel +class CpuDepthwiseConv2dNativeKernel : public NewICpuKernel<CpuDepthwiseConv2dNativeKernel> { public: CpuDepthwiseConv2dNativeKernel() = default; diff --git a/src/cpu/kernels/CpuDequantizeKernel.h b/src/cpu/kernels/CpuDequantizeKernel.h index f515cd36f9..834c039a76 100644 --- a/src/cpu/kernels/CpuDequantizeKernel.h +++ b/src/cpu/kernels/CpuDequantizeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Interface for the dequantization layer kernel. */ -class CpuDequantizeKernel : public ICpuKernel +class CpuDequantizeKernel : public NewICpuKernel<CpuDequantizeKernel> { public: CpuDequantizeKernel() = default; diff --git a/src/cpu/kernels/CpuDirectConv2dKernel.h b/src/cpu/kernels/CpuDirectConv2dKernel.h index 3ba7f7ed5f..1f5568743e 100644 --- a/src/cpu/kernels/CpuDirectConv2dKernel.h +++ b/src/cpu/kernels/CpuDirectConv2dKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform Direct Convolution Layer. */ -class CpuDirectConv2dKernel : public ICpuKernel +class CpuDirectConv2dKernel : public NewICpuKernel<CpuDirectConv2dKernel> { public: CpuDirectConv2dKernel() = default; diff --git a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h index a68936bbae..95011f79aa 100644 --- a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h +++ b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ namespace kernels * @note For quantized computations (i.e. @p src of S32 type) the output data type for auto-initialization must be passed as part * of the @ref DirectConvolutionLayerOutputStageKernelInfo. */ -class CpuDirectConv2dOutputStageKernel : public ICpuKernel +class CpuDirectConv2dOutputStageKernel : public NewICpuKernel<CpuDirectConv2dOutputStageKernel> { public: CpuDirectConv2dOutputStageKernel() = default; diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.cpp b/src/cpu/kernels/CpuDirectConv3dKernel.cpp index 36764a625d..22c60cd994 100644 --- a/src/cpu/kernels/CpuDirectConv3dKernel.cpp +++ b/src/cpu/kernels/CpuDirectConv3dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -49,64 +49,32 @@ namespace kernels { namespace { -struct DirectConv3dSelectorData -{ - DataType dt; - const CPUInfo &ci; -}; -using DirectConv3dSelectorPtr = std::add_pointer<bool(const DirectConv3dSelectorData &data)>::type; -using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type; -struct DirectConv3dKernel -{ - const char *name; - const DirectConv3dSelectorPtr is_selected; - DirectConv3dKernelPtr ukernel; -}; - -static const DirectConv3dKernel available_kernels[] = +static const std::vector<CpuDirectConv3dKernel::DirectConv3dKernel> available_kernels = { #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_fp16_directconv3d", - [](const DirectConv3dSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc<float16_t>) }, #endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_fp32_directconv3d", - [](const DirectConv3dSelectorData & data) { return data.dt == DataType::F32; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc<float>) }, { "neon_qasymm8_directconv3d", - [](const DirectConv3dSelectorData & data) { return data.dt == DataType::QASYMM8; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc<uint8_t>) }, { "neon_qasymm8_signed_directconv3d", - [](const DirectConv3dSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc<int8_t>) } 
}; -/** Micro-kernel selector - * - * @param[in] data Selection data passed to help pick the appropriate micro-kernel - * - * @return A matching micro-kernel else nullptr - */ -const DirectConv3dKernel *get_implementation(const DirectConv3dSelectorData &data) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected(data)) - { - return &uk; - } - } - return nullptr; -} - Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const Conv3dInfo &conv_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); @@ -117,7 +85,8 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, src1); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation != Size3D(1U, 1U, 1U)); - const auto *uk = get_implementation(DirectConv3dSelectorData{ src0->data_type(), CPUInfo::get() }); + const auto *uk = CpuDirectConv3dKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); const DataLayout data_layout = src0->data_layout(); @@ -161,7 +130,8 @@ void CpuDirectConv3dKernel::configure(const ITensorInfo *src0, const ITensorInfo ARM_COMPUTE_UNUSED(src2); ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); - const auto *uk = get_implementation(DirectConv3dSelectorData{ src0->data_type(), CPUInfo::get() }); + const auto *uk = CpuDirectConv3dKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); _conv_info = conv_info; @@ -210,6 +180,12 @@ const char *CpuDirectConv3dKernel::name() const { return _name.c_str(); } + +const std::vector<CpuDirectConv3dKernel::DirectConv3dKernel> &CpuDirectConv3dKernel::get_available_kernels() +{ + return available_kernels; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute
\ No newline at end of file diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.h b/src/cpu/kernels/CpuDirectConv3dKernel.h index ff3b30f8ae..6ae70bd3b7 100644 --- a/src/cpu/kernels/CpuDirectConv3dKernel.h +++ b/src/cpu/kernels/CpuDirectConv3dKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/runtime/FunctionDescriptors.h" #include "src/core/common/Macros.h" #include "src/cpu/ICpuKernel.h" + namespace arm_compute { namespace cpu @@ -34,8 +35,12 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform 3D Direct Convolution Layer. */ -class CpuDirectConv3dKernel : public ICpuKernel +class CpuDirectConv3dKernel : public NewICpuKernel<CpuDirectConv3dKernel> { +private: + /* Template function for convolution 3d NDHWC */ + using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type; + public: CpuDirectConv3dKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv3dKernel); @@ -71,14 +76,21 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; -private: - /* Template function for convolution 3d NDHWC */ - using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type; + struct DirectConv3dKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + DirectConv3dKernelPtr ukernel; + }; + + static const std::vector<DirectConv3dKernel> &get_available_kernels(); +private: Conv3dInfo _conv_info{}; DirectConv3dKernelPtr _run_method{ nullptr }; std::string _name{}; }; + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuElementwiseKernel.h b/src/cpu/kernels/CpuElementwiseKernel.h index 
f323fe4470..bb081cbec1 100644 --- a/src/cpu/kernels/CpuElementwiseKernel.h +++ b/src/cpu/kernels/CpuElementwiseKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,7 @@ namespace kernels * @f[ dst(x,y) = OP(src0(x,y), src1(x,y))@f] * */ -class CpuElementwiseKernel : public ICpuKernel +class CpuElementwiseKernel : public NewICpuKernel<CpuElementwiseKernel> { public: CpuElementwiseKernel() = default; diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp index 3573fa0815..61bc64b235 100644 --- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,77 +43,58 @@ namespace kernels { namespace { -struct ElementwiseUnarySelectorData -{ - DataType dt; - const CPUInfo &ci; -}; -using ElementwiseUnarySelector = std::add_pointer<bool(const ElementwiseUnarySelectorData &)>::type; - -struct ElementwiseUnaryKernel -{ - const char *name; - const ElementwiseUnarySelector is_selected; - CpuElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel; -}; - -static const ElementwiseUnaryKernel available_kernels[] = +static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> available_kernels = { #if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_fp32_elementwise_unary", - [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, - REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>), + [](const DataTypeISASelectorData & data) + { + return data.dt == DataType::F32 && data.isa.sve; + }, + REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>) }, { "sve_fp16_elementwise_unary", - [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); 
}, + [](const DataTypeISASelectorData & data) + { + return (data.dt == DataType::F16) && data.isa.sve; + }, REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op<__fp16>), }, { "sve_s32_elementwise_unary", - [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::S32 && data.isa.sve; }, REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<int32_t>), }, #endif // defined(ARM_COMPUTE_ENABLE_SVE) #if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_fp32_elementwise_unary", - [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<float>), }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_fp16_elementwise_unary", - [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; }, REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<__fp16>), }, #endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_s32_elementwise_unary", - [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::S32; }, REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<int32_t>), }, #endif // defined(ARM_COMPUTE_ENABLE_NEON) }; -const ElementwiseUnaryKernel *get_implementation(DataType dt) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected({ dt, CPUInfo::get() })) - { - return &uk; - } - } - return nullptr; -} } // namespace void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst) { ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst)); - const auto uk = get_implementation(src.data_type()); + const auto uk 
= CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() }); ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); _op = op; @@ -128,14 +109,15 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo auto shape_and_window = compute_output_shape_and_window(src.tensor_shape()); auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type()); - ICpuKernel::configure(shape_and_window.second); + NewICpuKernel::configure(shape_and_window.second); } Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst) { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src); - const auto *uk = get_implementation(src.data_type()); + const auto *uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); switch(op) @@ -177,6 +159,12 @@ const char *CpuElementwiseUnaryKernel::name() const { return _name.c_str(); } + +const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> &CpuElementwiseUnaryKernel::get_available_kernels() +{ + return available_kernels; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/cpu/kernels/CpuElementwiseUnaryKernel.h index f72eddf737..c520b89618 100644 --- a/src/cpu/kernels/CpuElementwiseUnaryKernel.h +++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,8 +39,11 @@ namespace kernels * Element-wise operation is computed by: * @f[ dst(x) = OP(src(x))@f] */ -class CpuElementwiseUnaryKernel : public ICpuKernel +class CpuElementwiseUnaryKernel : public NewICpuKernel<CpuElementwiseUnaryKernel> { +private: + using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type; + public: CpuElementwiseUnaryKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuElementwiseUnaryKernel); @@ -64,11 +67,14 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; - /** Common signature for all the specialised elementwise unary micro-kernels - * - * @param[in] window Region on which to execute the kernel. - */ - using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type; + struct ElementwiseUnaryKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + ElementwiseUnaryUkernelPtr ukernel; + }; + + static const std::vector<ElementwiseUnaryKernel> &get_available_kernels(); private: ElementWiseUnary _op{}; diff --git a/src/cpu/kernels/CpuFillKernel.h b/src/cpu/kernels/CpuFillKernel.h index 3bc6a40309..5262ecc5c6 100644 --- a/src/cpu/kernels/CpuFillKernel.h +++ b/src/cpu/kernels/CpuFillKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ namespace cpu namespace kernels { /** Kernel for filling a tensor with a given constant value */ -class CpuFillKernel : public ICpuKernel +class CpuFillKernel : public NewICpuKernel<CpuFillKernel> { public: CpuFillKernel() = default; diff --git a/src/cpu/kernels/CpuFloorKernel.cpp b/src/cpu/kernels/CpuFloorKernel.cpp index bcac1a41fc..65e390a81a 100644 --- a/src/cpu/kernels/CpuFloorKernel.cpp +++ b/src/cpu/kernels/CpuFloorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,58 +42,25 @@ namespace kernels { namespace { -struct FloorSelectorData -{ - DataType dt; -}; - -using FloorSelectorPtr = std::add_pointer<bool(const FloorSelectorData &data)>::type; -using FloorUKernelPtr = std::add_pointer<void(const void *, void *, int)>::type; - -struct FloorUKernel -{ - const char *name; - const FloorSelectorPtr is_selected; - FloorUKernelPtr ukernel; -}; - -static const FloorUKernel available_kernels[] = +static const std::vector<CpuFloorKernel::FloorKernel> available_kernels = { { "neon_fp16_floor", - [](const FloorSelectorData & data) { return data.dt == DataType::F16; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_floor) }, { "neon_fp32_floor", - [](const FloorSelectorData & data) { return data.dt == DataType::F32; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_floor) - }, -}; - -/** Micro-kernel selector - * - * @param[in] data Selection data passed to help pick the appropriate micro-kernel - * - * @return A matching micro-kernel else nullptr - */ -const FloorUKernel *get_implementation(const FloorSelectorData &data) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected(data)) - { - return &uk; - } } - return nullptr; -} 
+}; Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - const auto *uk = get_implementation(FloorSelectorData{ src->data_type() }); + const auto *uk = CpuFloorKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); // Validate in case of configured output @@ -114,7 +81,7 @@ void CpuFloorKernel::configure(const ITensorInfo *src, ITensorInfo *dst) auto_init_if_empty(*dst, src->tensor_shape(), 1, src->data_type()); - const auto *uk = get_implementation(FloorSelectorData{ src->data_type() }); + const auto *uk = CpuFloorKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); ARM_COMPUTE_ERROR_ON_NULLPTR(uk); _run_method = uk->ukernel; @@ -172,6 +139,12 @@ const char *CpuFloorKernel::name() const { return _name.c_str(); } + +const std::vector<CpuFloorKernel::FloorKernel> &CpuFloorKernel::get_available_kernels() +{ + return available_kernels; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuFloorKernel.h b/src/cpu/kernels/CpuFloorKernel.h index ffb9658190..2b102a0515 100644 --- a/src/cpu/kernels/CpuFloorKernel.h +++ b/src/cpu/kernels/CpuFloorKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,8 +34,11 @@ namespace cpu namespace kernels { /** Cpu accelarated kernel to perform a floor operation */ -class CpuFloorKernel : public ICpuKernel +class CpuFloorKernel : public NewICpuKernel<CpuFloorKernel> { +private: + using FloorKernelPtr = std::add_pointer<void(const void *, void *, int)>::type; + public: CpuFloorKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuFloorKernel); @@ -65,12 +68,18 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; -private: - using FloorUKernelPtr = std::add_pointer<void(const void *, void *, int)>::type; + struct FloorKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + FloorKernelPtr ukernel; + }; + + static const std::vector<FloorKernel> &get_available_kernels(); private: - FloorUKernelPtr _run_method{ nullptr }; - std::string _name{}; + FloorKernelPtr _run_method{ nullptr }; + std::string _name{}; }; } // namespace kernels } // namespace cpu diff --git a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h index 047776bd1e..13b46142c4 100644 --- a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h +++ b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -52,7 +52,7 @@ namespace kernels * * After this operation, the dst matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] */ -class CpuGemmInterleave4x4Kernel : public ICpuKernel +class CpuGemmInterleave4x4Kernel : public NewICpuKernel<CpuGemmInterleave4x4Kernel> { public: CpuGemmInterleave4x4Kernel() = default; diff --git a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h index 083ee187ef..6d06f12e54 100644 --- a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h +++ b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ namespace kernels * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 * */ -class CpuGemmLowpMatrixMultiplyKernel : public ICpuKernel +class CpuGemmLowpMatrixMultiplyKernel : public NewICpuKernel<CpuGemmLowpMatrixMultiplyKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h index 918f8c89d9..6cced66b47 100644 --- a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h +++ b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ namespace kernels * @note This stage is needed to handle the offset of matrix product * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md */ -class CpuGemmLowpMatrixAReductionKernel : public ICpuKernel +class CpuGemmLowpMatrixAReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixAReductionKernel> { public: /** Default constructor */ @@ -98,7 +98,7 @@ private: * @note This stage is needed to handle the offset of matrix product * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md */ -class CpuGemmLowpMatrixBReductionKernel : public ICpuKernel +class CpuGemmLowpMatrixBReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixBReductionKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h index 1ec969be92..1d70c0619e 100644 --- a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h +++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,7 @@ namespace kernels * (a_offset * b_offset * k) * */ -class CpuGemmLowpOffsetContributionKernel : public ICpuKernel +class CpuGemmLowpOffsetContributionKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h index d97727dd09..13c64f4631 100644 --- a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h +++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -63,7 +63,7 @@ namespace kernels * (a_offset * b_offset * k) */ -class CpuGemmLowpOffsetContributionOutputStageKernel : public ICpuKernel +class CpuGemmLowpOffsetContributionOutputStageKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionOutputStageKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h index ae13e760ff..f6e8c816f3 100644 --- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h +++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,7 +51,7 @@ namespace kernels * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. * */ -class CpuGemmLowpQuantizeDownInt32ScaleKernel : public ICpuKernel +class CpuGemmLowpQuantizeDownInt32ScaleKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ScaleKernel> { public: CpuGemmLowpQuantizeDownInt32ScaleKernel() = default; diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h index 53a9d34ed1..a9e2560657 100644 --- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h +++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,7 +48,7 @@ namespace kernels * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. 
* */ -class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICpuKernel +class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel> { public: CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default; diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h index 67829e7773..bfac8681a5 100644 --- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h +++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ namespace kernels * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED. * */ -class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICpuKernel +class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel> { public: CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default; diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h index b62cac4818..5e5683cfc3 100644 --- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h +++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ namespace kernels * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. 
* */ -class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICpuKernel +class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel> { public: CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default; diff --git a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h index c9798fc24c..64338259e9 100644 --- a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h +++ b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,7 @@ namespace kernels * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref CpuGemmMatrixMultiplyKernel * - MTX_1 = C */ -class CpuGemmMatrixAdditionKernel : public ICpuKernel +class CpuGemmMatrixAdditionKernel : public NewICpuKernel<CpuGemmMatrixAdditionKernel> { public: CpuGemmMatrixAdditionKernel() = default; diff --git a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h index 0b4e01579c..757b46e9a7 100644 --- a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h +++ b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,7 @@ namespace kernels * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p lhs is a vector and the second input tensor @p rhs a matrix. 
The implementation also assumes that both tensors have not been reshaped * */ -class CpuGemmMatrixMultiplyKernel : public ICpuKernel +class CpuGemmMatrixMultiplyKernel : public NewICpuKernel<CpuGemmMatrixMultiplyKernel> { public: CpuGemmMatrixMultiplyKernel() = default; diff --git a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h index de920b5ed7..2acda35947 100644 --- a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h +++ b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -68,7 +68,7 @@ namespace kernels * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) * */ -class CpuGemmTranspose1xWKernel : public ICpuKernel +class CpuGemmTranspose1xWKernel : public NewICpuKernel<CpuGemmTranspose1xWKernel> { public: CpuGemmTranspose1xWKernel() = default; diff --git a/src/cpu/kernels/CpuIm2ColKernel.h b/src/cpu/kernels/CpuIm2ColKernel.h index 797d54c95c..d789adef95 100644 --- a/src/cpu/kernels/CpuIm2ColKernel.h +++ b/src/cpu/kernels/CpuIm2ColKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ namespace kernels * \end{array} \right) * @f] */ -class CpuIm2ColKernel : public ICpuKernel +class CpuIm2ColKernel : public NewICpuKernel<CpuIm2ColKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuKernelSelectionTypes.h b/src/cpu/kernels/CpuKernelSelectionTypes.h new file mode 100644 index 0000000000..779fb86e6d --- /dev/null +++ b/src/cpu/kernels/CpuKernelSelectionTypes.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021-2022 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H +#define ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H + +#include "arm_compute/core/Types.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +// Selector data types +struct DataTypeISASelectorData +{ + DataType dt; + const cpuinfo::CpuIsaInfo &isa; +}; + +struct PoolDataTypeISASelectorData +{ + DataType dt; + DataLayout dl; + int pool_stride_x; + Size2D pool_size; + const cpuinfo::CpuIsaInfo &isa; +}; + +// Selector pointer types +using DataTypeISASelectorPtr = std::add_pointer<bool(const DataTypeISASelectorData &data)>::type; +using PoolDataTypeISASelectorPtr = std::add_pointer<bool(const PoolDataTypeISASelectorData &data)>::type; + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute + +#endif // ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
\ No newline at end of file diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h index b65ec20044..3ab198510f 100644 --- a/src/cpu/kernels/CpuMulKernel.h +++ b/src/cpu/kernels/CpuMulKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform multiplication between two tensors */ -class CpuMulKernel : public ICpuKernel +class CpuMulKernel : public NewICpuKernel<CpuMulKernel> { public: CpuMulKernel() = default; @@ -118,7 +118,7 @@ private: }; /** Interface for the complex pixelwise multiplication kernel. */ -class CpuComplexMulKernel : public ICpuKernel +class CpuComplexMulKernel : public NewICpuKernel<CpuComplexMulKernel> { public: CpuComplexMulKernel() = default; diff --git a/src/cpu/kernels/CpuPermuteKernel.h b/src/cpu/kernels/CpuPermuteKernel.h index 1b2672b5b9..aae28582b1 100644 --- a/src/cpu/kernels/CpuPermuteKernel.h +++ b/src/cpu/kernels/CpuPermuteKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Kernel to perform tensor permutation given a permutation vector */ -class CpuPermuteKernel : public ICpuKernel +class CpuPermuteKernel : public NewICpuKernel<CpuPermuteKernel> { public: CpuPermuteKernel() = default; diff --git a/src/cpu/kernels/CpuPool2dKernel.cpp b/src/cpu/kernels/CpuPool2dKernel.cpp index f61cd0835d..953a9ffb67 100644 --- a/src/cpu/kernels/CpuPool2dKernel.cpp +++ b/src/cpu/kernels/CpuPool2dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -52,136 +52,101 @@ namespace { using namespace misc::shape_calculator; -struct PoolingSelectorData -{ - DataType dt; - DataLayout dl; - int pool_stride_x; - Size2D pool_size; -}; - -using PoolingSelectorPtr = std::add_pointer<bool(const PoolingSelectorData &data)>::type; -using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type; -struct PoolingKernel -{ - const char *name; - const PoolingSelectorPtr is_selected; - PoolingKernelPtr ukernel; -}; - -static const PoolingKernel available_kernels[] = +static const std::vector<CpuPool2dKernel::PoolingKernel> available_kernels = { { "neon_qu8_nhwc_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_qasymm8_neon_nhwc) }, { "neon_qs8_nhwc_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_qasymm8_signed_neon_nhwc) }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_f16_nhwc_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); }, REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nhwc) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_fp32_nhwc_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); 
}, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); }, REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nhwc) }, #if defined(ENABLE_NCHW_KERNELS) { "neon_qu8_nchw_pool2", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<uint8_t>) }, { "neon_qu8_nchw_pool3", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<uint8_t>) }, { "neon_qu8_nchw_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<uint8_t>) }, { "neon_qs8_nchw_pool2", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); }, + [](const PoolDataTypeISASelectorData & data) { 
return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<int8_t>) }, { "neon_qs8_nchw_pool3", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<int8_t>) }, { "neon_qs8_nchw_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<int8_t>) }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_fp16_nchw_pool2", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); }, REGISTER_FP16_NEON(arm_compute::cpu::pooling2_fp16_neon_nchw) }, { "neon_fp16_nchw_pool3", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); }, + [](const 
PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); }, REGISTER_FP16_NEON(arm_compute::cpu::pooling3_fp16_neon_nchw) }, { "neon_fp16_nchw_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16)); }, REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nchw) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_fp32_nchw_pool2", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); }, REGISTER_FP32_NEON(arm_compute::cpu::pooling2_fp32_neon_nchw) }, { "neon_fp32_nchw_pool3", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); }, REGISTER_FP32_NEON(arm_compute::cpu::pooling3_fp32_neon_nchw) }, { "neon_fp32_nchw_pool7", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == 
data.pool_size.y()) && (data.pool_size.x() == 7)); }, REGISTER_FP32_NEON(arm_compute::cpu::pooling7_fp32_neon_nchw) }, { "neon_fp32_nchw_poolMxN", - [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); }, + [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); }, REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nchw) }, #endif /* defined(ENABLE_NCHW_KERNELS) */ }; -/** Micro-kernel selector - * - * @param[in] data Selection data passed to help pick the appropriate micro-kernel - * - * @return A matching micro-kernel else nullptr - */ -const PoolingKernel *get_implementation(DataType dt, DataLayout dl, int pool_stride_x, Size2D pool_size) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected({ dt, dl, pool_stride_x, pool_size })) - { - return &uk; - } - } - return nullptr; -} - Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices, Size2D pool_size) { @@ -235,7 +200,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const } } - const auto *uk = get_implementation(src->data_type(), src->data_layout(), pool_stride_x, pool_size); + const auto *uk = CpuPool2dKernel::get_implementation(PoolDataTypeISASelectorData{ src->data_type(), src->data_layout(), pool_stride_x, pool_size, CPUInfo::get().get_isa() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; @@ -335,7 +300,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, indices, pool_size)); - const auto *uk = get_implementation(src->data_type(), src->data_layout(), pad_stride_info.stride().first, pool_size); + const auto *uk = CpuPool2dKernel::get_implementation(PoolDataTypeISASelectorData{ src->data_type(), 
src->data_layout(), (int)pad_stride_info.stride().first, pool_size, CPUInfo::get().get_isa() }); ARM_COMPUTE_ERROR_ON(uk == nullptr); // Set instance variables @@ -350,7 +315,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin { // Configure kernel window Window win = calculate_max_window(*dst, Steps()); - ICpuKernel::configure(win); + NewICpuKernel::configure(win); } else { @@ -358,7 +323,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration, pool_size.x(), pool_size.y()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICpuKernel::configure(win_config.second); + NewICpuKernel::configure(win_config.second); } } @@ -391,7 +356,7 @@ void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const T { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window); ARM_COMPUTE_ERROR_ON(_run_method == nullptr); const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC_0); @@ -447,6 +412,12 @@ const char *CpuPool2dKernel::name() const { return _name.c_str(); } + +const std::vector<CpuPool2dKernel::PoolingKernel> &CpuPool2dKernel::get_available_kernels() +{ + return available_kernels; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuPool2dKernel.h b/src/cpu/kernels/CpuPool2dKernel.h index aedeb7fbe9..7fd3247d6d 100644 --- a/src/cpu/kernels/CpuPool2dKernel.h +++ b/src/cpu/kernels/CpuPool2dKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -35,8 +35,11 @@ namespace cpu namespace kernels { /** Interface for the pooling layer kernel */ -class CpuPool2dKernel : public ICpuKernel +class CpuPool2dKernel : public NewICpuKernel<CpuPool2dKernel> { +private: + using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type; + public: CpuPool2dKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dKernel); @@ -62,8 +65,14 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; -private: - using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type; + struct PoolingKernel + { + const char *name; + const PoolDataTypeISASelectorPtr is_selected; + PoolingKernelPtr ukernel; + }; + + static const std::vector<PoolingKernel> &get_available_kernels(); private: PoolingLayerInfo _pool_info{}; diff --git a/src/cpu/kernels/CpuQuantizeKernel.h b/src/cpu/kernels/CpuQuantizeKernel.h index eb0814926d..709e1c89c7 100644 --- a/src/cpu/kernels/CpuQuantizeKernel.h +++ b/src/cpu/kernels/CpuQuantizeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace kernels * * @note The implementation supports only 3D input tensors */ -class CpuQuantizeKernel : public ICpuKernel +class CpuQuantizeKernel : public NewICpuKernel<CpuQuantizeKernel> { public: CpuQuantizeKernel() = default; diff --git a/src/cpu/kernels/CpuReshapeKernel.h b/src/cpu/kernels/CpuReshapeKernel.h index d4e2b44b54..6a5c528ecd 100644 --- a/src/cpu/kernels/CpuReshapeKernel.h +++ b/src/cpu/kernels/CpuReshapeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform tensor reshaping */ -class CpuReshapeKernel : public ICpuKernel +class CpuReshapeKernel : public NewICpuKernel<CpuReshapeKernel> { public: CpuReshapeKernel() = default; diff --git a/src/cpu/kernels/CpuScaleKernel.cpp b/src/cpu/kernels/CpuScaleKernel.cpp index 3063d8f682..60564a97dd 100644 --- a/src/cpu/kernels/CpuScaleKernel.cpp +++ b/src/cpu/kernels/CpuScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,52 +48,37 @@ namespace kernels { namespace { -struct ScaleSelectorData -{ - DataType dt; - const CPUInfo &ci; -}; -using ScaleSelectorPtr = std::add_pointer<bool(const ScaleSelectorData &data)>::type; -using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, - InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type; -struct ScaleKernel -{ - const char *name; - const ScaleSelectorPtr is_selected; - ScaleKernelPtr ukernel; -}; - -static const ScaleKernel available_kernels[] = +static const std::vector<CpuScaleKernel::ScaleKernel> available_kernels = { #if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_fp16_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve; }, REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale) }, { "sve_fp32_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; }, REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale) }, { "sve_qu8_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve(); }, + [](const 
DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; }, REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale) }, { "sve_qs8_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale) }, { "sve_u8_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; }, REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale) }, { "sve_s16_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; }, REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale) }, #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ @@ -101,60 +86,43 @@ static const ScaleKernel available_kernels[] = #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_fp16_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>) }, #endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_fp32_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F32; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::common_neon_scale<float>) }, { "neon_qu8_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::qasymm8_neon_scale) }, { 
"neon_qs8_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::qasymm8_signed_neon_scale) }, { "neon_u8_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::U8; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8; }, REGISTER_INTEGER_NEON(arm_compute::cpu::u8_neon_scale) }, { "neon_s16_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::S16; }, + [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16; }, REGISTER_INTEGER_NEON(arm_compute::cpu::s16_neon_scale) }, #endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; -/** Micro-kernel selector - * - * @param[in] data Selection data passed to help pick the appropriate micro-kernel - * - * @return A matching micro-kernel else nullptr - */ -const ScaleKernel *get_implementation(const ScaleSelectorData &data) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected(data)) - { - return &uk; - } - } - return nullptr; -} - Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info) { - const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() }); + const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); @@ -212,7 +180,7 @@ void CpuScaleKernel::configure(const ITensorInfo *src, const ITensorInfo *dx, co dst, info)); - const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() }); + const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); 
ARM_COMPUTE_ERROR_ON_NULLPTR(uk); _run_method = uk->ukernel; @@ -618,6 +586,12 @@ const char *CpuScaleKernel::name() const { return _name.c_str(); } + +const std::vector<CpuScaleKernel::ScaleKernel> &CpuScaleKernel::get_available_kernels() +{ + return available_kernels; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuScaleKernel.h b/src/cpu/kernels/CpuScaleKernel.h index 913b5a5593..94bbdb72a0 100644 --- a/src/cpu/kernels/CpuScaleKernel.h +++ b/src/cpu/kernels/CpuScaleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,8 +35,14 @@ namespace cpu namespace kernels { /** Arm(R) Neon(TM) kernel to perform scaling on a tensor */ -class CpuScaleKernel : public ICpuKernel +class CpuScaleKernel : public NewICpuKernel<CpuScaleKernel> { +private: + /** Scale function to use for the particular function to use */ + using ScaleFunctionPtr = void (CpuScaleKernel::*)(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const Window &window); + using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, + InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type; + public: CpuScaleKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuScaleKernel); @@ -67,6 +73,15 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + struct ScaleKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + ScaleKernelPtr ukernel; + }; + + static const std::vector<ScaleKernel> &get_available_kernels(); + private: #ifdef ENABLE_NCHW_KERNELS /** function to perform scale using area interpolation on the given window @@ -87,11 +102,6 @@ private: void scale_nearest_nchw(const ITensor *src, ITensor *dst, const ITensor *dx, const ITensor 
*dy, const ITensor *offsets, const Window &window); #endif // ENABLE_NCHW_KERNELS - /** Scale function to use for the particular function to use */ - using ScaleFunctionPtr = void (CpuScaleKernel::*)(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const Window &window); - using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, - InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type; - ScaleFunctionPtr _func{ nullptr }; InterpolationPolicy _policy{}; BorderMode _border_mode{}; diff --git a/src/cpu/kernels/CpuSoftmaxKernel.h b/src/cpu/kernels/CpuSoftmaxKernel.h index 8073a677d9..f317662620 100644 --- a/src/cpu/kernels/CpuSoftmaxKernel.h +++ b/src/cpu/kernels/CpuSoftmaxKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Interface for the identifying the max value of 1D Logits */ -class CpuLogits1DMaxKernel : public ICpuKernel +class CpuLogits1DMaxKernel : public NewICpuKernel<CpuLogits1DMaxKernel> { public: CpuLogits1DMaxKernel() = default; @@ -67,7 +67,7 @@ private: /** Interface for softmax computation for QASYMM8 with pre-computed max. */ template <bool IS_LOG = false> -class CpuLogits1DSoftmaxKernel : public ICpuKernel +class CpuLogits1DSoftmaxKernel : public NewICpuKernel<CpuLogits1DSoftmaxKernel<IS_LOG>> { public: CpuLogits1DSoftmaxKernel() = default; diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp index ec65f12dfc..c12feb4331 100644 --- a/src/cpu/kernels/CpuSubKernel.cpp +++ b/src/cpu/kernels/CpuSubKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,85 +39,52 @@ namespace kernels { namespace { -struct SubSelectorData -{ - DataType dt; -}; - -using SubSelectorPtr = std::add_pointer<bool(const SubSelectorData &data)>::type; -using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type; - -struct SubKernel -{ - const char *name; - const SubSelectorPtr is_selected; - SubKernelPtr ukernel; -}; - -static const SubKernel available_kernels[] = +static const std::vector<CpuSubKernel::SubKernel> available_kernels = { { "neon_fp32_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::F32); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); }, REGISTER_FP32_NEON(arm_compute::cpu::sub_same_neon<float>) }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "neon_fp16_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::F16); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16) && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::sub_same_neon<float16_t>) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_u8_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::U8); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::U8); }, REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<uint8_t>) }, { "neon_s16_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::S16); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S16); }, REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int16_t>) }, { "neon_s32_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::S32); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S32); }, REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int32_t>) }, { "neon_qu8_sub", - [](const SubSelectorData & data) { return (data.dt == 
DataType::QASYMM8); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::sub_qasymm8_neon) }, { "neon_qs8_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::sub_qasymm8_signed_neon) }, { "neon_qs16_sub", - [](const SubSelectorData & data) { return (data.dt == DataType::QSYMM16); }, + [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QSYMM16); }, REGISTER_QSYMM16_NEON(arm_compute::cpu::sub_qsymm16_neon) }, }; -/** Micro-kernel selector - * - * @param[in] data Selection data passed to help pick the appropriate micro-kernel - * - * @return A matching micro-kernel else nullptr - */ -const SubKernel *get_implementation(DataType dt) -{ - for(const auto &uk : available_kernels) - { - if(uk.is_selected({ dt })) - { - return &uk; - } - } - return nullptr; -} - inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy) { ARM_COMPUTE_UNUSED(policy); @@ -126,7 +93,8 @@ inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &src1); - const auto *uk = get_implementation(src0.data_type()); + const auto *uk = CpuSubKernel::get_implementation(DataTypeISASelectorData{ src0.data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape()); @@ -157,7 +125,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I set_shape_if_empty(*dst, out_shape); set_data_type_if_unknown(*dst, src0->data_type()); - const auto *uk = get_implementation(src0->data_type()); + const auto 
*uk = CpuSubKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() }); ARM_COMPUTE_ERROR_ON_NULLPTR(uk); _policy = policy; @@ -196,6 +164,12 @@ const char *CpuSubKernel::name() const { return _name.c_str(); } + +const std::vector<CpuSubKernel::SubKernel> &CpuSubKernel::get_available_kernels() +{ + return available_kernels; +} + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h index 80d6be68b5..70f34b1b57 100644 --- a/src/cpu/kernels/CpuSubKernel.h +++ b/src/cpu/kernels/CpuSubKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,8 +34,11 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform subtraction between two tensors */ -class CpuSubKernel : public ICpuKernel +class CpuSubKernel : public NewICpuKernel<CpuSubKernel> { +private: + using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type; + public: CpuSubKernel() = default; ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuSubKernel); @@ -70,8 +73,14 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; -private: - using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type; + struct SubKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + SubKernelPtr ukernel; + }; + + static const std::vector<SubKernel> &get_available_kernels(); private: ConvertPolicy _policy{}; diff --git a/src/cpu/kernels/CpuTransposeKernel.h b/src/cpu/kernels/CpuTransposeKernel.h index 6805eac642..7e1ee5f73d 100644 --- a/src/cpu/kernels/CpuTransposeKernel.h +++ b/src/cpu/kernels/CpuTransposeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm 
Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu namespace kernels { /** Kernel which transposes the elements of a matrix */ -class CpuTransposeKernel : public ICpuKernel +class CpuTransposeKernel : public NewICpuKernel<CpuTransposeKernel> { public: CpuTransposeKernel() = default; diff --git a/src/cpu/kernels/CpuWeightsReshapeKernel.h b/src/cpu/kernels/CpuWeightsReshapeKernel.h index c80bf3b25e..6c2d7ef6f9 100644 --- a/src/cpu/kernels/CpuWeightsReshapeKernel.h +++ b/src/cpu/kernels/CpuWeightsReshapeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,7 +56,7 @@ namespace kernels * \end{array} \right) * @f] */ -class CpuWeightsReshapeKernel : public ICpuKernel +class CpuWeightsReshapeKernel : public NewICpuKernel<CpuWeightsReshapeKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/CpuWinogradConv2dKernel.h b/src/cpu/kernels/CpuWinogradConv2dKernel.h index db2d8acfdb..0c4e28c394 100644 --- a/src/cpu/kernels/CpuWinogradConv2dKernel.h +++ b/src/cpu/kernels/CpuWinogradConv2dKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ namespace arm_compute namespace cpu { /** Interface for the kernel to perform Winograd input transform. */ -class ICpuWinogradConv2dTransformInputKernel : public ICpuKernel +class ICpuWinogradConv2dTransformInputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformInputKernel> { public: /** Get the working space required to perform the transformation. @@ -216,7 +216,7 @@ private: }; /** Interface for the kernel to perform Winograd output transform. 
*/ -class ICpuWinogradConv2dTransformOutputKernel : public ICpuKernel +class ICpuWinogradConv2dTransformOutputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformOutputKernel> { public: /** Get the working space required to perform the transformation. @@ -418,7 +418,7 @@ private: }; /** Interface for the kernel to perform Winograd weights transform. */ -class ICpuWinogradConv2dTransformWeightsKernel : public ICpuKernel +class ICpuWinogradConv2dTransformWeightsKernel : public NewICpuKernel<ICpuWinogradConv2dTransformWeightsKernel> { public: /** Prevent instances of this class from being copied (As this class contains pointers) */ diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h index 8980922945..902e9616d1 100644 --- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h +++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/Types.h" #include "src/core/common/Macros.h" #include "src/cpu/ICpuKernel.h" +#include "src/cpu/kernels/CpuKernelSelectionTypes.h" namespace arm_conv { @@ -44,7 +45,7 @@ namespace cpu namespace kernels { /** This class is a wrapper for the depthwise convolution assembly kernels. 
*/ -class CpuDepthwiseConv2dAssemblyWrapperKernel final : public ICpuKernel +class CpuDepthwiseConv2dAssemblyWrapperKernel final : public NewICpuKernel<CpuDepthwiseConv2dAssemblyWrapperKernel> { public: /** Default constructor */ diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h index 8625fd96b4..daa3168beb 100644 --- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h +++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,7 @@ #include "src/core/NEON/kernels/assembly/pooling.hpp" #include "src/core/common/Macros.h" #include "src/cpu/ICpuKernel.h" +#include "src/cpu/kernels/CpuKernelSelectionTypes.h" #include "pool_common.hpp" @@ -45,7 +46,7 @@ namespace kernels * execute a single assembly kernel in the context of an NEFunction. * */ -class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel +class CpuPool2dAssemblyWrapperKernel final : public NewICpuKernel<CpuPool2dAssemblyWrapperKernel> { public: /** Constructor diff --git a/support/StringSupport.h b/support/StringSupport.h index 5e237c7dff..e8b3ca7ab3 100644 --- a/support/StringSupport.h +++ b/support/StringSupport.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -131,6 +131,12 @@ inline std::string to_string(T && value) return stream.str(); } +// Specialization for const std::string& +inline std::string to_string(const std::string &value) +{ + return value; +} + /** Convert string values to float. * * @note This function implements the same behaviour as std::stof. 
The latter @@ -164,6 +170,12 @@ inline std::string to_string(T &&value) return ::std::to_string(std::forward<T>(value)); } +// Specialization for const std::string& +inline std::string to_string(const std::string &value) +{ + return value; +} + /** Convert string values to float. * * @note This function acts as a convenience wrapper around std::stof. The diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp index 69fe9053d8..8d70ca5415 100644 --- a/tests/validation/NEON/ActivationLayer.cpp +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,8 @@ #include "arm_compute/runtime/RuntimeContext.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuActivationKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ActivationFunctionsDataset.h" @@ -279,6 +281,43 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat( + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + })), + combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + }))), + combine(framework::dataset::make("CpuExt", std::string("SVE2")), + framework::dataset::make("DataType", { DataType::QASYMM8, + 
DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + }))), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuActivationKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} // clang-format on // *INDENT-ON* diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp index f3e4dfc6e5..c72e082a74 100644 --- a/tests/validation/NEON/ArithmeticAddition.cpp +++ b/tests/validation/NEON/ArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,6 +25,8 @@ #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuAddKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ConvertPolicyDataset.h" @@ -85,6 +87,49 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( ConvertPolicy::WRAP); ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS); } + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat( + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::U8, + DataType::S16, + DataType::S32, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + })), + combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::U8, + DataType::S16, + DataType::S32 + }))), + combine(framework::dataset::make("CpuExt", std::string("SVE2")), + framework::dataset::make("DataType", { DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + }))), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuAddKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} // clang-format 
on // *INDENT-ON* diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp index 419ce56e44..d3bd3e0b1c 100644 --- a/tests/validation/NEON/Floor.cpp +++ b/tests/validation/NEON/Floor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,8 @@ #include "arm_compute/runtime/NEON/functions/NEFloor.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuFloorKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ShapeDatasets.h" @@ -62,6 +64,30 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( const Status status = NEFloor::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)); ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); } + + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + })), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuFloorKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_floor"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} // clang-format on // *INDENT-ON* |