author    Giorgio Arena <giorgio.arena@arm.com>    2021-11-18 18:02:13 +0000
committer Yair Schwarzbaum <yair.schwarzbaum@arm.com>    2022-01-12 06:52:04 +0000
commit    5ae8d804d67f57fbfa793800ddcc21a5aff954dd (patch)
tree      1defbe7f788645f6f0fb4c3f79be6c4b8ecfb709
parent    3475ffe40b7db99c782cbaf351aa7b4e341562ef (diff)
download  ComputeLibrary-5ae8d804d67f57fbfa793800ddcc21a5aff954dd.tar.gz
Enable kernel selection testing (Phase #1)
Change-Id: I1d65fb9d3a7583cf8d4163ca7c0fbee27dc52633
Signed-off-by: Yair Schwarzbaum <yair.schwarzbaum@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6767
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/CPP/CPPTypes.h  12
-rw-r--r--  arm_compute/core/Utils.h  45
-rw-r--r--  src/core/CPP/CPPTypes.cpp  8
-rw-r--r--  src/cpu/ICpuKernel.h  40
-rw-r--r--  src/cpu/kernels/CpuActivationKernel.cpp  75
-rw-r--r--  src/cpu/kernels/CpuActivationKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuAddKernel.cpp  115
-rw-r--r--  src/cpu/kernels/CpuAddKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuCastKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuCol2ImKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateBatchKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateDepthKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateHeightKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateWidthKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuCopyKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDequantizeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDirectConv2dKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDirectConv3dKernel.cpp  56
-rw-r--r--  src/cpu/kernels/CpuDirectConv3dKernel.h  22
-rw-r--r--  src/cpu/kernels/CpuElementwiseKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuElementwiseUnaryKernel.cpp  62
-rw-r--r--  src/cpu/kernels/CpuElementwiseUnaryKernel.h  20
-rw-r--r--  src/cpu/kernels/CpuFillKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuFloorKernel.cpp  53
-rw-r--r--  src/cpu/kernels/CpuFloorKernel.h  21
-rw-r--r--  src/cpu/kernels/CpuGemmInterleave4x4Kernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h  6
-rw-r--r--  src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmMatrixAdditionKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmTranspose1xWKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuIm2ColKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuKernelSelectionTypes.h  60
-rw-r--r--  src/cpu/kernels/CpuMulKernel.h  6
-rw-r--r--  src/cpu/kernels/CpuPermuteKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.cpp  89
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuQuantizeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuReshapeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuScaleKernel.cpp  72
-rw-r--r--  src/cpu/kernels/CpuScaleKernel.h  24
-rw-r--r--  src/cpu/kernels/CpuSoftmaxKernel.h  6
-rw-r--r--  src/cpu/kernels/CpuSubKernel.cpp  64
-rw-r--r--  src/cpu/kernels/CpuSubKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuTransposeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuWeightsReshapeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuWinogradConv2dKernel.h  8
-rw-r--r--  src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h  5
-rw-r--r--  src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h  5
-rw-r--r--  support/StringSupport.h  14
-rw-r--r--  tests/validation/NEON/ActivationLayer.cpp  41
-rw-r--r--  tests/validation/NEON/ArithmeticAddition.cpp  47
-rw-r--r--  tests/validation/NEON/Floor.cpp  28
63 files changed, 673 insertions, 527 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index 82a6a6c324..a021bdf5e4 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,11 @@
namespace arm_compute
{
+namespace cpuinfo
+{
+struct CpuIsaInfo;
+} // namespace cpuinfo
+
#define ARM_COMPUTE_CPU_MODEL_LIST \
X(GENERIC) \
X(GENERIC_FP16) \
@@ -134,6 +139,11 @@ public:
* @return Current thread's @ref CPUModel
*/
CPUModel get_cpu_model() const;
+ /** Gets the current cpu's ISA information
+ *
+ * @return Current cpu's ISA information
+ */
+ cpuinfo::CpuIsaInfo get_isa() const;
/** Gets the L1 cache size
*
* @return the size of the L1 cache
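A minimal sketch (illustrative, not part of this patch; the helper name is hypothetical) of the new accessor: CPUInfo::get_isa() returns a cpuinfo::CpuIsaInfo whose boolean capability flags replace the individual CPUInfo::has_sve()/has_sve2()/has_fp16() queries when picking a micro-kernel. The field name below matches the checks performed by the selector lambdas later in this patch (data.isa.sve2).

#include "arm_compute/core/CPP/CPPTypes.h"
#include "src/common/cpuinfo/CpuIsaInfo.h"

bool can_use_sve2_quantized_kernels()
{
    // get_isa() is the accessor added in this patch; "sve2" is the field name used by
    // the selector lambdas (data.isa.sve2).
    const arm_compute::cpuinfo::CpuIsaInfo isa = arm_compute::CPUInfo::get().get_isa();
    return isa.sve2;
}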
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 88cb295c44..b24955d778 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1200,6 +1200,49 @@ inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
return vec_size;
}
+/** Returns the suffix string of CPU kernel implementation names based on the given data type
+ *
+ * @param[in] data_type The data type the CPU kernel implementation uses
+ *
+ * @return the suffix string of CPU kernel implementations
+ */
+inline std::string cpu_impl_dt(const DataType &data_type)
+{
+ std::string ret = "";
+
+ switch(data_type)
+ {
+ case DataType::F32:
+ ret = "fp32";
+ break;
+ case DataType::F16:
+ ret = "fp16";
+ break;
+ case DataType::U8:
+ ret = "u8";
+ break;
+ case DataType::S16:
+ ret = "s16";
+ break;
+ case DataType::S32:
+ ret = "s32";
+ break;
+ case DataType::QASYMM8:
+ ret = "qu8";
+ break;
+ case DataType::QASYMM8_SIGNED:
+ ret = "qs8";
+ break;
+ case DataType::QSYMM16:
+ ret = "qs16";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported.");
+ }
+
+ return ret;
+}
+
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Print consecutive elements to an output stream.
*
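A short illustrative sketch (not part of this patch; the helper name is hypothetical) of how the suffix returned by cpu_impl_dt() composes into the micro-kernel names used throughout this change. The "neon"/"sve" prefix and "_activation" operator suffix are examples taken from the kernel tables in this patch, not a general rule.

#include <string>

#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"

std::string expected_activation_kernel_name(arm_compute::DataType dt, bool uses_sve)
{
    // e.g. DataType::F32 on the Neon path gives "neon_fp32_activation", matching the
    // entries in the activation kernel table further down in this patch.
    const std::string arch = uses_sve ? "sve" : "neon";
    return arch + "_" + arm_compute::cpu_impl_dt(dt) + "_activation";
}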
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index 44cd000ada..c197932a13 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "src/common/cpuinfo/CpuInfo.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
namespace arm_compute
{
@@ -110,6 +111,11 @@ CPUModel CPUInfo::get_cpu_model(unsigned int cpuid) const
return _impl->info.cpu_model(cpuid);
}
+cpuinfo::CpuIsaInfo CPUInfo::get_isa() const
+{
+ return _impl->info.isa();
+}
+
unsigned int CPUInfo::get_L1_cache_size() const
{
return _impl->L1_cache_size;
diff --git a/src/cpu/ICpuKernel.h b/src/cpu/ICpuKernel.h
index 650b3a7d0b..03aec5c08e 100644
--- a/src/cpu/ICpuKernel.h
+++ b/src/cpu/ICpuKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,12 +25,50 @@
#define ARM_COMPUTE_ICPUKERNEL_H
#include "arm_compute/core/CPP/ICPPKernel.h"
+#include "src/cpu/kernels/CpuKernelSelectionTypes.h"
namespace arm_compute
{
namespace cpu
{
+enum class KernelSelectionType
+{
+ Preferred, /**< Retrieve the best implementation available for the given Cpu ISA, ignoring the build flags */
+ Supported /**< Retrieve the best implementation available for the given Cpu ISA that is supported by the current build */
+};
+
using ICpuKernel = arm_compute::ICPPKernel;
+
+template <class Derived>
+/* This is a temporary name used during stage 1 of adding unit tests for multi-ISA support.
+In the next stage NewICpuKernel will be renamed back to ICpuKernel. */
+class NewICpuKernel : public ICPPKernel
+{
+public:
+ /** Micro-kernel selector
+ *
+ * @param[in] selector Selection struct containing the information needed to pick the appropriate micro-kernel
+ * @param[in] selection_type (Optional) Decides whether to get the best implementation for the given hardware or for the given build
+ *
+ * @return A matching micro-kernel else nullptr
+ */
+
+ template <typename SelectorType>
+ static const auto *get_implementation(const SelectorType &selector, KernelSelectionType selection_type = KernelSelectionType::Supported)
+ {
+ using kernel_type = typename std::remove_reference<decltype(Derived::get_available_kernels())>::type::value_type;
+
+ for(const auto &uk : Derived::get_available_kernels())
+ {
+ if(uk.is_selected(selector) && (selection_type == KernelSelectionType::Preferred || uk.ukernel != nullptr))
+ {
+ return &uk;
+ }
+ }
+
+ return static_cast<kernel_type *>(nullptr);
+ }
+};
} // namespace cpu
} // namespace arm_compute
#endif /* ARM_COMPUTE_ICPUKERNEL_H */
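A minimal usage sketch (illustrative, not part of this patch; the free function name is hypothetical) of the new CRTP base: a derived kernel exposes a static get_available_kernels() table, and NewICpuKernel<Derived>::get_implementation() walks it. KernelSelectionType::Supported (the default) skips entries whose ukernel was compiled out, while Preferred returns the best match for the ISA regardless of build flags.

#include "arm_compute/core/CPP/CPPTypes.h"
#include "arm_compute/core/ITensorInfo.h"
#include "src/cpu/kernels/CpuActivationKernel.h"

const char *preferred_activation_ukernel_name(const arm_compute::ITensorInfo *src)
{
    using namespace arm_compute;
    using namespace arm_compute::cpu;
    using namespace arm_compute::cpu::kernels;

    // Same call pattern as CpuActivationKernel::validate()/configure() in this patch,
    // but asking for the Preferred implementation instead of the default Supported one.
    const auto *uk = CpuActivationKernel::get_implementation(
                         DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() },
                         KernelSelectionType::Preferred);

    // uk->ukernel may still be nullptr here, since Preferred ignores the build flags.
    return uk != nullptr ? uk->name : nullptr;
}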
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 3af379d8af..c048b14a96 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,94 +43,60 @@ namespace kernels
{
namespace
{
-struct ActivationSelectorData
+static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels =
{
- DataType dt;
- const CPUInfo &ci;
-};
-
-using ActivationSelectorPtr = std::add_pointer<bool(const ActivationSelectorData &data)>::type;
-using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
-
-struct ActivationKernel
-{
- const char *name;
- const ActivationSelectorPtr is_selected;
- ActivationKernelPtr ukernel;
-};
-
-static const ActivationKernel available_kernels[] =
-{
-#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve; },
REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation)
},
{
"sve_fp32_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_activation)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_activation)
},
{
"neon_fp32_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_activation)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
-#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
- "sve_qu8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); },
+ "sve2_qu8_activation",
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; },
REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_qasymm8_activation)
},
{
- "sve_qs8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); },
+ "sve2_qs8_activation",
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve2; },
REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::sve2_qasymm8_signed_activation)
},
{
- "sve_qs16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16 && data.ci.has_sve2(); },
+ "sve2_qs16_activation",
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16 && data.isa.sve2; },
REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
{
"neon_qu8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation)
},
{
"neon_qs8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_activation)
},
{
"neon_qs16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qsymm16_activation)
},
};
-const ActivationKernel *get_implementation(const ActivationSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
/* Supported activation in the 8-bit integer domain */
static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations =
{
@@ -155,7 +121,8 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
- const auto *uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const DataType data_type = src->data_type();
@@ -208,7 +175,8 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, activation_info));
- const auto uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_act_info = activation_info;
@@ -269,6 +237,11 @@ const char *CpuActivationKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuActivationKernel::ActivationKernel> &CpuActivationKernel::get_available_kernels()
+{
+ return available_kernels;
+}
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuActivationKernel.h b/src/cpu/kernels/CpuActivationKernel.h
index 8e78d86016..ac974850aa 100644
--- a/src/cpu/kernels/CpuActivationKernel.h
+++ b/src/cpu/kernels/CpuActivationKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,11 @@ namespace cpu
namespace kernels
{
/** Interface for the activation kernel */
-class CpuActivationKernel : public ICpuKernel
+class CpuActivationKernel : public NewICpuKernel<CpuActivationKernel>
{
+private:
+ using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
+
public:
CpuActivationKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuActivationKernel);
@@ -70,8 +73,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
+ struct ActivationKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ ActivationKernelPtr ukernel;
+ };
+
+ static const std::vector<ActivationKernel> &get_available_kernels();
private:
ActivationLayerInfo _act_info{};
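The ActivationKernel struct and get_available_kernels() accessor above expose the kernel table publicly, which is what the updated validation tests rely on. A hedged sketch (illustrative, not part of this patch; the helper name and expected-name argument are hypothetical) of the kind of check that becomes possible:

#include <string>

#include "arm_compute/core/Types.h"
#include "src/common/cpuinfo/CpuIsaInfo.h"
#include "src/cpu/kernels/CpuActivationKernel.h"

bool activation_selector_picks(const char *expected_name, arm_compute::DataType dt, const arm_compute::cpuinfo::CpuIsaInfo &isa)
{
    using namespace arm_compute::cpu;
    using namespace arm_compute::cpu::kernels;

    // Preferred ignores the build flags, so the selection logic can be exercised even
    // when the matching micro-kernel is compiled out of the current binary.
    const auto *uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ dt, isa },
                                                             KernelSelectionType::Preferred);
    return uk != nullptr && std::string(uk->name) == expected_name;
}

For instance, with dt == DataType::QASYMM8 and isa.sve2 set, the activation table in this patch resolves to "sve2_qu8_activation".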
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index f3ee032ec5..c27ee9f1bd 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,159 +41,116 @@ namespace kernels
{
namespace
{
-struct AddSelectorData
+static const std::vector<CpuAddKernel::AddKernel> available_kernels =
{
- DataType dt;
- const CPUInfo &ci;
-};
-
-using AddSelectorPtr = std::add_pointer<bool(const AddSelectorData &data)>::type;
-using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
-struct AddKernel
-{
- const char *name;
- const AddSelectorPtr is_selected;
- AddKernelPtr ukernel;
-};
-
-static const AddKernel available_kernels[] =
-{
-#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
"sve2_qu8_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QASYMM8) && data.ci.has_sve2();
+ return (data.dt == DataType::QASYMM8) && data.isa.sve2;
},
REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2)
},
{
"sve2_qs8_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2();
+ return (data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2;
},
REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2)
},
{
"sve2_qs16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QSYMM16) && data.ci.has_sve2();
+ return (data.dt == DataType::QSYMM16) && data.isa.sve2;
},
REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2)
},
-#endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
-#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp32_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::F32) && data.ci.has_sve();
+ return (data.dt == DataType::F32) && data.isa.sve;
},
REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve)
},
{
"sve_fp16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::F16) && data.ci.has_sve();
+ return (data.dt == DataType::F16) && data.isa.sve;
},
REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve)
},
{
"sve_u8_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::U8) && data.ci.has_sve();
+ return (data.dt == DataType::U8) && data.isa.sve;
},
REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve)
},
{
"sve_s16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::S16) && data.ci.has_sve();
+ return (data.dt == DataType::S16) && data.isa.sve;
},
REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve)
},
{
"sve_s32_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::S32) && data.ci.has_sve();
+ return (data.dt == DataType::S32) && data.isa.sve;
},
REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp32_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon)
},
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::F16) && data.ci.has_fp16();
+ return (data.dt == DataType::F16) && data.isa.fp16;
},
REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon)
},
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_u8_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::U8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::U8); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon)
},
{
"neon_s16_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::S16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S16); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon)
},
{
"neon_s32_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::S32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S32); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
-#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
{
"neon_qu8_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::add_qasymm8_neon)
},
{
"neon_qs8_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_qasymm8_signed_neon)
},
{
"neon_qs16_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::QSYMM16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QSYMM16); },
REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon)
- },
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */
-};
-
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt, cpuinfo }))
- {
- return &uk;
- }
}
- return nullptr;
-}
+};
Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy)
{
@@ -220,7 +177,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons
"Wrong shape for dst");
}
- const auto *uk = get_implementation(CPUInfo::get(), src0.data_type());
+ const auto *uk = CpuAddKernel::get_implementation(DataTypeISASelectorData{ src0.data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
return Status{};
@@ -246,7 +203,8 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
- const auto uk = get_implementation(CPUInfo::get(), src0->data_type());
+ const auto uk = CpuAddKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_policy = policy;
@@ -256,7 +214,7 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
// Configure kernel window
auto win_config = validate_and_configure_window(*src0, *src1, *dst);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICpuKernel::configure(win_config.second);
+ NewICpuKernel::configure(win_config.second);
}
Status CpuAddKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
@@ -273,7 +231,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(tensors.empty());
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
@@ -290,6 +248,11 @@ const char *CpuAddKernel::name() const
return _name.c_str();
}
+const std::vector<CpuAddKernel::AddKernel> &CpuAddKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
{
ARM_COMPUTE_UNUSED(thread_count);
@@ -298,7 +261,7 @@ size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
{
return 10240;
}
- else if (platform.get_cpu_model() == CPUModel::A76)
+ else if(platform.get_cpu_model() == CPUModel::A76)
{
return 9216;
}
diff --git a/src/cpu/kernels/CpuAddKernel.h b/src/cpu/kernels/CpuAddKernel.h
index a0c7e497dd..93b86de4ae 100644
--- a/src/cpu/kernels/CpuAddKernel.h
+++ b/src/cpu/kernels/CpuAddKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,9 +34,19 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform addition between two tensors */
-class CpuAddKernel : public ICpuKernel
+class CpuAddKernel : public NewICpuKernel<CpuAddKernel>
{
+private:
+ using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+
public:
+ struct AddKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ AddKernelPtr ukernel;
+ };
+
CpuAddKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuAddKernel);
/** Initialise the kernel's input, dst and border mode.
@@ -79,8 +89,7 @@ public:
*/
size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
-private:
- using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+ static const std::vector<AddKernel> &get_available_kernels();
private:
ConvertPolicy _policy{};
diff --git a/src/cpu/kernels/CpuCastKernel.h b/src/cpu/kernels/CpuCastKernel.h
index a8ce97230e..9aeb537044 100644
--- a/src/cpu/kernels/CpuCastKernel.h
+++ b/src/cpu/kernels/CpuCastKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace kernels
*
* @note When casting between quantized types the scale and zeroPoint are ignored
*/
-class CpuCastKernel : public ICpuKernel
+class CpuCastKernel : public NewICpuKernel<CpuCastKernel>
{
public:
CpuCastKernel() = default;
diff --git a/src/cpu/kernels/CpuCol2ImKernel.h b/src/cpu/kernels/CpuCol2ImKernel.h
index 8e09a2b689..43be476b2f 100644
--- a/src/cpu/kernels/CpuCol2ImKernel.h
+++ b/src/cpu/kernels/CpuCol2ImKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuCol2ImKernel : public ICpuKernel
+class CpuCol2ImKernel : public NewICpuKernel<CpuCol2ImKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuConcatenateBatchKernel.h b/src/cpu/kernels/CpuConcatenateBatchKernel.h
index 91f2808f81..2b5946571b 100644
--- a/src/cpu/kernels/CpuConcatenateBatchKernel.h
+++ b/src/cpu/kernels/CpuConcatenateBatchKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@ namespace kernels
/** Interface for the batch concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class CpuConcatenateBatchKernel : public ICpuKernel
+class CpuConcatenateBatchKernel : public NewICpuKernel<CpuConcatenateBatchKernel>
{
public:
CpuConcatenateBatchKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateDepthKernel.h b/src/cpu/kernels/CpuConcatenateDepthKernel.h
index 063118b33b..90b68d3a06 100644
--- a/src/cpu/kernels/CpuConcatenateDepthKernel.h
+++ b/src/cpu/kernels/CpuConcatenateDepthKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace kernels
/** Interface for the depth concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class CpuConcatenateDepthKernel : public ICpuKernel
+class CpuConcatenateDepthKernel : public NewICpuKernel<CpuConcatenateDepthKernel>
{
public:
CpuConcatenateDepthKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateHeightKernel.h b/src/cpu/kernels/CpuConcatenateHeightKernel.h
index 883c59a206..8ace9809cc 100644
--- a/src/cpu/kernels/CpuConcatenateHeightKernel.h
+++ b/src/cpu/kernels/CpuConcatenateHeightKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@ namespace kernels
/** Interface for the height concatenate kernel.
* The source tensor will be concatenated into the destination tensor.
*/
-class CpuConcatenateHeightKernel : public ICpuKernel
+class CpuConcatenateHeightKernel : public NewICpuKernel<CpuConcatenateHeightKernel>
{
public:
CpuConcatenateHeightKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateWidthKernel.h b/src/cpu/kernels/CpuConcatenateWidthKernel.h
index 3b4612ab0d..d5f2ef24d6 100644
--- a/src/cpu/kernels/CpuConcatenateWidthKernel.h
+++ b/src/cpu/kernels/CpuConcatenateWidthKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace kernels
/** Interface for the width concatenate kernel.
* The source tensor will be concatenated into the destination tensor.
*/
-class CpuConcatenateWidthKernel : public ICPPKernel
+class CpuConcatenateWidthKernel : public NewICpuKernel<CpuConcatenateWidthKernel>
{
public:
CpuConcatenateWidthKernel() = default;
diff --git a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
index 70f0a742f9..001a6fcab0 100644
--- a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
+++ b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ namespace kernels
*
* @note This function assumes the weights are already reshaped (transposed)
*/
-class CpuConvertFullyConnectedWeightsKernel : public ICpuKernel
+class CpuConvertFullyConnectedWeightsKernel : public NewICpuKernel<CpuConvertFullyConnectedWeightsKernel>
{
public:
CpuConvertFullyConnectedWeightsKernel() = default;
diff --git a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
index 8cce1eaf1d..9d5ee39126 100644
--- a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
+++ b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to convert asymmetric signed to asymmetric signed and vice-versa */
-class CpuConvertQuantizedSignednessKernel : public ICpuKernel
+class CpuConvertQuantizedSignednessKernel : public NewICpuKernel<CpuConvertQuantizedSignednessKernel>
{
public:
CpuConvertQuantizedSignednessKernel() = default;
diff --git a/src/cpu/kernels/CpuCopyKernel.h b/src/cpu/kernels/CpuCopyKernel.h
index 193f38078b..ee4adeb4eb 100644
--- a/src/cpu/kernels/CpuCopyKernel.h
+++ b/src/cpu/kernels/CpuCopyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to perform a copy between two tensors */
-class CpuCopyKernel : public ICpuKernel
+class CpuCopyKernel : public NewICpuKernel<CpuCopyKernel>
{
public:
CpuCopyKernel() = default;
diff --git a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
index 1afb6bed4c..eae682bb6d 100644
--- a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
+++ b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class CpuDepthwiseConv2dNativeKernel : public ICpuKernel
+class CpuDepthwiseConv2dNativeKernel : public NewICpuKernel<CpuDepthwiseConv2dNativeKernel>
{
public:
CpuDepthwiseConv2dNativeKernel() = default;
diff --git a/src/cpu/kernels/CpuDequantizeKernel.h b/src/cpu/kernels/CpuDequantizeKernel.h
index f515cd36f9..834c039a76 100644
--- a/src/cpu/kernels/CpuDequantizeKernel.h
+++ b/src/cpu/kernels/CpuDequantizeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the dequantization layer kernel. */
-class CpuDequantizeKernel : public ICpuKernel
+class CpuDequantizeKernel : public NewICpuKernel<CpuDequantizeKernel>
{
public:
CpuDequantizeKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv2dKernel.h b/src/cpu/kernels/CpuDirectConv2dKernel.h
index 3ba7f7ed5f..1f5568743e 100644
--- a/src/cpu/kernels/CpuDirectConv2dKernel.h
+++ b/src/cpu/kernels/CpuDirectConv2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform Direct Convolution Layer. */
-class CpuDirectConv2dKernel : public ICpuKernel
+class CpuDirectConv2dKernel : public NewICpuKernel<CpuDirectConv2dKernel>
{
public:
CpuDirectConv2dKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
index a68936bbae..95011f79aa 100644
--- a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
+++ b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace kernels
* @note For quantized computations (i.e. @p src of S32 type) the output data type for auto-initialization must be passed as part
* of the @ref DirectConvolutionLayerOutputStageKernelInfo.
*/
-class CpuDirectConv2dOutputStageKernel : public ICpuKernel
+class CpuDirectConv2dOutputStageKernel : public NewICpuKernel<CpuDirectConv2dOutputStageKernel>
{
public:
CpuDirectConv2dOutputStageKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.cpp b/src/cpu/kernels/CpuDirectConv3dKernel.cpp
index 36764a625d..22c60cd994 100644
--- a/src/cpu/kernels/CpuDirectConv3dKernel.cpp
+++ b/src/cpu/kernels/CpuDirectConv3dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,64 +49,32 @@ namespace kernels
{
namespace
{
-struct DirectConv3dSelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using DirectConv3dSelectorPtr = std::add_pointer<bool(const DirectConv3dSelectorData &data)>::type;
-using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type;
-struct DirectConv3dKernel
-{
- const char *name;
- const DirectConv3dSelectorPtr is_selected;
- DirectConv3dKernelPtr ukernel;
-};
-
-static const DirectConv3dKernel available_kernels[] =
+static const std::vector<CpuDirectConv3dKernel::DirectConv3dKernel> available_kernels =
{
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc<float16_t>)
},
#endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc<float>)
},
{
"neon_qasymm8_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc<uint8_t>)
},
{
"neon_qasymm8_signed_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc<int8_t>)
}
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const DirectConv3dKernel *get_implementation(const DirectConv3dSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const Conv3dInfo &conv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
@@ -117,7 +85,8 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, src1);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation != Size3D(1U, 1U, 1U));
- const auto *uk = get_implementation(DirectConv3dSelectorData{ src0->data_type(), CPUInfo::get() });
+ const auto *uk = CpuDirectConv3dKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const DataLayout data_layout = src0->data_layout();
@@ -161,7 +130,8 @@ void CpuDirectConv3dKernel::configure(const ITensorInfo *src0, const ITensorInfo
ARM_COMPUTE_UNUSED(src2);
ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
- const auto *uk = get_implementation(DirectConv3dSelectorData{ src0->data_type(), CPUInfo::get() });
+ const auto *uk = CpuDirectConv3dKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_conv_info = conv_info;
@@ -210,6 +180,12 @@ const char *CpuDirectConv3dKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuDirectConv3dKernel::DirectConv3dKernel> &CpuDirectConv3dKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.h b/src/cpu/kernels/CpuDirectConv3dKernel.h
index ff3b30f8ae..6ae70bd3b7 100644
--- a/src/cpu/kernels/CpuDirectConv3dKernel.h
+++ b/src/cpu/kernels/CpuDirectConv3dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+
namespace arm_compute
{
namespace cpu
@@ -34,8 +35,12 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform 3D Direct Convolution Layer. */
-class CpuDirectConv3dKernel : public ICpuKernel
+class CpuDirectConv3dKernel : public NewICpuKernel<CpuDirectConv3dKernel>
{
+private:
+ /* Template function for convolution 3d NDHWC */
+ using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type;
+
public:
CpuDirectConv3dKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv3dKernel);
@@ -71,14 +76,21 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- /* Template function for convolution 3d NDHWC */
- using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type;
+ struct DirectConv3dKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ DirectConv3dKernelPtr ukernel;
+ };
+
+ static const std::vector<DirectConv3dKernel> &get_available_kernels();
+private:
Conv3dInfo _conv_info{};
DirectConv3dKernelPtr _run_method{ nullptr };
std::string _name{};
};
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuElementwiseKernel.h b/src/cpu/kernels/CpuElementwiseKernel.h
index f323fe4470..bb081cbec1 100644
--- a/src/cpu/kernels/CpuElementwiseKernel.h
+++ b/src/cpu/kernels/CpuElementwiseKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ namespace kernels
* @f[ dst(x,y) = OP(src0(x,y), src1(x,y))@f]
*
*/
-class CpuElementwiseKernel : public ICpuKernel
+class CpuElementwiseKernel : public NewICpuKernel<CpuElementwiseKernel>
{
public:
CpuElementwiseKernel() = default;
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 3573fa0815..61bc64b235 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,77 +43,58 @@ namespace kernels
{
namespace
{
-struct ElementwiseUnarySelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using ElementwiseUnarySelector = std::add_pointer<bool(const ElementwiseUnarySelectorData &)>::type;
-
-struct ElementwiseUnaryKernel
-{
- const char *name;
- const ElementwiseUnarySelector is_selected;
- CpuElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel;
-};
-
-static const ElementwiseUnaryKernel available_kernels[] =
+static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> available_kernels =
{
#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
- REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>),
+ [](const DataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::F32 && data.isa.sve;
+ },
+ REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>)
},
{
"sve_fp16_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data)
+ {
+ return (data.dt == DataType::F16) && data.isa.sve;
+ },
REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op<__fp16>),
},
{
"sve_s32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S32 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<int32_t>),
},
#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<float>),
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<__fp16>),
},
#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_s32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S32; },
REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<int32_t>),
},
#endif // defined(ARM_COMPUTE_ENABLE_NEON)
};
-const ElementwiseUnaryKernel *get_implementation(DataType dt)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt, CPUInfo::get() }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
} // namespace
void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
{
ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
- const auto uk = get_implementation(src.data_type());
+ const auto uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
_op = op;
@@ -128,14 +109,15 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo
auto shape_and_window = compute_output_shape_and_window(src.tensor_shape());
auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type());
- ICpuKernel::configure(shape_and_window.second);
+ NewICpuKernel::configure(shape_and_window.second);
}
Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src);
- const auto *uk = get_implementation(src.data_type());
+ const auto *uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
switch(op)
@@ -177,6 +159,12 @@ const char *CpuElementwiseUnaryKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> &CpuElementwiseUnaryKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/cpu/kernels/CpuElementwiseUnaryKernel.h
index f72eddf737..c520b89618 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.h
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,8 +39,11 @@ namespace kernels
* Element-wise operation is computed by:
* @f[ dst(x) = OP(src(x))@f]
*/
-class CpuElementwiseUnaryKernel : public ICpuKernel
+class CpuElementwiseUnaryKernel : public NewICpuKernel<CpuElementwiseUnaryKernel>
{
+private:
+ using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
+
public:
CpuElementwiseUnaryKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuElementwiseUnaryKernel);
@@ -64,11 +67,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
- /** Common signature for all the specialised elementwise unary micro-kernels
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
+ struct ElementwiseUnaryKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ ElementwiseUnaryUkernelPtr ukernel;
+ };
+
+ static const std::vector<ElementwiseUnaryKernel> &get_available_kernels();
private:
ElementWiseUnary _op{};
diff --git a/src/cpu/kernels/CpuFillKernel.h b/src/cpu/kernels/CpuFillKernel.h
index 3bc6a40309..5262ecc5c6 100644
--- a/src/cpu/kernels/CpuFillKernel.h
+++ b/src/cpu/kernels/CpuFillKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ namespace cpu
namespace kernels
{
/** Kernel for filling a tensor with a given constant value */
-class CpuFillKernel : public ICpuKernel
+class CpuFillKernel : public NewICpuKernel<CpuFillKernel>
{
public:
CpuFillKernel() = default;
diff --git a/src/cpu/kernels/CpuFloorKernel.cpp b/src/cpu/kernels/CpuFloorKernel.cpp
index bcac1a41fc..65e390a81a 100644
--- a/src/cpu/kernels/CpuFloorKernel.cpp
+++ b/src/cpu/kernels/CpuFloorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,58 +42,25 @@ namespace kernels
{
namespace
{
-struct FloorSelectorData
-{
- DataType dt;
-};
-
-using FloorSelectorPtr = std::add_pointer<bool(const FloorSelectorData &data)>::type;
-using FloorUKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
-
-struct FloorUKernel
-{
- const char *name;
- const FloorSelectorPtr is_selected;
- FloorUKernelPtr ukernel;
-};
-
-static const FloorUKernel available_kernels[] =
+static const std::vector<CpuFloorKernel::FloorKernel> available_kernels =
{
{
"neon_fp16_floor",
- [](const FloorSelectorData & data) { return data.dt == DataType::F16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_floor)
},
{
"neon_fp32_floor",
- [](const FloorSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_floor)
- },
-};
-
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const FloorUKernel *get_implementation(const FloorSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
}
- return nullptr;
-}
+};
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- const auto *uk = get_implementation(FloorSelectorData{ src->data_type() });
+ const auto *uk = CpuFloorKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
// Validate in case of configured output
@@ -114,7 +81,7 @@ void CpuFloorKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
auto_init_if_empty(*dst, src->tensor_shape(), 1, src->data_type());
- const auto *uk = get_implementation(FloorSelectorData{ src->data_type() });
+ const auto *uk = CpuFloorKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_run_method = uk->ukernel;
@@ -172,6 +139,12 @@ const char *CpuFloorKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuFloorKernel::FloorKernel> &CpuFloorKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuFloorKernel.h b/src/cpu/kernels/CpuFloorKernel.h
index ffb9658190..2b102a0515 100644
--- a/src/cpu/kernels/CpuFloorKernel.h
+++ b/src/cpu/kernels/CpuFloorKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,11 @@ namespace cpu
namespace kernels
{
/** Cpu accelerated kernel to perform a floor operation */
-class CpuFloorKernel : public ICpuKernel
+class CpuFloorKernel : public NewICpuKernel<CpuFloorKernel>
{
+private:
+ using FloorKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
+
public:
CpuFloorKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuFloorKernel);
@@ -65,12 +68,18 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using FloorUKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
+ struct FloorKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ FloorKernelPtr ukernel;
+ };
+
+ static const std::vector<FloorKernel> &get_available_kernels();
private:
- FloorUKernelPtr _run_method{ nullptr };
- std::string _name{};
+ FloorKernelPtr _run_method{ nullptr };
+ std::string _name{};
};
} // namespace kernels
} // namespace cpu
diff --git a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
index 047776bd1e..13b46142c4 100644
--- a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
+++ b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,7 @@ namespace kernels
*
* After this operation, the dst matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
*/
-class CpuGemmInterleave4x4Kernel : public ICpuKernel
+class CpuGemmInterleave4x4Kernel : public NewICpuKernel<CpuGemmInterleave4x4Kernel>
{
public:
CpuGemmInterleave4x4Kernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
index 083ee187ef..6d06f12e54 100644
--- a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,7 @@ namespace kernels
* -# Compute the int32 matrix product of the resulting a * b and store the result as int32
*
*/
-class CpuGemmLowpMatrixMultiplyKernel : public ICpuKernel
+class CpuGemmLowpMatrixMultiplyKernel : public NewICpuKernel<CpuGemmLowpMatrixMultiplyKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
index 918f8c89d9..6cced66b47 100644
--- a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace kernels
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
*/
-class CpuGemmLowpMatrixAReductionKernel : public ICpuKernel
+class CpuGemmLowpMatrixAReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixAReductionKernel>
{
public:
/** Default constructor */
@@ -98,7 +98,7 @@ private:
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
*/
-class CpuGemmLowpMatrixBReductionKernel : public ICpuKernel
+class CpuGemmLowpMatrixBReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixBReductionKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
index 1ec969be92..1d70c0619e 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,7 @@ namespace kernels
* (a_offset * b_offset * k)
*
*/
-class CpuGemmLowpOffsetContributionKernel : public ICpuKernel
+class CpuGemmLowpOffsetContributionKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
index d97727dd09..13c64f4631 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,7 +63,7 @@ namespace kernels
* (a_offset * b_offset * k)
*/
-class CpuGemmLowpOffsetContributionOutputStageKernel : public ICpuKernel
+class CpuGemmLowpOffsetContributionOutputStageKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionOutputStageKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
index ae13e760ff..f6e8c816f3 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,7 +51,7 @@ namespace kernels
* -# -to the [-128..127] range and cast to QASYMM8_SIGNED.
*
*/
-class CpuGemmLowpQuantizeDownInt32ScaleKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ScaleKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ScaleKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ScaleKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index 53a9d34ed1..a9e2560657 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,7 +48,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index 67829e7773..bfac8681a5 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index b62cac4818..5e5683cfc3 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
index c9798fc24c..64338259e9 100644
--- a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
+++ b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ namespace kernels
* - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref CpuGemmMatrixMultiplyKernel
* - MTX_1 = C
*/
-class CpuGemmMatrixAdditionKernel : public ICpuKernel
+class CpuGemmMatrixAdditionKernel : public NewICpuKernel<CpuGemmMatrixAdditionKernel>
{
public:
CpuGemmMatrixAdditionKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
index 0b4e01579c..757b46e9a7 100644
--- a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
+++ b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ namespace kernels
* @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p lhs is a vector and the second input tensor @p rhs a matrix. The implementation also assumes that both tensors have not been reshaped
*
*/
-class CpuGemmMatrixMultiplyKernel : public ICpuKernel
+class CpuGemmMatrixMultiplyKernel : public NewICpuKernel<CpuGemmMatrixMultiplyKernel>
{
public:
CpuGemmMatrixMultiplyKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
index de920b5ed7..2acda35947 100644
--- a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
+++ b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,7 +68,7 @@ namespace kernels
* @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
*
*/
-class CpuGemmTranspose1xWKernel : public ICpuKernel
+class CpuGemmTranspose1xWKernel : public NewICpuKernel<CpuGemmTranspose1xWKernel>
{
public:
CpuGemmTranspose1xWKernel() = default;
diff --git a/src/cpu/kernels/CpuIm2ColKernel.h b/src/cpu/kernels/CpuIm2ColKernel.h
index 797d54c95c..d789adef95 100644
--- a/src/cpu/kernels/CpuIm2ColKernel.h
+++ b/src/cpu/kernels/CpuIm2ColKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuIm2ColKernel : public ICpuKernel
+class CpuIm2ColKernel : public NewICpuKernel<CpuIm2ColKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuKernelSelectionTypes.h b/src/cpu/kernels/CpuKernelSelectionTypes.h
new file mode 100644
index 0000000000..779fb86e6d
--- /dev/null
+++ b/src/cpu/kernels/CpuKernelSelectionTypes.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
+#define ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
+
+#include "arm_compute/core/Types.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+// Selector data types
+struct DataTypeISASelectorData
+{
+ DataType dt;
+ const cpuinfo::CpuIsaInfo &isa;
+};
+
+struct PoolDataTypeISASelectorData
+{
+ DataType dt;
+ DataLayout dl;
+ int pool_stride_x;
+ Size2D pool_size;
+ const cpuinfo::CpuIsaInfo &isa;
+};
+
+// Selector pointer types
+using DataTypeISASelectorPtr = std::add_pointer<bool(const DataTypeISASelectorData &data)>::type;
+using PoolDataTypeISASelectorPtr = std::add_pointer<bool(const PoolDataTypeISASelectorData &data)>::type;
+
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+
+#endif // ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
\ No newline at end of file
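
The selector structs and pointer aliases declared above drive the table-based micro-kernel selection shared by the reworked kernels in this patch. Below is a minimal, self-contained sketch of that pattern; MicroKernel, example_kernels and select_implementation are hypothetical names used for illustration only, while the real lookup sits behind the templated base in src/cpu/ICpuKernel.h and each kernel's own table.

    // Sketch only: hypothetical names illustrating the selection pattern
    // introduced by CpuKernelSelectionTypes.h, not the library's actual code.
    #include <vector>

    enum class DataType { F32, F16 };

    struct CpuIsaInfo
    {
        bool neon = true;
        bool sve  = false;
        bool fp16 = false;
    };

    struct DataTypeISASelectorData
    {
        DataType          dt;
        const CpuIsaInfo &isa;
    };

    using SelectorPtr = bool (*)(const DataTypeISASelectorData &);
    using KernelPtr   = void (*)(); // real micro-kernels take tensor/window arguments

    struct MicroKernel
    {
        const char *name;
        SelectorPtr is_selected;
        KernelPtr   ukernel;
    };

    void neon_fp16_stub() {}
    void neon_fp32_stub() {}

    static const std::vector<MicroKernel> example_kernels = {
        { "neon_fp16_stub",
          [](const DataTypeISASelectorData &d) { return d.dt == DataType::F16 && d.isa.fp16; },
          neon_fp16_stub },
        { "neon_fp32_stub",
          [](const DataTypeISASelectorData &d) { return d.dt == DataType::F32 && d.isa.neon; },
          neon_fp32_stub },
    };

    // The first entry whose predicate accepts the selector data wins; nullptr otherwise.
    const MicroKernel *select_implementation(const DataTypeISASelectorData &data)
    {
        for(const auto &uk : example_kernels)
        {
            if(uk.is_selected(data))
            {
                return &uk;
            }
        }
        return nullptr;
    }

Judging by the call sites further down, each kernel now exposes its table through a get_available_kernels() accessor and the templated base provides the equivalent of select_implementation() as a static get_implementation().
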
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index b65ec20044..3ab198510f 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform multiplication between two tensors */
-class CpuMulKernel : public ICpuKernel
+class CpuMulKernel : public NewICpuKernel<CpuMulKernel>
{
public:
CpuMulKernel() = default;
@@ -118,7 +118,7 @@ private:
};
/** Interface for the complex pixelwise multiplication kernel. */
-class CpuComplexMulKernel : public ICpuKernel
+class CpuComplexMulKernel : public NewICpuKernel<CpuComplexMulKernel>
{
public:
CpuComplexMulKernel() = default;
diff --git a/src/cpu/kernels/CpuPermuteKernel.h b/src/cpu/kernels/CpuPermuteKernel.h
index 1b2672b5b9..aae28582b1 100644
--- a/src/cpu/kernels/CpuPermuteKernel.h
+++ b/src/cpu/kernels/CpuPermuteKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to perform tensor permutation given a permutation vector */
-class CpuPermuteKernel : public ICpuKernel
+class CpuPermuteKernel : public NewICpuKernel<CpuPermuteKernel>
{
public:
CpuPermuteKernel() = default;
diff --git a/src/cpu/kernels/CpuPool2dKernel.cpp b/src/cpu/kernels/CpuPool2dKernel.cpp
index f61cd0835d..953a9ffb67 100644
--- a/src/cpu/kernels/CpuPool2dKernel.cpp
+++ b/src/cpu/kernels/CpuPool2dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,136 +52,101 @@ namespace
{
using namespace misc::shape_calculator;
-struct PoolingSelectorData
-{
- DataType dt;
- DataLayout dl;
- int pool_stride_x;
- Size2D pool_size;
-};
-
-using PoolingSelectorPtr = std::add_pointer<bool(const PoolingSelectorData &data)>::type;
-using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
-struct PoolingKernel
-{
- const char *name;
- const PoolingSelectorPtr is_selected;
- PoolingKernelPtr ukernel;
-};
-
-static const PoolingKernel available_kernels[] =
+static const std::vector<CpuPool2dKernel::PoolingKernel> available_kernels =
{
{
"neon_qu8_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_qasymm8_neon_nhwc)
},
{
"neon_qs8_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_qasymm8_signed_neon_nhwc)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_f16_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); },
REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nhwc)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); },
REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nhwc)
},
#if defined(ENABLE_NCHW_KERNELS)
{
"neon_qu8_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<uint8_t>)
},
{
"neon_qu8_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<uint8_t>)
},
{
"neon_qu8_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<uint8_t>)
},
{
"neon_qs8_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<int8_t>)
},
{
"neon_qs8_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<int8_t>)
},
{
"neon_qs8_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<int8_t>)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
REGISTER_FP16_NEON(arm_compute::cpu::pooling2_fp16_neon_nchw)
},
{
"neon_fp16_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
REGISTER_FP16_NEON(arm_compute::cpu::pooling3_fp16_neon_nchw)
},
{
"neon_fp16_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16)); },
REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nchw)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
REGISTER_FP32_NEON(arm_compute::cpu::pooling2_fp32_neon_nchw)
},
{
"neon_fp32_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
REGISTER_FP32_NEON(arm_compute::cpu::pooling3_fp32_neon_nchw)
},
{
"neon_fp32_nchw_pool7",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
REGISTER_FP32_NEON(arm_compute::cpu::pooling7_fp32_neon_nchw)
},
{
"neon_fp32_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); },
REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nchw)
},
#endif /* defined(ENABLE_NCHW_KERNELS) */
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const PoolingKernel *get_implementation(DataType dt, DataLayout dl, int pool_stride_x, Size2D pool_size)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt, dl, pool_stride_x, pool_size }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info,
const ITensorInfo *indices, Size2D pool_size)
{
@@ -235,7 +200,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
}
}
- const auto *uk = get_implementation(src->data_type(), src->data_layout(), pool_stride_x, pool_size);
+ const auto *uk = CpuPool2dKernel::get_implementation(PoolDataTypeISASelectorData{ src->data_type(), src->data_layout(), pool_stride_x, pool_size, CPUInfo::get().get_isa() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
return Status{};
@@ -335,7 +300,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, indices, pool_size));
- const auto *uk = get_implementation(src->data_type(), src->data_layout(), pad_stride_info.stride().first, pool_size);
+ const auto *uk = CpuPool2dKernel::get_implementation(PoolDataTypeISASelectorData{ src->data_type(), src->data_layout(), (int)pad_stride_info.stride().first, pool_size, CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON(uk == nullptr);
// Set instance variables
@@ -350,7 +315,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
{
// Configure kernel window
Window win = calculate_max_window(*dst, Steps());
- ICpuKernel::configure(win);
+ NewICpuKernel::configure(win);
}
else
{
@@ -358,7 +323,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration,
pool_size.x(), pool_size.y());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICpuKernel::configure(win_config.second);
+ NewICpuKernel::configure(win_config.second);
}
}
@@ -391,7 +356,7 @@ void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const T
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
@@ -447,6 +412,12 @@ const char *CpuPool2dKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuPool2dKernel::PoolingKernel> &CpuPool2dKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
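
The new get_available_kernels() accessor is the hook that makes kernel-selection testing possible: a test can enumerate every registered pooling micro-kernel and probe its predicate directly, without going through configure(). A minimal sketch, assuming it is compiled somewhere the internal headers are visible (as the validation tests added below are); list_pool2d_kernels is a made-up helper name.

    #include <iostream>
    #include "arm_compute/core/Types.h"
    #include "src/common/cpuinfo/CpuIsaInfo.h"
    #include "src/cpu/kernels/CpuKernelSelectionTypes.h"
    #include "src/cpu/kernels/CpuPool2dKernel.h"

    // Sketch: list the registered pooling micro-kernels and flag every entry
    // whose predicate accepts an NHWC/F32 selector on a plain Neon core. The
    // first flagged entry is the one get_implementation() would return.
    void list_pool2d_kernels()
    {
        using namespace arm_compute;
        cpuinfo::CpuIsaInfo isa{};
        isa.neon = true;
        const cpu::kernels::PoolDataTypeISASelectorData data{ DataType::F32, DataLayout::NHWC, 1, Size2D(2, 2), isa };
        for(const auto &uk : cpu::kernels::CpuPool2dKernel::get_available_kernels())
        {
            std::cout << uk.name << (uk.is_selected(data) ? "  <-- selected" : "") << "\n";
        }
    }
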
diff --git a/src/cpu/kernels/CpuPool2dKernel.h b/src/cpu/kernels/CpuPool2dKernel.h
index aedeb7fbe9..7fd3247d6d 100644
--- a/src/cpu/kernels/CpuPool2dKernel.h
+++ b/src/cpu/kernels/CpuPool2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,11 @@ namespace cpu
namespace kernels
{
/** Interface for the pooling layer kernel */
-class CpuPool2dKernel : public ICpuKernel
+class CpuPool2dKernel : public NewICpuKernel<CpuPool2dKernel>
{
+private:
+ using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
+
public:
CpuPool2dKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dKernel);
@@ -62,8 +65,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
+ struct PoolingKernel
+ {
+ const char *name;
+ const PoolDataTypeISASelectorPtr is_selected;
+ PoolingKernelPtr ukernel;
+ };
+
+ static const std::vector<PoolingKernel> &get_available_kernels();
private:
PoolingLayerInfo _pool_info{};
diff --git a/src/cpu/kernels/CpuQuantizeKernel.h b/src/cpu/kernels/CpuQuantizeKernel.h
index eb0814926d..709e1c89c7 100644
--- a/src/cpu/kernels/CpuQuantizeKernel.h
+++ b/src/cpu/kernels/CpuQuantizeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace kernels
*
* @note The implementation supports only 3D input tensors
*/
-class CpuQuantizeKernel : public ICpuKernel
+class CpuQuantizeKernel : public NewICpuKernel<CpuQuantizeKernel>
{
public:
CpuQuantizeKernel() = default;
diff --git a/src/cpu/kernels/CpuReshapeKernel.h b/src/cpu/kernels/CpuReshapeKernel.h
index d4e2b44b54..6a5c528ecd 100644
--- a/src/cpu/kernels/CpuReshapeKernel.h
+++ b/src/cpu/kernels/CpuReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform tensor reshaping */
-class CpuReshapeKernel : public ICpuKernel
+class CpuReshapeKernel : public NewICpuKernel<CpuReshapeKernel>
{
public:
CpuReshapeKernel() = default;
diff --git a/src/cpu/kernels/CpuScaleKernel.cpp b/src/cpu/kernels/CpuScaleKernel.cpp
index 3063d8f682..60564a97dd 100644
--- a/src/cpu/kernels/CpuScaleKernel.cpp
+++ b/src/cpu/kernels/CpuScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,52 +48,37 @@ namespace kernels
{
namespace
{
-struct ScaleSelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using ScaleSelectorPtr = std::add_pointer<bool(const ScaleSelectorData &data)>::type;
-using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
- InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type;
-struct ScaleKernel
-{
- const char *name;
- const ScaleSelectorPtr is_selected;
- ScaleKernelPtr ukernel;
-};
-
-static const ScaleKernel available_kernels[] =
+static const std::vector<CpuScaleKernel::ScaleKernel> available_kernels =
{
#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve; },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale)
},
{
"sve_fp32_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale)
},
{
"sve_qu8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; },
REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale)
},
{
"sve_qs8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale)
},
{
"sve_u8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale)
},
{
"sve_s16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale)
},
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
@@ -101,60 +86,43 @@ static const ScaleKernel available_kernels[] =
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>)
},
#endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::common_neon_scale<float>)
},
{
"neon_qu8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::qasymm8_neon_scale)
},
{
"neon_qs8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::qasymm8_signed_neon_scale)
},
{
"neon_u8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::U8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8; },
REGISTER_INTEGER_NEON(arm_compute::cpu::u8_neon_scale)
},
{
"neon_s16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::S16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16; },
REGISTER_INTEGER_NEON(arm_compute::cpu::s16_neon_scale)
},
#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const ScaleKernel *get_implementation(const ScaleSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
{
- const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
@@ -212,7 +180,7 @@ void CpuScaleKernel::configure(const ITensorInfo *src, const ITensorInfo *dx, co
dst,
info));
- const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_run_method = uk->ukernel;
@@ -618,6 +586,12 @@ const char *CpuScaleKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuScaleKernel::ScaleKernel> &CpuScaleKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
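
Passing a cpuinfo::CpuIsaInfo value instead of the live CPUInfo reference is what allows the selection logic to be queried for ISA combinations the test machine does not actually have. A hedged sketch of the call pattern, mirroring the validation tests added further below; the helper name is made up, and the one-argument get_implementation() call follows the call sites in this file.

    #include "arm_compute/core/Types.h"
    #include "src/common/cpuinfo/CpuIsaInfo.h"
    #include "src/cpu/kernels/CpuScaleKernel.h"

    // Hypothetical helper: report which scale micro-kernel would be chosen for
    // F32 data on a core that advertises SVE, regardless of the host CPU.
    const char *selected_scale_kernel_for_sve_f32()
    {
        using namespace arm_compute;
        cpuinfo::CpuIsaInfo isa{};
        isa.neon = true;
        isa.sve  = true; // injected, not detected
        const auto *impl = cpu::kernels::CpuScaleKernel::get_implementation(
            cpu::kernels::DataTypeISASelectorData{ DataType::F32, isa });
        return (impl != nullptr) ? impl->name : nullptr;
    }
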
diff --git a/src/cpu/kernels/CpuScaleKernel.h b/src/cpu/kernels/CpuScaleKernel.h
index 913b5a5593..94bbdb72a0 100644
--- a/src/cpu/kernels/CpuScaleKernel.h
+++ b/src/cpu/kernels/CpuScaleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,14 @@ namespace cpu
namespace kernels
{
/** Arm(R) Neon(TM) kernel to perform scaling on a tensor */
-class CpuScaleKernel : public ICpuKernel
+class CpuScaleKernel : public NewICpuKernel<CpuScaleKernel>
{
+private:
+ /** Scale function to use for the particular function to use */
+ using ScaleFunctionPtr = void (CpuScaleKernel::*)(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const Window &window);
+ using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
+ InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type;
+
public:
CpuScaleKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuScaleKernel);
@@ -67,6 +73,15 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
+ struct ScaleKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ ScaleKernelPtr ukernel;
+ };
+
+ static const std::vector<ScaleKernel> &get_available_kernels();
+
private:
#ifdef ENABLE_NCHW_KERNELS
/** function to perform scale using area interpolation on the given window
@@ -87,11 +102,6 @@ private:
void scale_nearest_nchw(const ITensor *src, ITensor *dst, const ITensor *dx, const ITensor *dy, const ITensor *offsets, const Window &window);
#endif // ENABLE_NCHW_KERNELS
- /** Scale function to use for the particular function to use */
- using ScaleFunctionPtr = void (CpuScaleKernel::*)(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const Window &window);
- using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
- InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type;
-
ScaleFunctionPtr _func{ nullptr };
InterpolationPolicy _policy{};
BorderMode _border_mode{};
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.h b/src/cpu/kernels/CpuSoftmaxKernel.h
index 8073a677d9..f317662620 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.h
+++ b/src/cpu/kernels/CpuSoftmaxKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the identifying the max value of 1D Logits */
-class CpuLogits1DMaxKernel : public ICpuKernel
+class CpuLogits1DMaxKernel : public NewICpuKernel<CpuLogits1DMaxKernel>
{
public:
CpuLogits1DMaxKernel() = default;
@@ -67,7 +67,7 @@ private:
/** Interface for softmax computation for QASYMM8 with pre-computed max. */
template <bool IS_LOG = false>
-class CpuLogits1DSoftmaxKernel : public ICpuKernel
+class CpuLogits1DSoftmaxKernel : public NewICpuKernel<CpuLogits1DSoftmaxKernel<IS_LOG>>
{
public:
CpuLogits1DSoftmaxKernel() = default;
diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp
index ec65f12dfc..c12feb4331 100644
--- a/src/cpu/kernels/CpuSubKernel.cpp
+++ b/src/cpu/kernels/CpuSubKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,85 +39,52 @@ namespace kernels
{
namespace
{
-struct SubSelectorData
-{
- DataType dt;
-};
-
-using SubSelectorPtr = std::add_pointer<bool(const SubSelectorData &data)>::type;
-using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
-
-struct SubKernel
-{
- const char *name;
- const SubSelectorPtr is_selected;
- SubKernelPtr ukernel;
-};
-
-static const SubKernel available_kernels[] =
+static const std::vector<CpuSubKernel::SubKernel> available_kernels =
{
{
"neon_fp32_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
REGISTER_FP32_NEON(arm_compute::cpu::sub_same_neon<float>)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::F16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16) && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::sub_same_neon<float16_t>)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_u8_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::U8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::U8); },
REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<uint8_t>)
},
{
"neon_s16_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::S16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S16); },
REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int16_t>)
},
{
"neon_s32_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::S32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S32); },
REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int32_t>)
},
{
"neon_qu8_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::sub_qasymm8_neon)
},
{
"neon_qs8_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::sub_qasymm8_signed_neon)
},
{
"neon_qs16_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::QSYMM16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QSYMM16); },
REGISTER_QSYMM16_NEON(arm_compute::cpu::sub_qsymm16_neon)
},
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const SubKernel *get_implementation(DataType dt)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy)
{
ARM_COMPUTE_UNUSED(policy);
@@ -126,7 +93,8 @@ inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src
DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &src1);
- const auto *uk = get_implementation(src0.data_type());
+ const auto *uk = CpuSubKernel::get_implementation(DataTypeISASelectorData{ src0.data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
@@ -157,7 +125,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
set_shape_if_empty(*dst, out_shape);
set_data_type_if_unknown(*dst, src0->data_type());
- const auto *uk = get_implementation(src0->data_type());
+ const auto *uk = CpuSubKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_policy = policy;
@@ -196,6 +164,12 @@ const char *CpuSubKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuSubKernel::SubKernel> &CpuSubKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
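
A behavioural nuance of the rewritten table: the fp16 entry is now gated on data.isa.fp16 as well as the data type, so a binary compiled with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC still falls back to the other kernels at runtime on cores that do not report fp16. Reduced to its essentials (illustrative free function, not library code):

    #include "arm_compute/core/Types.h"
    #include "src/cpu/kernels/CpuKernelSelectionTypes.h"

    // The #if guard decides whether the fp16 entry exists in the table at all;
    // this predicate decides whether it can ever be selected on the running core.
    bool fp16_sub_entry_is_selectable(const arm_compute::cpu::kernels::DataTypeISASelectorData &data)
    {
        return (data.dt == arm_compute::DataType::F16) && data.isa.fp16;
    }
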
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 80d6be68b5..70f34b1b57 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,11 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform subtraction between two tensors */
-class CpuSubKernel : public ICpuKernel
+class CpuSubKernel : public NewICpuKernel<CpuSubKernel>
{
+private:
+ using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+
public:
CpuSubKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuSubKernel);
@@ -70,8 +73,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+ struct SubKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ SubKernelPtr ukernel;
+ };
+
+ static const std::vector<SubKernel> &get_available_kernels();
private:
ConvertPolicy _policy{};
diff --git a/src/cpu/kernels/CpuTransposeKernel.h b/src/cpu/kernels/CpuTransposeKernel.h
index 6805eac642..7e1ee5f73d 100644
--- a/src/cpu/kernels/CpuTransposeKernel.h
+++ b/src/cpu/kernels/CpuTransposeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel which transposes the elements of a matrix */
-class CpuTransposeKernel : public ICpuKernel
+class CpuTransposeKernel : public NewICpuKernel<CpuTransposeKernel>
{
public:
CpuTransposeKernel() = default;
diff --git a/src/cpu/kernels/CpuWeightsReshapeKernel.h b/src/cpu/kernels/CpuWeightsReshapeKernel.h
index c80bf3b25e..6c2d7ef6f9 100644
--- a/src/cpu/kernels/CpuWeightsReshapeKernel.h
+++ b/src/cpu/kernels/CpuWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,7 +56,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuWeightsReshapeKernel : public ICpuKernel
+class CpuWeightsReshapeKernel : public NewICpuKernel<CpuWeightsReshapeKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuWinogradConv2dKernel.h b/src/cpu/kernels/CpuWinogradConv2dKernel.h
index db2d8acfdb..0c4e28c394 100644
--- a/src/cpu/kernels/CpuWinogradConv2dKernel.h
+++ b/src/cpu/kernels/CpuWinogradConv2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ namespace arm_compute
namespace cpu
{
/** Interface for the kernel to perform Winograd input transform. */
-class ICpuWinogradConv2dTransformInputKernel : public ICpuKernel
+class ICpuWinogradConv2dTransformInputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformInputKernel>
{
public:
/** Get the working space required to perform the transformation.
@@ -216,7 +216,7 @@ private:
};
/** Interface for the kernel to perform Winograd output transform. */
-class ICpuWinogradConv2dTransformOutputKernel : public ICpuKernel
+class ICpuWinogradConv2dTransformOutputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformOutputKernel>
{
public:
/** Get the working space required to perform the transformation.
@@ -418,7 +418,7 @@ private:
};
/** Interface for the kernel to perform Winograd weights transform. */
-class ICpuWinogradConv2dTransformWeightsKernel : public ICpuKernel
+class ICpuWinogradConv2dTransformWeightsKernel : public NewICpuKernel<ICpuWinogradConv2dTransformWeightsKernel>
{
public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
index 8980922945..902e9616d1 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Types.h"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+#include "src/cpu/kernels/CpuKernelSelectionTypes.h"
namespace arm_conv
{
@@ -44,7 +45,7 @@ namespace cpu
namespace kernels
{
/** This class is a wrapper for the depthwise convolution assembly kernels. */
-class CpuDepthwiseConv2dAssemblyWrapperKernel final : public ICpuKernel
+class CpuDepthwiseConv2dAssemblyWrapperKernel final : public NewICpuKernel<CpuDepthwiseConv2dAssemblyWrapperKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
index 8625fd96b4..daa3168beb 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,7 @@
#include "src/core/NEON/kernels/assembly/pooling.hpp"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+#include "src/cpu/kernels/CpuKernelSelectionTypes.h"
#include "pool_common.hpp"
@@ -45,7 +46,7 @@ namespace kernels
* execute a single assembly kernel in the context of an NEFunction.
*
*/
-class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel
+class CpuPool2dAssemblyWrapperKernel final : public NewICpuKernel<CpuPool2dAssemblyWrapperKernel>
{
public:
/** Constructor
diff --git a/support/StringSupport.h b/support/StringSupport.h
index 5e237c7dff..e8b3ca7ab3 100644
--- a/support/StringSupport.h
+++ b/support/StringSupport.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -131,6 +131,12 @@ inline std::string to_string(T && value)
return stream.str();
}
+// Specialization for const std::string&
+inline std::string to_string(const std::string &value)
+{
+ return value;
+}
+
/** Convert string values to float.
*
* @note This function implements the same behaviour as std::stof. The latter
@@ -164,6 +170,12 @@ inline std::string to_string(T &&value)
return ::std::to_string(std::forward<T>(value));
}
+// Specialization for const std::string&
+inline std::string to_string(const std::string &value)
+{
+ return value;
+}
+
/** Convert string values to float.
*
* @note This function acts as a convenience wrapper around std::stof. The
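
The two identical to_string additions above (one per branch of this header) exist so that generic code, such as test helpers that stringify both numeric values and already-formatted kernel names, can call to_string() uniformly. A standalone sketch of the overload-resolution pattern, deliberately kept outside the library's namespaces:

    #include <sstream>
    #include <string>
    #include <type_traits>
    #include <utility>

    // Stringify arithmetic values via a stream...
    template <typename T, typename std::enable_if<std::is_arithmetic<typename std::decay<T>::type>::value, int>::type = 0>
    inline std::string to_string(T &&value)
    {
        std::stringstream stream;
        stream << std::forward<T>(value);
        return stream.str();
    }

    // ...and pass strings straight through, so to_string(3) -> "3" and
    // to_string(std::string("neon_fp32_add")) returns the name unchanged.
    inline std::string to_string(const std::string &value)
    {
        return value;
    }
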
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index 69fe9053d8..8d70ca5415 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "arm_compute/runtime/RuntimeContext.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuActivationKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ActivationFunctionsDataset.h"
@@ -279,6 +281,43 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info));
ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuActivationKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index f3e4dfc6e5..c72e082a74 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuAddKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ConvertPolicyDataset.h"
@@ -85,6 +87,49 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
ConvertPolicy::WRAP);
ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
}
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::U8,
+ DataType::S16,
+ DataType::S32,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::U8,
+ DataType::S16,
+ DataType::S32
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuAddKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp
index 419ce56e44..d3bd3e0b1c 100644
--- a/tests/validation/NEON/Floor.cpp
+++ b/tests/validation/NEON/Floor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuFloorKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -62,6 +64,30 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
const Status status = NEFloor::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false));
ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
}
+
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ })),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuFloorKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_floor";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*