author    Giorgio Arena <giorgio.arena@arm.com>    2021-11-18 18:02:13 +0000
committer Yair Schwarzbaum <yair.schwarzbaum@arm.com>    2022-01-12 06:52:04 +0000
commit    5ae8d804d67f57fbfa793800ddcc21a5aff954dd (patch)
tree      1defbe7f788645f6f0fb4c3f79be6c4b8ecfb709
parent    3475ffe40b7db99c782cbaf351aa7b4e341562ef (diff)
download  ComputeLibrary-5ae8d804d67f57fbfa793800ddcc21a5aff954dd.tar.gz
Enable kernel selection testing (Phase #1)
Change-Id: I1d65fb9d3a7583cf8d4163ca7c0fbee27dc52633
Signed-off-by: Yair Schwarzbaum <yair.schwarzbaum@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6767
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/CPP/CPPTypes.h  12
-rw-r--r--  arm_compute/core/Utils.h  45
-rw-r--r--  src/core/CPP/CPPTypes.cpp  8
-rw-r--r--  src/cpu/ICpuKernel.h  40
-rw-r--r--  src/cpu/kernels/CpuActivationKernel.cpp  75
-rw-r--r--  src/cpu/kernels/CpuActivationKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuAddKernel.cpp  115
-rw-r--r--  src/cpu/kernels/CpuAddKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuCastKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuCol2ImKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateBatchKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateDepthKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateHeightKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConcatenateWidthKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuCopyKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDequantizeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDirectConv2dKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuDirectConv3dKernel.cpp  56
-rw-r--r--  src/cpu/kernels/CpuDirectConv3dKernel.h  22
-rw-r--r--  src/cpu/kernels/CpuElementwiseKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuElementwiseUnaryKernel.cpp  62
-rw-r--r--  src/cpu/kernels/CpuElementwiseUnaryKernel.h  20
-rw-r--r--  src/cpu/kernels/CpuFillKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuFloorKernel.cpp  53
-rw-r--r--  src/cpu/kernels/CpuFloorKernel.h  21
-rw-r--r--  src/cpu/kernels/CpuGemmInterleave4x4Kernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h  6
-rw-r--r--  src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmMatrixAdditionKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuGemmTranspose1xWKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuIm2ColKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuKernelSelectionTypes.h  60
-rw-r--r--  src/cpu/kernels/CpuMulKernel.h  6
-rw-r--r--  src/cpu/kernels/CpuPermuteKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.cpp  89
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuQuantizeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuReshapeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuScaleKernel.cpp  72
-rw-r--r--  src/cpu/kernels/CpuScaleKernel.h  24
-rw-r--r--  src/cpu/kernels/CpuSoftmaxKernel.h  6
-rw-r--r--  src/cpu/kernels/CpuSubKernel.cpp  64
-rw-r--r--  src/cpu/kernels/CpuSubKernel.h  17
-rw-r--r--  src/cpu/kernels/CpuTransposeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuWeightsReshapeKernel.h  4
-rw-r--r--  src/cpu/kernels/CpuWinogradConv2dKernel.h  8
-rw-r--r--  src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h  5
-rw-r--r--  src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h  5
-rw-r--r--  support/StringSupport.h  14
-rw-r--r--  tests/validation/NEON/ActivationLayer.cpp  41
-rw-r--r--  tests/validation/NEON/ArithmeticAddition.cpp  47
-rw-r--r--  tests/validation/NEON/Floor.cpp  28
63 files changed, 673 insertions, 527 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index 82a6a6c324..a021bdf5e4 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,11 @@
namespace arm_compute
{
+namespace cpuinfo
+{
+struct CpuIsaInfo;
+} // namespace cpuinfo
+
#define ARM_COMPUTE_CPU_MODEL_LIST \
X(GENERIC) \
X(GENERIC_FP16) \
@@ -134,6 +139,11 @@ public:
* @return Current thread's @ref CPUModel
*/
CPUModel get_cpu_model() const;
+ /** Gets the current cpu's ISA information
+ *
+ * @return Current cpu's ISA information
+ */
+ cpuinfo::CpuIsaInfo get_isa() const;
/** Gets the L1 cache size
*
* @return the size of the L1 cache
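A minimal sketch (illustrative, not part of this patch; the helper name is hypothetical) of the new accessor: CPUInfo::get_isa() returns a cpuinfo::CpuIsaInfo whose boolean capability flags replace the individual CPUInfo::has_sve()/has_sve2()/has_fp16() queries when picking a micro-kernel. The field name below matches the checks performed by the selector lambdas later in this patch (data.isa.sve2).

#include "arm_compute/core/CPP/CPPTypes.h"
#include "src/common/cpuinfo/CpuIsaInfo.h"

bool can_use_sve2_quantized_kernels()
{
    // get_isa() is the accessor added in this patch; "sve2" is the field name used by
    // the selector lambdas (data.isa.sve2).
    const arm_compute::cpuinfo::CpuIsaInfo isa = arm_compute::CPUInfo::get().get_isa();
    return isa.sve2;
}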
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 88cb295c44..b24955d778 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1200,6 +1200,49 @@ inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
return vec_size;
}
+/** Returns the suffix string of CPU kernel implementation names based on the given data type
+ *
+ * @param[in] data_type The data type the CPU kernel implementation uses
+ *
+ * @return the suffix string of CPU kernel implementations
+ */
+inline std::string cpu_impl_dt(const DataType &data_type)
+{
+ std::string ret = "";
+
+ switch(data_type)
+ {
+ case DataType::F32:
+ ret = "fp32";
+ break;
+ case DataType::F16:
+ ret = "fp16";
+ break;
+ case DataType::U8:
+ ret = "u8";
+ break;
+ case DataType::S16:
+ ret = "s16";
+ break;
+ case DataType::S32:
+ ret = "s32";
+ break;
+ case DataType::QASYMM8:
+ ret = "qu8";
+ break;
+ case DataType::QASYMM8_SIGNED:
+ ret = "qs8";
+ break;
+ case DataType::QSYMM16:
+ ret = "qs16";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported.");
+ }
+
+ return ret;
+}
+
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Print consecutive elements to an output stream.
*
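A short illustrative sketch (not part of this patch; the helper name is hypothetical) of how the suffix returned by cpu_impl_dt() composes into the micro-kernel names used throughout this change. The "neon"/"sve" prefix and "_activation" operator suffix are examples taken from the kernel tables in this patch, not a general rule.

#include <string>

#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"

std::string expected_activation_kernel_name(arm_compute::DataType dt, bool uses_sve)
{
    // e.g. DataType::F32 on the Neon path gives "neon_fp32_activation", matching the
    // entries in the activation kernel table further down in this patch.
    const std::string arch = uses_sve ? "sve" : "neon";
    return arch + "_" + arm_compute::cpu_impl_dt(dt) + "_activation";
}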
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index 44cd000ada..c197932a13 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "src/common/cpuinfo/CpuInfo.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
namespace arm_compute
{
@@ -110,6 +111,11 @@ CPUModel CPUInfo::get_cpu_model(unsigned int cpuid) const
return _impl->info.cpu_model(cpuid);
}
+cpuinfo::CpuIsaInfo CPUInfo::get_isa() const
+{
+ return _impl->info.isa();
+}
+
unsigned int CPUInfo::get_L1_cache_size() const
{
return _impl->L1_cache_size;
diff --git a/src/cpu/ICpuKernel.h b/src/cpu/ICpuKernel.h
index 650b3a7d0b..03aec5c08e 100644
--- a/src/cpu/ICpuKernel.h
+++ b/src/cpu/ICpuKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,12 +25,50 @@
#define ARM_COMPUTE_ICPUKERNEL_H
#include "arm_compute/core/CPP/ICPPKernel.h"
+#include "src/cpu/kernels/CpuKernelSelectionTypes.h"
namespace arm_compute
{
namespace cpu
{
+enum class KernelSelectionType
+{
+ Preferred, /**< Retrieve the best implementation available for the given Cpu ISA, ignoring the build flags */
+ Supported /**< Retrieve the best implementation available for the given Cpu ISA that is supported by the current build */
+};
+
using ICpuKernel = arm_compute::ICPPKernel;
+
+template <class Derived>
+/* This is a temporary name used during stage 1 of adding unit tests for multi-ISA support.
+In the next stage NewICpuKernel will be renamed back to ICpuKernel. */
+class NewICpuKernel : public ICPPKernel
+{
+public:
+ /** Micro-kernel selector
+ *
+ * @param[in] selector Selection struct containing the information needed to pick the appropriate micro-kernel
+ * @param[in] selection_type (Optional) Decides whether to get the best implementation for the given hardware or for the given build
+ *
+ * @return A matching micro-kernel else nullptr
+ */
+
+ template <typename SelectorType>
+ static const auto *get_implementation(const SelectorType &selector, KernelSelectionType selection_type = KernelSelectionType::Supported)
+ {
+ using kernel_type = typename std::remove_reference<decltype(Derived::get_available_kernels())>::type::value_type;
+
+ for(const auto &uk : Derived::get_available_kernels())
+ {
+ if(uk.is_selected(selector) && (selection_type == KernelSelectionType::Preferred || uk.ukernel != nullptr))
+ {
+ return &uk;
+ }
+ }
+
+ return static_cast<kernel_type *>(nullptr);
+ }
+};
} // namespace cpu
} // namespace arm_compute
#endif /* ARM_COMPUTE_ICPUKERNEL_H */
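A minimal usage sketch (illustrative, not part of this patch; the free function name is hypothetical) of the new CRTP base: a derived kernel exposes a static get_available_kernels() table, and NewICpuKernel<Derived>::get_implementation() walks it. KernelSelectionType::Supported (the default) skips entries whose ukernel was compiled out, while Preferred returns the best match for the ISA regardless of build flags.

#include "arm_compute/core/CPP/CPPTypes.h"
#include "arm_compute/core/ITensorInfo.h"
#include "src/cpu/kernels/CpuActivationKernel.h"

const char *preferred_activation_ukernel_name(const arm_compute::ITensorInfo *src)
{
    using namespace arm_compute;
    using namespace arm_compute::cpu;
    using namespace arm_compute::cpu::kernels;

    // Same call pattern as CpuActivationKernel::validate()/configure() in this patch,
    // but asking for the Preferred implementation instead of the default Supported one.
    const auto *uk = CpuActivationKernel::get_implementation(
                         DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() },
                         KernelSelectionType::Preferred);

    // uk->ukernel may still be nullptr here, since Preferred ignores the build flags.
    return uk != nullptr ? uk->name : nullptr;
}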
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 3af379d8af..c048b14a96 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,94 +43,60 @@ namespace kernels
{
namespace
{
-struct ActivationSelectorData
+static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels =
{
- DataType dt;
- const CPUInfo &ci;
-};
-
-using ActivationSelectorPtr = std::add_pointer<bool(const ActivationSelectorData &data)>::type;
-using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
-
-struct ActivationKernel
-{
- const char *name;
- const ActivationSelectorPtr is_selected;
- ActivationKernelPtr ukernel;
-};
-
-static const ActivationKernel available_kernels[] =
-{
-#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve; },
REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation)
},
{
"sve_fp32_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_activation)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_activation)
},
{
"neon_fp32_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_activation)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
-#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
- "sve_qu8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); },
+ "sve2_qu8_activation",
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; },
REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_qasymm8_activation)
},
{
- "sve_qs8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); },
+ "sve2_qs8_activation",
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve2; },
REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::sve2_qasymm8_signed_activation)
},
{
- "sve_qs16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16 && data.ci.has_sve2(); },
+ "sve2_qs16_activation",
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16 && data.isa.sve2; },
REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
{
"neon_qu8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation)
},
{
"neon_qs8_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_activation)
},
{
"neon_qs16_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qsymm16_activation)
},
};
-const ActivationKernel *get_implementation(const ActivationSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
/* Supported activation in the 8-bit integer domain */
static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations =
{
@@ -155,7 +121,8 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
- const auto *uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const DataType data_type = src->data_type();
@@ -208,7 +175,8 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, activation_info));
- const auto uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_act_info = activation_info;
@@ -269,6 +237,11 @@ const char *CpuActivationKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuActivationKernel::ActivationKernel> &CpuActivationKernel::get_available_kernels()
+{
+ return available_kernels;
+}
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuActivationKernel.h b/src/cpu/kernels/CpuActivationKernel.h
index 8e78d86016..ac974850aa 100644
--- a/src/cpu/kernels/CpuActivationKernel.h
+++ b/src/cpu/kernels/CpuActivationKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,11 @@ namespace cpu
namespace kernels
{
/** Interface for the activation kernel */
-class CpuActivationKernel : public ICpuKernel
+class CpuActivationKernel : public NewICpuKernel<CpuActivationKernel>
{
+private:
+ using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
+
public:
CpuActivationKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuActivationKernel);
@@ -70,8 +73,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
+ struct ActivationKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ ActivationKernelPtr ukernel;
+ };
+
+ static const std::vector<ActivationKernel> &get_available_kernels();
private:
ActivationLayerInfo _act_info{};
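The ActivationKernel struct and get_available_kernels() accessor above expose the kernel table publicly, which is what the updated validation tests rely on. A hedged sketch (illustrative, not part of this patch; the helper name and expected-name argument are hypothetical) of the kind of check that becomes possible:

#include <string>

#include "arm_compute/core/Types.h"
#include "src/common/cpuinfo/CpuIsaInfo.h"
#include "src/cpu/kernels/CpuActivationKernel.h"

bool activation_selector_picks(const char *expected_name, arm_compute::DataType dt, const arm_compute::cpuinfo::CpuIsaInfo &isa)
{
    using namespace arm_compute::cpu;
    using namespace arm_compute::cpu::kernels;

    // Preferred ignores the build flags, so the selection logic can be exercised even
    // when the matching micro-kernel is compiled out of the current binary.
    const auto *uk = CpuActivationKernel::get_implementation(DataTypeISASelectorData{ dt, isa },
                                                             KernelSelectionType::Preferred);
    return uk != nullptr && std::string(uk->name) == expected_name;
}

For instance, with dt == DataType::QASYMM8 and isa.sve2 set, the activation table in this patch resolves to "sve2_qu8_activation".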
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index f3ee032ec5..c27ee9f1bd 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,159 +41,116 @@ namespace kernels
{
namespace
{
-struct AddSelectorData
+static const std::vector<CpuAddKernel::AddKernel> available_kernels =
{
- DataType dt;
- const CPUInfo &ci;
-};
-
-using AddSelectorPtr = std::add_pointer<bool(const AddSelectorData &data)>::type;
-using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
-struct AddKernel
-{
- const char *name;
- const AddSelectorPtr is_selected;
- AddKernelPtr ukernel;
-};
-
-static const AddKernel available_kernels[] =
-{
-#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
"sve2_qu8_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QASYMM8) && data.ci.has_sve2();
+ return (data.dt == DataType::QASYMM8) && data.isa.sve2;
},
REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2)
},
{
"sve2_qs8_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2();
+ return (data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2;
},
REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2)
},
{
"sve2_qs16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QSYMM16) && data.ci.has_sve2();
+ return (data.dt == DataType::QSYMM16) && data.isa.sve2;
},
REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2)
},
-#endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
-#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp32_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::F32) && data.ci.has_sve();
+ return (data.dt == DataType::F32) && data.isa.sve;
},
REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve)
},
{
"sve_fp16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::F16) && data.ci.has_sve();
+ return (data.dt == DataType::F16) && data.isa.sve;
},
REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve)
},
{
"sve_u8_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::U8) && data.ci.has_sve();
+ return (data.dt == DataType::U8) && data.isa.sve;
},
REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve)
},
{
"sve_s16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::S16) && data.ci.has_sve();
+ return (data.dt == DataType::S16) && data.isa.sve;
},
REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve)
},
{
"sve_s32_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::S32) && data.ci.has_sve();
+ return (data.dt == DataType::S32) && data.isa.sve;
},
REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp32_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon)
},
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_add",
- [](const AddSelectorData & data)
+ [](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::F16) && data.ci.has_fp16();
+ return (data.dt == DataType::F16) && data.isa.fp16;
},
REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon)
},
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_u8_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::U8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::U8); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon)
},
{
"neon_s16_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::S16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S16); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon)
},
{
"neon_s32_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::S32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S32); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon)
},
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
-#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
{
"neon_qu8_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::add_qasymm8_neon)
},
{
"neon_qs8_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_qasymm8_signed_neon)
},
{
"neon_qs16_add",
- [](const AddSelectorData & data) { return (data.dt == DataType::QSYMM16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QSYMM16); },
REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon)
- },
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */
-};
-
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt, cpuinfo }))
- {
- return &uk;
- }
}
- return nullptr;
-}
+};
Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy)
{
@@ -220,7 +177,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons
"Wrong shape for dst");
}
- const auto *uk = get_implementation(CPUInfo::get(), src0.data_type());
+ const auto *uk = CpuAddKernel::get_implementation(DataTypeISASelectorData{ src0.data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
return Status{};
@@ -246,7 +203,8 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
- const auto uk = get_implementation(CPUInfo::get(), src0->data_type());
+ const auto uk = CpuAddKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_policy = policy;
@@ -256,7 +214,7 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
// Configure kernel window
auto win_config = validate_and_configure_window(*src0, *src1, *dst);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICpuKernel::configure(win_config.second);
+ NewICpuKernel::configure(win_config.second);
}
Status CpuAddKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
@@ -273,7 +231,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(tensors.empty());
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
@@ -290,6 +248,11 @@ const char *CpuAddKernel::name() const
return _name.c_str();
}
+const std::vector<CpuAddKernel::AddKernel> &CpuAddKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
{
ARM_COMPUTE_UNUSED(thread_count);
@@ -298,7 +261,7 @@ size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
{
return 10240;
}
- else if (platform.get_cpu_model() == CPUModel::A76)
+ else if(platform.get_cpu_model() == CPUModel::A76)
{
return 9216;
}
diff --git a/src/cpu/kernels/CpuAddKernel.h b/src/cpu/kernels/CpuAddKernel.h
index a0c7e497dd..93b86de4ae 100644
--- a/src/cpu/kernels/CpuAddKernel.h
+++ b/src/cpu/kernels/CpuAddKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,9 +34,19 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform addition between two tensors */
-class CpuAddKernel : public ICpuKernel
+class CpuAddKernel : public NewICpuKernel<CpuAddKernel>
{
+private:
+ using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+
public:
+ struct AddKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ AddKernelPtr ukernel;
+ };
+
CpuAddKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuAddKernel);
/** Initialise the kernel's input, dst and border mode.
@@ -79,8 +89,7 @@ public:
*/
size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
-private:
- using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+ static const std::vector<AddKernel> &get_available_kernels();
private:
ConvertPolicy _policy{};
diff --git a/src/cpu/kernels/CpuCastKernel.h b/src/cpu/kernels/CpuCastKernel.h
index a8ce97230e..9aeb537044 100644
--- a/src/cpu/kernels/CpuCastKernel.h
+++ b/src/cpu/kernels/CpuCastKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace kernels
*
* @note When casting between quantized types the scale and zeroPoint are ignored
*/
-class CpuCastKernel : public ICpuKernel
+class CpuCastKernel : public NewICpuKernel<CpuCastKernel>
{
public:
CpuCastKernel() = default;
diff --git a/src/cpu/kernels/CpuCol2ImKernel.h b/src/cpu/kernels/CpuCol2ImKernel.h
index 8e09a2b689..43be476b2f 100644
--- a/src/cpu/kernels/CpuCol2ImKernel.h
+++ b/src/cpu/kernels/CpuCol2ImKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuCol2ImKernel : public ICpuKernel
+class CpuCol2ImKernel : public NewICpuKernel<CpuCol2ImKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuConcatenateBatchKernel.h b/src/cpu/kernels/CpuConcatenateBatchKernel.h
index 91f2808f81..2b5946571b 100644
--- a/src/cpu/kernels/CpuConcatenateBatchKernel.h
+++ b/src/cpu/kernels/CpuConcatenateBatchKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@ namespace kernels
/** Interface for the batch concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class CpuConcatenateBatchKernel : public ICpuKernel
+class CpuConcatenateBatchKernel : public NewICpuKernel<CpuConcatenateBatchKernel>
{
public:
CpuConcatenateBatchKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateDepthKernel.h b/src/cpu/kernels/CpuConcatenateDepthKernel.h
index 063118b33b..90b68d3a06 100644
--- a/src/cpu/kernels/CpuConcatenateDepthKernel.h
+++ b/src/cpu/kernels/CpuConcatenateDepthKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace kernels
/** Interface for the depth concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class CpuConcatenateDepthKernel : public ICpuKernel
+class CpuConcatenateDepthKernel : public NewICpuKernel<CpuConcatenateDepthKernel>
{
public:
CpuConcatenateDepthKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateHeightKernel.h b/src/cpu/kernels/CpuConcatenateHeightKernel.h
index 883c59a206..8ace9809cc 100644
--- a/src/cpu/kernels/CpuConcatenateHeightKernel.h
+++ b/src/cpu/kernels/CpuConcatenateHeightKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@ namespace kernels
/** Interface for the height concatenate kernel.
* The source tensor will be concatenated into the destination tensor.
*/
-class CpuConcatenateHeightKernel : public ICpuKernel
+class CpuConcatenateHeightKernel : public NewICpuKernel<CpuConcatenateHeightKernel>
{
public:
CpuConcatenateHeightKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateWidthKernel.h b/src/cpu/kernels/CpuConcatenateWidthKernel.h
index 3b4612ab0d..d5f2ef24d6 100644
--- a/src/cpu/kernels/CpuConcatenateWidthKernel.h
+++ b/src/cpu/kernels/CpuConcatenateWidthKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace kernels
/** Interface for the width concatenate kernel.
* The source tensor will be concatenated into the destination tensor.
*/
-class CpuConcatenateWidthKernel : public ICPPKernel
+class CpuConcatenateWidthKernel : public NewICpuKernel<CpuConcatenateWidthKernel>
{
public:
CpuConcatenateWidthKernel() = default;
diff --git a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
index 70f0a742f9..001a6fcab0 100644
--- a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
+++ b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ namespace kernels
*
* @note This function assumes the weights are already reshaped (transposed)
*/
-class CpuConvertFullyConnectedWeightsKernel : public ICpuKernel
+class CpuConvertFullyConnectedWeightsKernel : public NewICpuKernel<CpuConvertFullyConnectedWeightsKernel>
{
public:
CpuConvertFullyConnectedWeightsKernel() = default;
diff --git a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
index 8cce1eaf1d..9d5ee39126 100644
--- a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
+++ b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to convert asymmetric signed to asymmetric signed and vice-versa */
-class CpuConvertQuantizedSignednessKernel : public ICpuKernel
+class CpuConvertQuantizedSignednessKernel : public NewICpuKernel<CpuConvertQuantizedSignednessKernel>
{
public:
CpuConvertQuantizedSignednessKernel() = default;
diff --git a/src/cpu/kernels/CpuCopyKernel.h b/src/cpu/kernels/CpuCopyKernel.h
index 193f38078b..ee4adeb4eb 100644
--- a/src/cpu/kernels/CpuCopyKernel.h
+++ b/src/cpu/kernels/CpuCopyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to perform a copy between two tensors */
-class CpuCopyKernel : public ICpuKernel
+class CpuCopyKernel : public NewICpuKernel<CpuCopyKernel>
{
public:
CpuCopyKernel() = default;
diff --git a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
index 1afb6bed4c..eae682bb6d 100644
--- a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
+++ b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class CpuDepthwiseConv2dNativeKernel : public ICpuKernel
+class CpuDepthwiseConv2dNativeKernel : public NewICpuKernel<CpuDepthwiseConv2dNativeKernel>
{
public:
CpuDepthwiseConv2dNativeKernel() = default;
diff --git a/src/cpu/kernels/CpuDequantizeKernel.h b/src/cpu/kernels/CpuDequantizeKernel.h
index f515cd36f9..834c039a76 100644
--- a/src/cpu/kernels/CpuDequantizeKernel.h
+++ b/src/cpu/kernels/CpuDequantizeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the dequantization layer kernel. */
-class CpuDequantizeKernel : public ICpuKernel
+class CpuDequantizeKernel : public NewICpuKernel<CpuDequantizeKernel>
{
public:
CpuDequantizeKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv2dKernel.h b/src/cpu/kernels/CpuDirectConv2dKernel.h
index 3ba7f7ed5f..1f5568743e 100644
--- a/src/cpu/kernels/CpuDirectConv2dKernel.h
+++ b/src/cpu/kernels/CpuDirectConv2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform Direct Convolution Layer. */
-class CpuDirectConv2dKernel : public ICpuKernel
+class CpuDirectConv2dKernel : public NewICpuKernel<CpuDirectConv2dKernel>
{
public:
CpuDirectConv2dKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
index a68936bbae..95011f79aa 100644
--- a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
+++ b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace kernels
* @note For quantized computations (i.e. @p src of S32 type) the output data type for auto-initialization must be passed as part
* of the @ref DirectConvolutionLayerOutputStageKernelInfo.
*/
-class CpuDirectConv2dOutputStageKernel : public ICpuKernel
+class CpuDirectConv2dOutputStageKernel : public NewICpuKernel<CpuDirectConv2dOutputStageKernel>
{
public:
CpuDirectConv2dOutputStageKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.cpp b/src/cpu/kernels/CpuDirectConv3dKernel.cpp
index 36764a625d..22c60cd994 100644
--- a/src/cpu/kernels/CpuDirectConv3dKernel.cpp
+++ b/src/cpu/kernels/CpuDirectConv3dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,64 +49,32 @@ namespace kernels
{
namespace
{
-struct DirectConv3dSelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using DirectConv3dSelectorPtr = std::add_pointer<bool(const DirectConv3dSelectorData &data)>::type;
-using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type;
-struct DirectConv3dKernel
-{
- const char *name;
- const DirectConv3dSelectorPtr is_selected;
- DirectConv3dKernelPtr ukernel;
-};
-
-static const DirectConv3dKernel available_kernels[] =
+static const std::vector<CpuDirectConv3dKernel::DirectConv3dKernel> available_kernels =
{
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc<float16_t>)
},
#endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::directconv3d_float_neon_ndhwc<float>)
},
{
"neon_qasymm8_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc<uint8_t>)
},
{
"neon_qasymm8_signed_directconv3d",
- [](const DirectConv3dSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::directconv3d_quantized_neon_ndhwc<int8_t>)
}
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const DirectConv3dKernel *get_implementation(const DirectConv3dSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const Conv3dInfo &conv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
@@ -117,7 +85,8 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, src1);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation != Size3D(1U, 1U, 1U));
- const auto *uk = get_implementation(DirectConv3dSelectorData{ src0->data_type(), CPUInfo::get() });
+ const auto *uk = CpuDirectConv3dKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const DataLayout data_layout = src0->data_layout();
@@ -161,7 +130,8 @@ void CpuDirectConv3dKernel::configure(const ITensorInfo *src0, const ITensorInfo
ARM_COMPUTE_UNUSED(src2);
ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
- const auto *uk = get_implementation(DirectConv3dSelectorData{ src0->data_type(), CPUInfo::get() });
+ const auto *uk = CpuDirectConv3dKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_conv_info = conv_info;
@@ -210,6 +180,12 @@ const char *CpuDirectConv3dKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuDirectConv3dKernel::DirectConv3dKernel> &CpuDirectConv3dKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.h b/src/cpu/kernels/CpuDirectConv3dKernel.h
index ff3b30f8ae..6ae70bd3b7 100644
--- a/src/cpu/kernels/CpuDirectConv3dKernel.h
+++ b/src/cpu/kernels/CpuDirectConv3dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+
namespace arm_compute
{
namespace cpu
@@ -34,8 +35,12 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform 3D Direct Convolution Layer. */
-class CpuDirectConv3dKernel : public ICpuKernel
+class CpuDirectConv3dKernel : public NewICpuKernel<CpuDirectConv3dKernel>
{
+private:
+ /* Template function for convolution 3d NDHWC */
+ using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type;
+
public:
CpuDirectConv3dKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv3dKernel);
@@ -71,14 +76,21 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- /* Template function for convolution 3d NDHWC */
- using DirectConv3dKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, const ITensor *, ITensor *, const Conv3dInfo &, const Window &)>::type;
+ struct DirectConv3dKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ DirectConv3dKernelPtr ukernel;
+ };
+
+ static const std::vector<DirectConv3dKernel> &get_available_kernels();
+private:
Conv3dInfo _conv_info{};
DirectConv3dKernelPtr _run_method{ nullptr };
std::string _name{};
};
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuElementwiseKernel.h b/src/cpu/kernels/CpuElementwiseKernel.h
index f323fe4470..bb081cbec1 100644
--- a/src/cpu/kernels/CpuElementwiseKernel.h
+++ b/src/cpu/kernels/CpuElementwiseKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ namespace kernels
* @f[ dst(x,y) = OP(src0(x,y), src1(x,y))@f]
*
*/
-class CpuElementwiseKernel : public ICpuKernel
+class CpuElementwiseKernel : public NewICpuKernel<CpuElementwiseKernel>
{
public:
CpuElementwiseKernel() = default;
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 3573fa0815..61bc64b235 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,77 +43,58 @@ namespace kernels
{
namespace
{
-struct ElementwiseUnarySelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using ElementwiseUnarySelector = std::add_pointer<bool(const ElementwiseUnarySelectorData &)>::type;
-
-struct ElementwiseUnaryKernel
-{
- const char *name;
- const ElementwiseUnarySelector is_selected;
- CpuElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel;
-};
-
-static const ElementwiseUnaryKernel available_kernels[] =
+static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> available_kernels =
{
#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
- REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>),
+ [](const DataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::F32 && data.isa.sve;
+ },
+ REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>)
},
{
"sve_fp16_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data)
+ {
+ return (data.dt == DataType::F16) && data.isa.sve;
+ },
REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op<__fp16>),
},
{
"sve_s32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S32 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<int32_t>),
},
#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<float>),
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<__fp16>),
},
#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_s32_elementwise_unary",
- [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S32; },
REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<int32_t>),
},
#endif // defined(ARM_COMPUTE_ENABLE_NEON)
};
-const ElementwiseUnaryKernel *get_implementation(DataType dt)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt, CPUInfo::get() }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
} // namespace
void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
{
ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
- const auto uk = get_implementation(src.data_type());
+ const auto uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
_op = op;
@@ -128,14 +109,15 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo
auto shape_and_window = compute_output_shape_and_window(src.tensor_shape());
auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type());
- ICpuKernel::configure(shape_and_window.second);
+ NewICpuKernel::configure(shape_and_window.second);
}
Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src);
- const auto *uk = get_implementation(src.data_type());
+ const auto *uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
switch(op)
@@ -177,6 +159,12 @@ const char *CpuElementwiseUnaryKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> &CpuElementwiseUnaryKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/cpu/kernels/CpuElementwiseUnaryKernel.h
index f72eddf737..c520b89618 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.h
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,8 +39,11 @@ namespace kernels
* Element-wise operation is computed by:
* @f[ dst(x) = OP(src(x))@f]
*/
-class CpuElementwiseUnaryKernel : public ICpuKernel
+class CpuElementwiseUnaryKernel : public NewICpuKernel<CpuElementwiseUnaryKernel>
{
+private:
+ using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
+
public:
CpuElementwiseUnaryKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuElementwiseUnaryKernel);
@@ -64,11 +67,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
- /** Common signature for all the specialised elementwise unary micro-kernels
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
+ struct ElementwiseUnaryKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ ElementwiseUnaryUkernelPtr ukernel;
+ };
+
+ static const std::vector<ElementwiseUnaryKernel> &get_available_kernels();
private:
ElementWiseUnary _op{};
diff --git a/src/cpu/kernels/CpuFillKernel.h b/src/cpu/kernels/CpuFillKernel.h
index 3bc6a40309..5262ecc5c6 100644
--- a/src/cpu/kernels/CpuFillKernel.h
+++ b/src/cpu/kernels/CpuFillKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ namespace cpu
namespace kernels
{
/** Kernel for filling a tensor with a given constant value */
-class CpuFillKernel : public ICpuKernel
+class CpuFillKernel : public NewICpuKernel<CpuFillKernel>
{
public:
CpuFillKernel() = default;
diff --git a/src/cpu/kernels/CpuFloorKernel.cpp b/src/cpu/kernels/CpuFloorKernel.cpp
index bcac1a41fc..65e390a81a 100644
--- a/src/cpu/kernels/CpuFloorKernel.cpp
+++ b/src/cpu/kernels/CpuFloorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,58 +42,25 @@ namespace kernels
{
namespace
{
-struct FloorSelectorData
-{
- DataType dt;
-};
-
-using FloorSelectorPtr = std::add_pointer<bool(const FloorSelectorData &data)>::type;
-using FloorUKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
-
-struct FloorUKernel
-{
- const char *name;
- const FloorSelectorPtr is_selected;
- FloorUKernelPtr ukernel;
-};
-
-static const FloorUKernel available_kernels[] =
+static const std::vector<CpuFloorKernel::FloorKernel> available_kernels =
{
{
"neon_fp16_floor",
- [](const FloorSelectorData & data) { return data.dt == DataType::F16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_floor)
},
{
"neon_fp32_floor",
- [](const FloorSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_floor)
- },
-};
-
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const FloorUKernel *get_implementation(const FloorSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
}
- return nullptr;
-}
+};
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- const auto *uk = get_implementation(FloorSelectorData{ src->data_type() });
+ const auto *uk = CpuFloorKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
// Validate in case of configured output
@@ -114,7 +81,7 @@ void CpuFloorKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
auto_init_if_empty(*dst, src->tensor_shape(), 1, src->data_type());
- const auto *uk = get_implementation(FloorSelectorData{ src->data_type() });
+ const auto *uk = CpuFloorKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_run_method = uk->ukernel;
@@ -172,6 +139,12 @@ const char *CpuFloorKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuFloorKernel::FloorKernel> &CpuFloorKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuFloorKernel.h b/src/cpu/kernels/CpuFloorKernel.h
index ffb9658190..2b102a0515 100644
--- a/src/cpu/kernels/CpuFloorKernel.h
+++ b/src/cpu/kernels/CpuFloorKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,11 @@ namespace cpu
namespace kernels
{
/** Cpu accelerated kernel to perform a floor operation */
-class CpuFloorKernel : public ICpuKernel
+class CpuFloorKernel : public NewICpuKernel<CpuFloorKernel>
{
+private:
+ using FloorKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
+
public:
CpuFloorKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuFloorKernel);
@@ -65,12 +68,18 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using FloorUKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
+ struct FloorKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ FloorKernelPtr ukernel;
+ };
+
+ static const std::vector<FloorKernel> &get_available_kernels();
private:
- FloorUKernelPtr _run_method{ nullptr };
- std::string _name{};
+ FloorKernelPtr _run_method{ nullptr };
+ std::string _name{};
};
} // namespace kernels
} // namespace cpu
diff --git a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
index 047776bd1e..13b46142c4 100644
--- a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
+++ b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,7 +52,7 @@ namespace kernels
*
* After this operation, the dst matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
*/
-class CpuGemmInterleave4x4Kernel : public ICpuKernel
+class CpuGemmInterleave4x4Kernel : public NewICpuKernel<CpuGemmInterleave4x4Kernel>
{
public:
CpuGemmInterleave4x4Kernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
index 083ee187ef..6d06f12e54 100644
--- a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,7 @@ namespace kernels
* -# Compute the int32 matrix product of the resulting a * b and store the result as int32
*
*/
-class CpuGemmLowpMatrixMultiplyKernel : public ICpuKernel
+class CpuGemmLowpMatrixMultiplyKernel : public NewICpuKernel<CpuGemmLowpMatrixMultiplyKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
index 918f8c89d9..6cced66b47 100644
--- a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace kernels
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
*/
-class CpuGemmLowpMatrixAReductionKernel : public ICpuKernel
+class CpuGemmLowpMatrixAReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixAReductionKernel>
{
public:
/** Default constructor */
@@ -98,7 +98,7 @@ private:
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
*/
-class CpuGemmLowpMatrixBReductionKernel : public ICpuKernel
+class CpuGemmLowpMatrixBReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixBReductionKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
index 1ec969be92..1d70c0619e 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,7 @@ namespace kernels
* (a_offset * b_offset * k)
*
*/
-class CpuGemmLowpOffsetContributionKernel : public ICpuKernel
+class CpuGemmLowpOffsetContributionKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
index d97727dd09..13c64f4631 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,7 +63,7 @@ namespace kernels
* (a_offset * b_offset * k)
*/
-class CpuGemmLowpOffsetContributionOutputStageKernel : public ICpuKernel
+class CpuGemmLowpOffsetContributionOutputStageKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionOutputStageKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
index ae13e760ff..f6e8c816f3 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,7 +51,7 @@ namespace kernels
* -# -to the [-128..127] range and cast to QASYMM8_SIGNED.
*
*/
-class CpuGemmLowpQuantizeDownInt32ScaleKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ScaleKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ScaleKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ScaleKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index 53a9d34ed1..a9e2560657 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,7 +48,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index 67829e7773..bfac8681a5 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index b62cac4818..5e5683cfc3 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICpuKernel
+class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
index c9798fc24c..64338259e9 100644
--- a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
+++ b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ namespace kernels
* - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref CpuGemmMatrixMultiplyKernel
* - MTX_1 = C
*/
-class CpuGemmMatrixAdditionKernel : public ICpuKernel
+class CpuGemmMatrixAdditionKernel : public NewICpuKernel<CpuGemmMatrixAdditionKernel>
{
public:
CpuGemmMatrixAdditionKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
index 0b4e01579c..757b46e9a7 100644
--- a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
+++ b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ namespace kernels
* @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p lhs is a vector and the second input tensor @p rhs a matrix. The implementation also assumes that both tensors have not been reshaped
*
*/
-class CpuGemmMatrixMultiplyKernel : public ICpuKernel
+class CpuGemmMatrixMultiplyKernel : public NewICpuKernel<CpuGemmMatrixMultiplyKernel>
{
public:
CpuGemmMatrixMultiplyKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
index de920b5ed7..2acda35947 100644
--- a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
+++ b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,7 +68,7 @@ namespace kernels
* @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
*
*/
-class CpuGemmTranspose1xWKernel : public ICpuKernel
+class CpuGemmTranspose1xWKernel : public NewICpuKernel<CpuGemmTranspose1xWKernel>
{
public:
CpuGemmTranspose1xWKernel() = default;
diff --git a/src/cpu/kernels/CpuIm2ColKernel.h b/src/cpu/kernels/CpuIm2ColKernel.h
index 797d54c95c..d789adef95 100644
--- a/src/cpu/kernels/CpuIm2ColKernel.h
+++ b/src/cpu/kernels/CpuIm2ColKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuIm2ColKernel : public ICpuKernel
+class CpuIm2ColKernel : public NewICpuKernel<CpuIm2ColKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuKernelSelectionTypes.h b/src/cpu/kernels/CpuKernelSelectionTypes.h
new file mode 100644
index 0000000000..779fb86e6d
--- /dev/null
+++ b/src/cpu/kernels/CpuKernelSelectionTypes.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
+#define ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
+
+#include "arm_compute/core/Types.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+// Selector data types
+struct DataTypeISASelectorData
+{
+ DataType dt;
+ const cpuinfo::CpuIsaInfo &isa;
+};
+
+struct PoolDataTypeISASelectorData
+{
+ DataType dt;
+ DataLayout dl;
+ int pool_stride_x;
+ Size2D pool_size;
+ const cpuinfo::CpuIsaInfo &isa;
+};
+
+// Selector pointer types
+using DataTypeISASelectorPtr = std::add_pointer<bool(const DataTypeISASelectorData &data)>::type;
+using PoolDataTypeISASelectorPtr = std::add_pointer<bool(const PoolDataTypeISASelectorData &data)>::type;
+
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+
+#endif // ARM_COMPUTE_CPU_KERNEL_SELECTION_TYPES_H
\ No newline at end of file
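
The selector structs and pointer aliases declared above drive the table-based micro-kernel selection shared by the reworked kernels in this patch. Below is a minimal, self-contained sketch of that pattern; MicroKernel, example_kernels and select_implementation are hypothetical names used for illustration only, while the real lookup sits behind the templated base in src/cpu/ICpuKernel.h and each kernel's own table.

    // Sketch only: hypothetical names illustrating the selection pattern
    // introduced by CpuKernelSelectionTypes.h, not the library's actual code.
    #include <vector>

    enum class DataType { F32, F16 };

    struct CpuIsaInfo
    {
        bool neon = true;
        bool sve  = false;
        bool fp16 = false;
    };

    struct DataTypeISASelectorData
    {
        DataType          dt;
        const CpuIsaInfo &isa;
    };

    using SelectorPtr = bool (*)(const DataTypeISASelectorData &);
    using KernelPtr   = void (*)(); // real micro-kernels take tensor/window arguments

    struct MicroKernel
    {
        const char *name;
        SelectorPtr is_selected;
        KernelPtr   ukernel;
    };

    void neon_fp16_stub() {}
    void neon_fp32_stub() {}

    static const std::vector<MicroKernel> example_kernels = {
        { "neon_fp16_stub",
          [](const DataTypeISASelectorData &d) { return d.dt == DataType::F16 && d.isa.fp16; },
          neon_fp16_stub },
        { "neon_fp32_stub",
          [](const DataTypeISASelectorData &d) { return d.dt == DataType::F32 && d.isa.neon; },
          neon_fp32_stub },
    };

    // The first entry whose predicate accepts the selector data wins; nullptr otherwise.
    const MicroKernel *select_implementation(const DataTypeISASelectorData &data)
    {
        for(const auto &uk : example_kernels)
        {
            if(uk.is_selected(data))
            {
                return &uk;
            }
        }
        return nullptr;
    }

Judging by the call sites further down, each kernel now exposes its table through a get_available_kernels() accessor and the templated base provides the equivalent of select_implementation() as a static get_implementation().
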
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index b65ec20044..3ab198510f 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform multiplication between two tensors */
-class CpuMulKernel : public ICpuKernel
+class CpuMulKernel : public NewICpuKernel<CpuMulKernel>
{
public:
CpuMulKernel() = default;
@@ -118,7 +118,7 @@ private:
};
/** Interface for the complex pixelwise multiplication kernel. */
-class CpuComplexMulKernel : public ICpuKernel
+class CpuComplexMulKernel : public NewICpuKernel<CpuComplexMulKernel>
{
public:
CpuComplexMulKernel() = default;
diff --git a/src/cpu/kernels/CpuPermuteKernel.h b/src/cpu/kernels/CpuPermuteKernel.h
index 1b2672b5b9..aae28582b1 100644
--- a/src/cpu/kernels/CpuPermuteKernel.h
+++ b/src/cpu/kernels/CpuPermuteKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to perform tensor permutation given a permutation vector */
-class CpuPermuteKernel : public ICpuKernel
+class CpuPermuteKernel : public NewICpuKernel<CpuPermuteKernel>
{
public:
CpuPermuteKernel() = default;
diff --git a/src/cpu/kernels/CpuPool2dKernel.cpp b/src/cpu/kernels/CpuPool2dKernel.cpp
index f61cd0835d..953a9ffb67 100644
--- a/src/cpu/kernels/CpuPool2dKernel.cpp
+++ b/src/cpu/kernels/CpuPool2dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,136 +52,101 @@ namespace
{
using namespace misc::shape_calculator;
-struct PoolingSelectorData
-{
- DataType dt;
- DataLayout dl;
- int pool_stride_x;
- Size2D pool_size;
-};
-
-using PoolingSelectorPtr = std::add_pointer<bool(const PoolingSelectorData &data)>::type;
-using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
-struct PoolingKernel
-{
- const char *name;
- const PoolingSelectorPtr is_selected;
- PoolingKernelPtr ukernel;
-};
-
-static const PoolingKernel available_kernels[] =
+static const std::vector<CpuPool2dKernel::PoolingKernel> available_kernels =
{
{
"neon_qu8_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_qasymm8_neon_nhwc)
},
{
"neon_qs8_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_qasymm8_signed_neon_nhwc)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_f16_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); },
REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nhwc)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_nhwc_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); },
REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nhwc)
},
#if defined(ENABLE_NCHW_KERNELS)
{
"neon_qu8_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<uint8_t>)
},
{
"neon_qu8_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<uint8_t>)
},
{
"neon_qu8_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<uint8_t>)
},
{
"neon_qs8_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<int8_t>)
},
{
"neon_qs8_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<int8_t>)
},
{
"neon_qs8_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<int8_t>)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
REGISTER_FP16_NEON(arm_compute::cpu::pooling2_fp16_neon_nchw)
},
{
"neon_fp16_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
REGISTER_FP16_NEON(arm_compute::cpu::pooling3_fp16_neon_nchw)
},
{
"neon_fp16_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16 && data.isa.fp16)); },
REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nchw)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_nchw_pool2",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
REGISTER_FP32_NEON(arm_compute::cpu::pooling2_fp32_neon_nchw)
},
{
"neon_fp32_nchw_pool3",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
REGISTER_FP32_NEON(arm_compute::cpu::pooling3_fp32_neon_nchw)
},
{
"neon_fp32_nchw_pool7",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
REGISTER_FP32_NEON(arm_compute::cpu::pooling7_fp32_neon_nchw)
},
{
"neon_fp32_nchw_poolMxN",
- [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); },
+ [](const PoolDataTypeISASelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); },
REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nchw)
},
#endif /* defined(ENABLE_NCHW_KERNELS) */
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const PoolingKernel *get_implementation(DataType dt, DataLayout dl, int pool_stride_x, Size2D pool_size)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt, dl, pool_stride_x, pool_size }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info,
const ITensorInfo *indices, Size2D pool_size)
{
@@ -235,7 +200,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
}
}
- const auto *uk = get_implementation(src->data_type(), src->data_layout(), pool_stride_x, pool_size);
+ const auto *uk = CpuPool2dKernel::get_implementation(PoolDataTypeISASelectorData{ src->data_type(), src->data_layout(), pool_stride_x, pool_size, CPUInfo::get().get_isa() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
return Status{};
@@ -335,7 +300,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, indices, pool_size));
- const auto *uk = get_implementation(src->data_type(), src->data_layout(), pad_stride_info.stride().first, pool_size);
+ const auto *uk = CpuPool2dKernel::get_implementation(PoolDataTypeISASelectorData{ src->data_type(), src->data_layout(), (int)pad_stride_info.stride().first, pool_size, CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON(uk == nullptr);
// Set instance variables
@@ -350,7 +315,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
{
// Configure kernel window
Window win = calculate_max_window(*dst, Steps());
- ICpuKernel::configure(win);
+ NewICpuKernel::configure(win);
}
else
{
@@ -358,7 +323,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration,
pool_size.x(), pool_size.y());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICpuKernel::configure(win_config.second);
+ NewICpuKernel::configure(win_config.second);
}
}
@@ -391,7 +356,7 @@ void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const T
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
@@ -447,6 +412,12 @@ const char *CpuPool2dKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuPool2dKernel::PoolingKernel> &CpuPool2dKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
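
The new get_available_kernels() accessor is the hook that makes kernel-selection testing possible: a test can enumerate every registered pooling micro-kernel and probe its predicate directly, without going through configure(). A minimal sketch, assuming it is compiled somewhere the internal headers are visible (as the validation tests added below are); list_pool2d_kernels is a made-up helper name.

    #include <iostream>
    #include "arm_compute/core/Types.h"
    #include "src/common/cpuinfo/CpuIsaInfo.h"
    #include "src/cpu/kernels/CpuKernelSelectionTypes.h"
    #include "src/cpu/kernels/CpuPool2dKernel.h"

    // Sketch: list the registered pooling micro-kernels and flag every entry
    // whose predicate accepts an NHWC/F32 selector on a plain Neon core. The
    // first flagged entry is the one get_implementation() would return.
    void list_pool2d_kernels()
    {
        using namespace arm_compute;
        cpuinfo::CpuIsaInfo isa{};
        isa.neon = true;
        const cpu::kernels::PoolDataTypeISASelectorData data{ DataType::F32, DataLayout::NHWC, 1, Size2D(2, 2), isa };
        for(const auto &uk : cpu::kernels::CpuPool2dKernel::get_available_kernels())
        {
            std::cout << uk.name << (uk.is_selected(data) ? "  <-- selected" : "") << "\n";
        }
    }
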
diff --git a/src/cpu/kernels/CpuPool2dKernel.h b/src/cpu/kernels/CpuPool2dKernel.h
index aedeb7fbe9..7fd3247d6d 100644
--- a/src/cpu/kernels/CpuPool2dKernel.h
+++ b/src/cpu/kernels/CpuPool2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,11 @@ namespace cpu
namespace kernels
{
/** Interface for the pooling layer kernel */
-class CpuPool2dKernel : public ICpuKernel
+class CpuPool2dKernel : public NewICpuKernel<CpuPool2dKernel>
{
+private:
+ using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
+
public:
CpuPool2dKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dKernel);
@@ -62,8 +65,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
+ struct PoolingKernel
+ {
+ const char *name;
+ const PoolDataTypeISASelectorPtr is_selected;
+ PoolingKernelPtr ukernel;
+ };
+
+ static const std::vector<PoolingKernel> &get_available_kernels();
private:
PoolingLayerInfo _pool_info{};
diff --git a/src/cpu/kernels/CpuQuantizeKernel.h b/src/cpu/kernels/CpuQuantizeKernel.h
index eb0814926d..709e1c89c7 100644
--- a/src/cpu/kernels/CpuQuantizeKernel.h
+++ b/src/cpu/kernels/CpuQuantizeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace kernels
*
* @note The implementation supports only 3D input tensors
*/
-class CpuQuantizeKernel : public ICpuKernel
+class CpuQuantizeKernel : public NewICpuKernel<CpuQuantizeKernel>
{
public:
CpuQuantizeKernel() = default;
diff --git a/src/cpu/kernels/CpuReshapeKernel.h b/src/cpu/kernels/CpuReshapeKernel.h
index d4e2b44b54..6a5c528ecd 100644
--- a/src/cpu/kernels/CpuReshapeKernel.h
+++ b/src/cpu/kernels/CpuReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform tensor reshaping */
-class CpuReshapeKernel : public ICpuKernel
+class CpuReshapeKernel : public NewICpuKernel<CpuReshapeKernel>
{
public:
CpuReshapeKernel() = default;
diff --git a/src/cpu/kernels/CpuScaleKernel.cpp b/src/cpu/kernels/CpuScaleKernel.cpp
index 3063d8f682..60564a97dd 100644
--- a/src/cpu/kernels/CpuScaleKernel.cpp
+++ b/src/cpu/kernels/CpuScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,52 +48,37 @@ namespace kernels
{
namespace
{
-struct ScaleSelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using ScaleSelectorPtr = std::add_pointer<bool(const ScaleSelectorData &data)>::type;
-using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
- InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type;
-struct ScaleKernel
-{
- const char *name;
- const ScaleSelectorPtr is_selected;
- ScaleKernelPtr ukernel;
-};
-
-static const ScaleKernel available_kernels[] =
+static const std::vector<CpuScaleKernel::ScaleKernel> available_kernels =
{
#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve; },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale)
},
{
"sve_fp32_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale)
},
{
"sve_qu8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; },
REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale)
},
{
"sve_qs8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale)
},
{
"sve_u8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale)
},
{
"sve_s16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale)
},
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
@@ -101,60 +86,43 @@ static const ScaleKernel available_kernels[] =
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>)
},
#endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_fp32_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F32; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::common_neon_scale<float>)
},
{
"neon_qu8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::qasymm8_neon_scale)
},
{
"neon_qs8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::qasymm8_signed_neon_scale)
},
{
"neon_u8_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::U8; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8; },
REGISTER_INTEGER_NEON(arm_compute::cpu::u8_neon_scale)
},
{
"neon_s16_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::S16; },
+ [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16; },
REGISTER_INTEGER_NEON(arm_compute::cpu::s16_neon_scale)
},
#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const ScaleKernel *get_implementation(const ScaleSelectorData &data)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected(data))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
{
- const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
@@ -212,7 +180,7 @@ void CpuScaleKernel::configure(const ITensorInfo *src, const ITensorInfo *dx, co
dst,
info));
- const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_run_method = uk->ukernel;
@@ -618,6 +586,12 @@ const char *CpuScaleKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuScaleKernel::ScaleKernel> &CpuScaleKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
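
Passing a cpuinfo::CpuIsaInfo value instead of the live CPUInfo reference is what allows the selection logic to be queried for ISA combinations the test machine does not actually have. A hedged sketch of the call pattern, mirroring the validation tests added further below; the helper name is made up, and the one-argument get_implementation() call follows the call sites in this file.

    #include "arm_compute/core/Types.h"
    #include "src/common/cpuinfo/CpuIsaInfo.h"
    #include "src/cpu/kernels/CpuScaleKernel.h"

    // Hypothetical helper: report which scale micro-kernel would be chosen for
    // F32 data on a core that advertises SVE, regardless of the host CPU.
    const char *selected_scale_kernel_for_sve_f32()
    {
        using namespace arm_compute;
        cpuinfo::CpuIsaInfo isa{};
        isa.neon = true;
        isa.sve  = true; // injected, not detected
        const auto *impl = cpu::kernels::CpuScaleKernel::get_implementation(
            cpu::kernels::DataTypeISASelectorData{ DataType::F32, isa });
        return (impl != nullptr) ? impl->name : nullptr;
    }
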
diff --git a/src/cpu/kernels/CpuScaleKernel.h b/src/cpu/kernels/CpuScaleKernel.h
index 913b5a5593..94bbdb72a0 100644
--- a/src/cpu/kernels/CpuScaleKernel.h
+++ b/src/cpu/kernels/CpuScaleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,14 @@ namespace cpu
namespace kernels
{
/** Arm(R) Neon(TM) kernel to perform scaling on a tensor */
-class CpuScaleKernel : public ICpuKernel
+class CpuScaleKernel : public NewICpuKernel<CpuScaleKernel>
{
+private:
+ /** Scale function to use for the particular function to use */
+ using ScaleFunctionPtr = void (CpuScaleKernel::*)(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const Window &window);
+ using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
+ InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type;
+
public:
CpuScaleKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuScaleKernel);
@@ -67,6 +73,15 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
+ struct ScaleKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ ScaleKernelPtr ukernel;
+ };
+
+ static const std::vector<ScaleKernel> &get_available_kernels();
+
private:
#ifdef ENABLE_NCHW_KERNELS
/** function to perform scale using area interpolation on the given window
@@ -87,11 +102,6 @@ private:
void scale_nearest_nchw(const ITensor *src, ITensor *dst, const ITensor *dx, const ITensor *dy, const ITensor *offsets, const Window &window);
#endif // ENABLE_NCHW_KERNELS
- /** Scale function to use for the particular function to use */
- using ScaleFunctionPtr = void (CpuScaleKernel::*)(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const Window &window);
- using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
- InterpolationPolicy, BorderMode, PixelValue, float, bool, const Window &)>::type;
-
ScaleFunctionPtr _func{ nullptr };
InterpolationPolicy _policy{};
BorderMode _border_mode{};
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.h b/src/cpu/kernels/CpuSoftmaxKernel.h
index 8073a677d9..f317662620 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.h
+++ b/src/cpu/kernels/CpuSoftmaxKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the identifying the max value of 1D Logits */
-class CpuLogits1DMaxKernel : public ICpuKernel
+class CpuLogits1DMaxKernel : public NewICpuKernel<CpuLogits1DMaxKernel>
{
public:
CpuLogits1DMaxKernel() = default;
@@ -67,7 +67,7 @@ private:
/** Interface for softmax computation for QASYMM8 with pre-computed max. */
template <bool IS_LOG = false>
-class CpuLogits1DSoftmaxKernel : public ICpuKernel
+class CpuLogits1DSoftmaxKernel : public NewICpuKernel<CpuLogits1DSoftmaxKernel<IS_LOG>>
{
public:
CpuLogits1DSoftmaxKernel() = default;
diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp
index ec65f12dfc..c12feb4331 100644
--- a/src/cpu/kernels/CpuSubKernel.cpp
+++ b/src/cpu/kernels/CpuSubKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,85 +39,52 @@ namespace kernels
{
namespace
{
-struct SubSelectorData
-{
- DataType dt;
-};
-
-using SubSelectorPtr = std::add_pointer<bool(const SubSelectorData &data)>::type;
-using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
-
-struct SubKernel
-{
- const char *name;
- const SubSelectorPtr is_selected;
- SubKernelPtr ukernel;
-};
-
-static const SubKernel available_kernels[] =
+static const std::vector<CpuSubKernel::SubKernel> available_kernels =
{
{
"neon_fp32_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
REGISTER_FP32_NEON(arm_compute::cpu::sub_same_neon<float>)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::F16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16) && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::sub_same_neon<float16_t>)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_u8_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::U8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::U8); },
REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<uint8_t>)
},
{
"neon_s16_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::S16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S16); },
REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int16_t>)
},
{
"neon_s32_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::S32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::S32); },
REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int32_t>)
},
{
"neon_qu8_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::sub_qasymm8_neon)
},
{
"neon_qs8_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::sub_qasymm8_signed_neon)
},
{
"neon_qs16_sub",
- [](const SubSelectorData & data) { return (data.dt == DataType::QSYMM16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QSYMM16); },
REGISTER_QSYMM16_NEON(arm_compute::cpu::sub_qsymm16_neon)
},
};
-/** Micro-kernel selector
- *
- * @param[in] data Selection data passed to help pick the appropriate micro-kernel
- *
- * @return A matching micro-kernel else nullptr
- */
-const SubKernel *get_implementation(DataType dt)
-{
- for(const auto &uk : available_kernels)
- {
- if(uk.is_selected({ dt }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy)
{
ARM_COMPUTE_UNUSED(policy);
@@ -126,7 +93,8 @@ inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src
DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &src1);
- const auto *uk = get_implementation(src0.data_type());
+ const auto *uk = CpuSubKernel::get_implementation(DataTypeISASelectorData{ src0.data_type(), CPUInfo::get().get_isa() });
+
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
@@ -157,7 +125,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
set_shape_if_empty(*dst, out_shape);
set_data_type_if_unknown(*dst, src0->data_type());
- const auto *uk = get_implementation(src0->data_type());
+ const auto *uk = CpuSubKernel::get_implementation(DataTypeISASelectorData{ src0->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_policy = policy;
@@ -196,6 +164,12 @@ const char *CpuSubKernel::name() const
{
return _name.c_str();
}
+
+const std::vector<CpuSubKernel::SubKernel> &CpuSubKernel::get_available_kernels()
+{
+ return available_kernels;
+}
+
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
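
A behavioural nuance of the rewritten table: the fp16 entry is now gated on data.isa.fp16 as well as the data type, so a binary compiled with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC still falls back to the other kernels at runtime on cores that do not report fp16. Reduced to its essentials (illustrative free function, not library code):

    #include "arm_compute/core/Types.h"
    #include "src/cpu/kernels/CpuKernelSelectionTypes.h"

    // The #if guard decides whether the fp16 entry exists in the table at all;
    // this predicate decides whether it can ever be selected on the running core.
    bool fp16_sub_entry_is_selectable(const arm_compute::cpu::kernels::DataTypeISASelectorData &data)
    {
        return (data.dt == arm_compute::DataType::F16) && data.isa.fp16;
    }
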
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 80d6be68b5..70f34b1b57 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,11 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform subtraction between two tensors */
-class CpuSubKernel : public ICpuKernel
+class CpuSubKernel : public NewICpuKernel<CpuSubKernel>
{
+private:
+ using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+
public:
CpuSubKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuSubKernel);
@@ -70,8 +73,14 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-private:
- using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
+ struct SubKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ SubKernelPtr ukernel;
+ };
+
+ static const std::vector<SubKernel> &get_available_kernels();
private:
ConvertPolicy _policy{};
diff --git a/src/cpu/kernels/CpuTransposeKernel.h b/src/cpu/kernels/CpuTransposeKernel.h
index 6805eac642..7e1ee5f73d 100644
--- a/src/cpu/kernels/CpuTransposeKernel.h
+++ b/src/cpu/kernels/CpuTransposeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel which transposes the elements of a matrix */
-class CpuTransposeKernel : public ICpuKernel
+class CpuTransposeKernel : public NewICpuKernel<CpuTransposeKernel>
{
public:
CpuTransposeKernel() = default;
diff --git a/src/cpu/kernels/CpuWeightsReshapeKernel.h b/src/cpu/kernels/CpuWeightsReshapeKernel.h
index c80bf3b25e..6c2d7ef6f9 100644
--- a/src/cpu/kernels/CpuWeightsReshapeKernel.h
+++ b/src/cpu/kernels/CpuWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,7 +56,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuWeightsReshapeKernel : public ICpuKernel
+class CpuWeightsReshapeKernel : public NewICpuKernel<CpuWeightsReshapeKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuWinogradConv2dKernel.h b/src/cpu/kernels/CpuWinogradConv2dKernel.h
index db2d8acfdb..0c4e28c394 100644
--- a/src/cpu/kernels/CpuWinogradConv2dKernel.h
+++ b/src/cpu/kernels/CpuWinogradConv2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ namespace arm_compute
namespace cpu
{
/** Interface for the kernel to perform Winograd input transform. */
-class ICpuWinogradConv2dTransformInputKernel : public ICpuKernel
+class ICpuWinogradConv2dTransformInputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformInputKernel>
{
public:
/** Get the working space required to perform the transformation.
@@ -216,7 +216,7 @@ private:
};
/** Interface for the kernel to perform Winograd output transform. */
-class ICpuWinogradConv2dTransformOutputKernel : public ICpuKernel
+class ICpuWinogradConv2dTransformOutputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformOutputKernel>
{
public:
/** Get the working space required to perform the transformation.
@@ -418,7 +418,7 @@ private:
};
/** Interface for the kernel to perform Winograd weights transform. */
-class ICpuWinogradConv2dTransformWeightsKernel : public ICpuKernel
+class ICpuWinogradConv2dTransformWeightsKernel : public NewICpuKernel<ICpuWinogradConv2dTransformWeightsKernel>
{
public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
index 8980922945..902e9616d1 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Types.h"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+#include "src/cpu/kernels/CpuKernelSelectionTypes.h"
namespace arm_conv
{
@@ -44,7 +45,7 @@ namespace cpu
namespace kernels
{
/** This class is a wrapper for the depthwise convolution assembly kernels. */
-class CpuDepthwiseConv2dAssemblyWrapperKernel final : public ICpuKernel
+class CpuDepthwiseConv2dAssemblyWrapperKernel final : public NewICpuKernel<CpuDepthwiseConv2dAssemblyWrapperKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
index 8625fd96b4..daa3168beb 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,7 @@
#include "src/core/NEON/kernels/assembly/pooling.hpp"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+#include "src/cpu/kernels/CpuKernelSelectionTypes.h"
#include "pool_common.hpp"
@@ -45,7 +46,7 @@ namespace kernels
* execute a single assembly kernel in the context of an NEFunction.
*
*/
-class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel
+class CpuPool2dAssemblyWrapperKernel final : public NewICpuKernel<CpuPool2dAssemblyWrapperKernel>
{
public:
/** Constructor
diff --git a/support/StringSupport.h b/support/StringSupport.h
index 5e237c7dff..e8b3ca7ab3 100644
--- a/support/StringSupport.h
+++ b/support/StringSupport.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -131,6 +131,12 @@ inline std::string to_string(T && value)
return stream.str();
}
+// Specialization for const std::string&
+inline std::string to_string(const std::string &value)
+{
+ return value;
+}
+
/** Convert string values to float.
*
* @note This function implements the same behaviour as std::stof. The latter
@@ -164,6 +170,12 @@ inline std::string to_string(T &&value)
return ::std::to_string(std::forward<T>(value));
}
+// Specialization for const std::string&
+inline std::string to_string(const std::string &value)
+{
+ return value;
+}
+
/** Convert string values to float.
*
* @note This function acts as a convenience wrapper around std::stof. The
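
The two identical to_string additions above (one per branch of this header) exist so that generic code, such as test helpers that stringify both numeric values and already-formatted kernel names, can call to_string() uniformly. A standalone sketch of the overload-resolution pattern, deliberately kept outside the library's namespaces:

    #include <sstream>
    #include <string>
    #include <type_traits>
    #include <utility>

    // Stringify arithmetic values via a stream...
    template <typename T, typename std::enable_if<std::is_arithmetic<typename std::decay<T>::type>::value, int>::type = 0>
    inline std::string to_string(T &&value)
    {
        std::stringstream stream;
        stream << std::forward<T>(value);
        return stream.str();
    }

    // ...and pass strings straight through, so to_string(3) -> "3" and
    // to_string(std::string("neon_fp32_add")) returns the name unchanged.
    inline std::string to_string(const std::string &value)
    {
        return value;
    }
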
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index 69fe9053d8..8d70ca5415 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "arm_compute/runtime/RuntimeContext.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuActivationKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ActivationFunctionsDataset.h"
@@ -279,6 +281,43 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info));
ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuActivationKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index f3e4dfc6e5..c72e082a74 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuAddKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ConvertPolicyDataset.h"
@@ -85,6 +87,49 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
ConvertPolicy::WRAP);
ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
}
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::U8,
+ DataType::S16,
+ DataType::S32,
+ DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ })),
+ combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ DataType::U8,
+ DataType::S16,
+ DataType::S32
+ }))),
+ combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("DataType", { DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
+ DataType::QSYMM16
+ }))),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.sve = (cpu_ext == "SVE");
+ cpu_isa.sve2 = (cpu_ext == "SVE2");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuAddKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*
diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp
index 419ce56e44..d3bd3e0b1c 100644
--- a/tests/validation/NEON/Floor.cpp
+++ b/tests/validation/NEON/Floor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/common/cpuinfo/CpuIsaInfo.h"
+#include "src/cpu/kernels/CpuFloorKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -62,6 +64,30 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
const Status status = NEFloor::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false));
ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
}
+
+
+DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
+ combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ framework::dataset::make("DataType", { DataType::F32,
+ DataType::F16,
+ })),
+ cpu_ext, data_type)
+{
+ using namespace cpu::kernels;
+
+ cpuinfo::CpuIsaInfo cpu_isa{};
+ cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = (data_type == DataType::F16);
+
+ const auto *selected_impl = CpuFloorKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred);
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
+
+ std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_floor";
+ std::string actual = selected_impl->name;
+
+ ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
+}
// clang-format on
// *INDENT-ON*