author     Yair Schwarzbaum <yair.schwarzbaum@arm.com>    2022-01-12 16:38:58 +0200
committer  Yair Schwarzbaum <yair.schwarzbaum@arm.com>    2022-02-01 09:58:37 +0000
commit     46d44d26183d835d209d7ef1b9023e217dd4019d (patch)
tree       0bd403aa952b52f715a9c07bfd8a2a5906712862
parent     0ef2c2176fd99319342a4174e15c0263ede236cd (diff)
Enable kernel selection testing (Phase #2)
Resolves COMPMID-4987

Change-Id: I1201ca3eae107989d13b6a2c6d9560de24fe112d
Signed-off-by: Yair Schwarzbaum <yair.schwarzbaum@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7015
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
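The point of this phase is to expose each kernel's micro-kernel selection table so the test suite can query it directly (e.g. via the CpuLogits1DMaxKernel::get_available_kernels() accessor added below) instead of going through configure(). A minimal sketch of how such a kernel-selection check might look (illustrative only: the helper function, the includes and the exact type spellings are assumptions, not code from this patch):

#include "arm_compute/core/CPP/CPPTypes.h"
#include "src/cpu/kernels/CpuSoftmaxKernel.h"
#include <cassert>
#include <cstring>

// Check that the selector picks the expected max-logits micro-kernel for F32
// on the current machine, and that every registered entry has a name.
void check_fp32_max_logits_selection()
{
    using arm_compute::DataType;
    using arm_compute::cpu::kernels::CpuLogits1DMaxKernel;

    const auto  isa = arm_compute::CPUInfo::get().get_isa();
    const auto *uk  = CpuLogits1DMaxKernel::get_implementation(
        arm_compute::cpu::kernels::DataTypeISASelectorData{ DataType::F32, isa });
    assert(uk != nullptr);

    // With SVE available the SVE variant should win, otherwise the Neon one.
    const char *expected = isa.sve ? "sve_fp32_logits_1d_max" : "neon_fp32_logits_1d_max";
    assert(std::strcmp(uk->name, expected) == 0);

    // The whole table is visible to tests; a ukernel pointer may still be null
    // when the corresponding backend was compiled out of the build.
    for(const auto &k : CpuLogits1DMaxKernel::get_available_kernels())
    {
        assert(k.name != nullptr);
    }
}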
-rw-r--r--  src/cpu/ICpuKernel.h | 6
-rw-r--r--  src/cpu/kernels/CpuActivationKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuAddKernel.cpp | 4
-rw-r--r--  src/cpu/kernels/CpuAddKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuCastKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuCol2ImKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuConcatenateBatchKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuConcatenateDepthKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuConcatenateHeightKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuConcatenateWidthKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuCopyKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuDequantizeKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuDirectConv2dKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuDirectConv3dKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuElementwiseKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 2
-rw-r--r--  src/cpu/kernels/CpuElementwiseUnaryKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuFillKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuFloorKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmInterleave4x4Kernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h | 4
-rw-r--r--  src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmMatrixAdditionKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuGemmTranspose1xWKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuIm2ColKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuMulKernel.h | 4
-rw-r--r--  src/cpu/kernels/CpuPermuteKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.cpp | 6
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuQuantizeKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuReshapeKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuScaleKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuSoftmaxKernel.cpp | 256
-rw-r--r--  src/cpu/kernels/CpuSoftmaxKernel.h | 36
-rw-r--r--  src/cpu/kernels/CpuSubKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuTransposeKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuWeightsReshapeKernel.h | 2
-rw-r--r--  src/cpu/kernels/CpuWinogradConv2dKernel.h | 6
-rw-r--r--  src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h | 2
-rw-r--r--  src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h | 2
-rw-r--r--  src/cpu/operators/CpuConcatenate.h | 4
-rw-r--r--  src/cpu/operators/CpuSoftmax.h | 6
53 files changed, 171 insertions, 247 deletions
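The core of the change is the first hunk below: the temporary NewICpuKernel name from phase 1 goes away and ICpuKernel itself becomes a CRTP base, so each concrete kernel passes itself as the template argument, publishes a table of candidate micro-kernels, and lets the base pick from Derived::get_available_kernels(). Because ICpuKernel is no longer a plain alias for ICPPKernel, the operators at the bottom of the patch switch their kernel pointers to ICPPKernel. A much simplified, self-contained sketch of the pattern (an illustration of the mechanism with made-up names, not the actual arm_compute implementation):

#include <type_traits>
#include <vector>

// Stand-in for the real selector data (DataTypeISASelectorData in the patch).
struct SelectorData
{
    int  dt;
    bool sve;
};

template <class Derived>
class KernelBase
{
public:
    // Walk the derived kernel's table and return the first matching entry.
    static const auto *get_implementation(const SelectorData &data)
    {
        using KernelType = typename std::remove_reference<decltype(Derived::get_available_kernels())>::type::value_type;
        for(const auto &uk : Derived::get_available_kernels())
        {
            if(uk.is_selected(data))
            {
                return &uk;
            }
        }
        return static_cast<const KernelType *>(nullptr);
    }
};

class MyKernel : public KernelBase<MyKernel>
{
public:
    struct KernelEntry
    {
        const char *name;
        bool (*is_selected)(const SelectorData &);
        void (*ukernel)();
    };

    // The selection table, now queryable by tests as well as by configure().
    static const std::vector<KernelEntry> &get_available_kernels()
    {
        static const std::vector<KernelEntry> table =
        {
            { "sve_impl",  [](const SelectorData &d) { return d.sve; }, nullptr },
            { "neon_impl", [](const SelectorData &) { return true; },   nullptr },
        };
        return table;
    }
};

int main()
{
    // Selection is observable without ever configuring a kernel instance.
    const auto *uk = MyKernel::get_implementation(SelectorData{ 0, false });
    return (uk != nullptr) ? 0 : 1;
}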
diff --git a/src/cpu/ICpuKernel.h b/src/cpu/ICpuKernel.h
index 03aec5c08e..8f4106240d 100644
--- a/src/cpu/ICpuKernel.h
+++ b/src/cpu/ICpuKernel.h
@@ -37,12 +37,8 @@ enum class KernelSelectionType
Supported /**< Retrieve the best implementation available for the given Cpu ISA that is supported by the current build */
};
-using ICpuKernel = arm_compute::ICPPKernel;
-
template <class Derived>
-/* This is a temp name for stage 1 process of adding UT for multi-ISA.
-In the next stage NewICpuKernel will be called ICpuKernel again */
-class NewICpuKernel : public ICPPKernel
+class ICpuKernel : public ICPPKernel
{
public:
/** Micro-kernel selector
diff --git a/src/cpu/kernels/CpuActivationKernel.h b/src/cpu/kernels/CpuActivationKernel.h
index ac974850aa..b0476303f0 100644
--- a/src/cpu/kernels/CpuActivationKernel.h
+++ b/src/cpu/kernels/CpuActivationKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the activation kernel */
-class CpuActivationKernel : public NewICpuKernel<CpuActivationKernel>
+class CpuActivationKernel : public ICpuKernel<CpuActivationKernel>
{
private:
using ActivationKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &)>::type;
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index deb7379aea..d06621fae0 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -214,7 +214,7 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
// Configure kernel window
auto win_config = validate_and_configure_window(*src0, *src1, *dst);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- NewICpuKernel::configure(win_config.second);
+ ICpuKernel::configure(win_config.second);
}
Status CpuAddKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
@@ -231,7 +231,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(tensors.empty());
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
diff --git a/src/cpu/kernels/CpuAddKernel.h b/src/cpu/kernels/CpuAddKernel.h
index 93b86de4ae..6638135580 100644
--- a/src/cpu/kernels/CpuAddKernel.h
+++ b/src/cpu/kernels/CpuAddKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform addition between two tensors */
-class CpuAddKernel : public NewICpuKernel<CpuAddKernel>
+class CpuAddKernel : public ICpuKernel<CpuAddKernel>
{
private:
using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
diff --git a/src/cpu/kernels/CpuCastKernel.h b/src/cpu/kernels/CpuCastKernel.h
index 9aeb537044..7679178fa1 100644
--- a/src/cpu/kernels/CpuCastKernel.h
+++ b/src/cpu/kernels/CpuCastKernel.h
@@ -37,7 +37,7 @@ namespace kernels
*
* @note When casting between quantized types the scale and zeroPoint are ignored
*/
-class CpuCastKernel : public NewICpuKernel<CpuCastKernel>
+class CpuCastKernel : public ICpuKernel<CpuCastKernel>
{
public:
CpuCastKernel() = default;
diff --git a/src/cpu/kernels/CpuCol2ImKernel.h b/src/cpu/kernels/CpuCol2ImKernel.h
index 43be476b2f..deafcc14df 100644
--- a/src/cpu/kernels/CpuCol2ImKernel.h
+++ b/src/cpu/kernels/CpuCol2ImKernel.h
@@ -52,7 +52,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuCol2ImKernel : public NewICpuKernel<CpuCol2ImKernel>
+class CpuCol2ImKernel : public ICpuKernel<CpuCol2ImKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuConcatenateBatchKernel.h b/src/cpu/kernels/CpuConcatenateBatchKernel.h
index 2b5946571b..0de68a5d64 100644
--- a/src/cpu/kernels/CpuConcatenateBatchKernel.h
+++ b/src/cpu/kernels/CpuConcatenateBatchKernel.h
@@ -36,7 +36,7 @@ namespace kernels
/** Interface for the batch concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class CpuConcatenateBatchKernel : public NewICpuKernel<CpuConcatenateBatchKernel>
+class CpuConcatenateBatchKernel : public ICpuKernel<CpuConcatenateBatchKernel>
{
public:
CpuConcatenateBatchKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateDepthKernel.h b/src/cpu/kernels/CpuConcatenateDepthKernel.h
index 90b68d3a06..5a0edb95bb 100644
--- a/src/cpu/kernels/CpuConcatenateDepthKernel.h
+++ b/src/cpu/kernels/CpuConcatenateDepthKernel.h
@@ -40,7 +40,7 @@ namespace kernels
/** Interface for the depth concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class CpuConcatenateDepthKernel : public NewICpuKernel<CpuConcatenateDepthKernel>
+class CpuConcatenateDepthKernel : public ICpuKernel<CpuConcatenateDepthKernel>
{
public:
CpuConcatenateDepthKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateHeightKernel.h b/src/cpu/kernels/CpuConcatenateHeightKernel.h
index 8ace9809cc..74d5d0c2c3 100644
--- a/src/cpu/kernels/CpuConcatenateHeightKernel.h
+++ b/src/cpu/kernels/CpuConcatenateHeightKernel.h
@@ -36,7 +36,7 @@ namespace kernels
/** Interface for the height concatenate kernel.
* The source tensor will be concatenated into the destination tensor.
*/
-class CpuConcatenateHeightKernel : public NewICpuKernel<CpuConcatenateHeightKernel>
+class CpuConcatenateHeightKernel : public ICpuKernel<CpuConcatenateHeightKernel>
{
public:
CpuConcatenateHeightKernel() = default;
diff --git a/src/cpu/kernels/CpuConcatenateWidthKernel.h b/src/cpu/kernels/CpuConcatenateWidthKernel.h
index d5f2ef24d6..418bc51b33 100644
--- a/src/cpu/kernels/CpuConcatenateWidthKernel.h
+++ b/src/cpu/kernels/CpuConcatenateWidthKernel.h
@@ -37,7 +37,7 @@ namespace kernels
/** Interface for the width concatenate kernel.
* The source tensor will be concatenated into the destination tensor.
*/
-class CpuConcatenateWidthKernel : public NewICpuKernel<CpuConcatenateWidthKernel>
+class CpuConcatenateWidthKernel : public ICpuKernel<CpuConcatenateWidthKernel>
{
public:
CpuConcatenateWidthKernel() = default;
diff --git a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
index 001a6fcab0..9a1393323b 100644
--- a/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
+++ b/src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
@@ -41,7 +41,7 @@ namespace kernels
*
* @note This function assumes the weights are already reshaped (transposed)
*/
-class CpuConvertFullyConnectedWeightsKernel : public NewICpuKernel<CpuConvertFullyConnectedWeightsKernel>
+class CpuConvertFullyConnectedWeightsKernel : public ICpuKernel<CpuConvertFullyConnectedWeightsKernel>
{
public:
CpuConvertFullyConnectedWeightsKernel() = default;
diff --git a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
index 9d5ee39126..b5eaf65487 100644
--- a/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
+++ b/src/cpu/kernels/CpuConvertQuantizedSignednessKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to convert asymmetric signed to asymmetric signed and vice-versa */
-class CpuConvertQuantizedSignednessKernel : public NewICpuKernel<CpuConvertQuantizedSignednessKernel>
+class CpuConvertQuantizedSignednessKernel : public ICpuKernel<CpuConvertQuantizedSignednessKernel>
{
public:
CpuConvertQuantizedSignednessKernel() = default;
diff --git a/src/cpu/kernels/CpuCopyKernel.h b/src/cpu/kernels/CpuCopyKernel.h
index ee4adeb4eb..c9ef8eba76 100644
--- a/src/cpu/kernels/CpuCopyKernel.h
+++ b/src/cpu/kernels/CpuCopyKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to perform a copy between two tensors */
-class CpuCopyKernel : public NewICpuKernel<CpuCopyKernel>
+class CpuCopyKernel : public ICpuKernel<CpuCopyKernel>
{
public:
CpuCopyKernel() = default;
diff --git a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
index eae682bb6d..e23a0fac87 100644
--- a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
+++ b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
@@ -40,7 +40,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class CpuDepthwiseConv2dNativeKernel : public NewICpuKernel<CpuDepthwiseConv2dNativeKernel>
+class CpuDepthwiseConv2dNativeKernel : public ICpuKernel<CpuDepthwiseConv2dNativeKernel>
{
public:
CpuDepthwiseConv2dNativeKernel() = default;
diff --git a/src/cpu/kernels/CpuDequantizeKernel.h b/src/cpu/kernels/CpuDequantizeKernel.h
index 834c039a76..cfa991dc74 100644
--- a/src/cpu/kernels/CpuDequantizeKernel.h
+++ b/src/cpu/kernels/CpuDequantizeKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the dequantization layer kernel. */
-class CpuDequantizeKernel : public NewICpuKernel<CpuDequantizeKernel>
+class CpuDequantizeKernel : public ICpuKernel<CpuDequantizeKernel>
{
public:
CpuDequantizeKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv2dKernel.h b/src/cpu/kernels/CpuDirectConv2dKernel.h
index 09fa5898cc..6ec4d4ee04 100644
--- a/src/cpu/kernels/CpuDirectConv2dKernel.h
+++ b/src/cpu/kernels/CpuDirectConv2dKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform Direct Convolution Layer. */
-class CpuDirectConv2dKernel : public NewICpuKernel<CpuDirectConv2dKernel>
+class CpuDirectConv2dKernel : public ICpuKernel<CpuDirectConv2dKernel>
{
public:
CpuDirectConv2dKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
index 95011f79aa..d3ef17b7c9 100644
--- a/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
+++ b/src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
@@ -40,7 +40,7 @@ namespace kernels
* @note For quantized computations (i.e. @p src of S32 type) the output data type for auto-initialization must be passed as part
* of the @ref DirectConvolutionLayerOutputStageKernelInfo.
*/
-class CpuDirectConv2dOutputStageKernel : public NewICpuKernel<CpuDirectConv2dOutputStageKernel>
+class CpuDirectConv2dOutputStageKernel : public ICpuKernel<CpuDirectConv2dOutputStageKernel>
{
public:
CpuDirectConv2dOutputStageKernel() = default;
diff --git a/src/cpu/kernels/CpuDirectConv3dKernel.h b/src/cpu/kernels/CpuDirectConv3dKernel.h
index 6ae70bd3b7..688f368b9f 100644
--- a/src/cpu/kernels/CpuDirectConv3dKernel.h
+++ b/src/cpu/kernels/CpuDirectConv3dKernel.h
@@ -35,7 +35,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform 3D Direct Convolution Layer. */
-class CpuDirectConv3dKernel : public NewICpuKernel<CpuDirectConv3dKernel>
+class CpuDirectConv3dKernel : public ICpuKernel<CpuDirectConv3dKernel>
{
private:
/* Template function for convolution 3d NDHWC */
diff --git a/src/cpu/kernels/CpuElementwiseKernel.h b/src/cpu/kernels/CpuElementwiseKernel.h
index bb081cbec1..8cd5d58a96 100644
--- a/src/cpu/kernels/CpuElementwiseKernel.h
+++ b/src/cpu/kernels/CpuElementwiseKernel.h
@@ -39,7 +39,7 @@ namespace kernels
* @f[ dst(x,y) = OP(src0(x,y), src1(x,y))@f]
*
*/
-class CpuElementwiseKernel : public NewICpuKernel<CpuElementwiseKernel>
+class CpuElementwiseKernel : public ICpuKernel<CpuElementwiseKernel>
{
public:
CpuElementwiseKernel() = default;
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 79c4896924..e8211fe93e 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -108,7 +108,7 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo
auto shape_and_window = compute_output_shape_and_window(src.tensor_shape());
auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type());
- NewICpuKernel::configure(shape_and_window.second);
+ ICpuKernel::configure(shape_and_window.second);
}
Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/cpu/kernels/CpuElementwiseUnaryKernel.h
index c520b89618..138049a60c 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.h
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -39,7 +39,7 @@ namespace kernels
* Element-wise operation is computed by:
* @f[ dst(x) = OP(src(x))@f]
*/
-class CpuElementwiseUnaryKernel : public NewICpuKernel<CpuElementwiseUnaryKernel>
+class CpuElementwiseUnaryKernel : public ICpuKernel<CpuElementwiseUnaryKernel>
{
private:
using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
diff --git a/src/cpu/kernels/CpuFillKernel.h b/src/cpu/kernels/CpuFillKernel.h
index 5262ecc5c6..ce41afc462 100644
--- a/src/cpu/kernels/CpuFillKernel.h
+++ b/src/cpu/kernels/CpuFillKernel.h
@@ -35,7 +35,7 @@ namespace cpu
namespace kernels
{
/** Kernel for filling a tensor with a given constant value */
-class CpuFillKernel : public NewICpuKernel<CpuFillKernel>
+class CpuFillKernel : public ICpuKernel<CpuFillKernel>
{
public:
CpuFillKernel() = default;
diff --git a/src/cpu/kernels/CpuFloorKernel.h b/src/cpu/kernels/CpuFloorKernel.h
index 2b102a0515..35ab534ca8 100644
--- a/src/cpu/kernels/CpuFloorKernel.h
+++ b/src/cpu/kernels/CpuFloorKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Cpu accelarated kernel to perform a floor operation */
-class CpuFloorKernel : public NewICpuKernel<CpuFloorKernel>
+class CpuFloorKernel : public ICpuKernel<CpuFloorKernel>
{
private:
using FloorKernelPtr = std::add_pointer<void(const void *, void *, int)>::type;
diff --git a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
index 13b46142c4..4fb6a52a8b 100644
--- a/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
+++ b/src/cpu/kernels/CpuGemmInterleave4x4Kernel.h
@@ -52,7 +52,7 @@ namespace kernels
*
* After this operation, the dst matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
*/
-class CpuGemmInterleave4x4Kernel : public NewICpuKernel<CpuGemmInterleave4x4Kernel>
+class CpuGemmInterleave4x4Kernel : public ICpuKernel<CpuGemmInterleave4x4Kernel>
{
public:
CpuGemmInterleave4x4Kernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
index 6d06f12e54..2cc789d6d9 100644
--- a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.h
@@ -43,7 +43,7 @@ namespace kernels
* -# Compute the int32 matrix product of the resulting a * b and store the result as int32
*
*/
-class CpuGemmLowpMatrixMultiplyKernel : public NewICpuKernel<CpuGemmLowpMatrixMultiplyKernel>
+class CpuGemmLowpMatrixMultiplyKernel : public ICpuKernel<CpuGemmLowpMatrixMultiplyKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
index 6cced66b47..e469629cdb 100644
--- a/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.h
@@ -40,7 +40,7 @@ namespace kernels
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
*/
-class CpuGemmLowpMatrixAReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixAReductionKernel>
+class CpuGemmLowpMatrixAReductionKernel : public ICpuKernel<CpuGemmLowpMatrixAReductionKernel>
{
public:
/** Default constructor */
@@ -98,7 +98,7 @@ private:
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
*/
-class CpuGemmLowpMatrixBReductionKernel : public NewICpuKernel<CpuGemmLowpMatrixBReductionKernel>
+class CpuGemmLowpMatrixBReductionKernel : public ICpuKernel<CpuGemmLowpMatrixBReductionKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
index 1d70c0619e..3514ca811d 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
@@ -46,7 +46,7 @@ namespace kernels
* (a_offset * b_offset * k)
*
*/
-class CpuGemmLowpOffsetContributionKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionKernel>
+class CpuGemmLowpOffsetContributionKernel : public ICpuKernel<CpuGemmLowpOffsetContributionKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
index 13c64f4631..ad8b05e49a 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
@@ -63,7 +63,7 @@ namespace kernels
* (a_offset * b_offset * k)
*/
-class CpuGemmLowpOffsetContributionOutputStageKernel : public NewICpuKernel<CpuGemmLowpOffsetContributionOutputStageKernel>
+class CpuGemmLowpOffsetContributionOutputStageKernel : public ICpuKernel<CpuGemmLowpOffsetContributionOutputStageKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
index f6e8c816f3..c7813edcd7 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
@@ -51,7 +51,7 @@ namespace kernels
* -# -to the [-128..127] range and cast to QASYMM8_SIGNED.
*
*/
-class CpuGemmLowpQuantizeDownInt32ScaleKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ScaleKernel>
+class CpuGemmLowpQuantizeDownInt32ScaleKernel : public ICpuKernel<CpuGemmLowpQuantizeDownInt32ScaleKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ScaleKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index a9e2560657..681d099695 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -48,7 +48,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>
+class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index bfac8681a5..3e615b935e 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -49,7 +49,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>
+class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICpuKernel<CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index 5e5683cfc3..b773fdfdcf 100644
--- a/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -49,7 +49,7 @@ namespace kernels
* -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
*
*/
-class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public NewICpuKernel<CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>
+class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICpuKernel<CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>
{
public:
CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
index 64338259e9..4a748218d1 100644
--- a/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
+++ b/src/cpu/kernels/CpuGemmMatrixAdditionKernel.h
@@ -41,7 +41,7 @@ namespace kernels
* - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref CpuGemmMatrixMultiplyKernel
* - MTX_1 = C
*/
-class CpuGemmMatrixAdditionKernel : public NewICpuKernel<CpuGemmMatrixAdditionKernel>
+class CpuGemmMatrixAdditionKernel : public ICpuKernel<CpuGemmMatrixAdditionKernel>
{
public:
CpuGemmMatrixAdditionKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
index 757b46e9a7..9c3dc8b1a0 100644
--- a/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
+++ b/src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
@@ -39,7 +39,7 @@ namespace kernels
* @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p lhs is a vector and the second input tensor @p rhs a matrix. The implementation also assumes that both tensors have not been reshaped
*
*/
-class CpuGemmMatrixMultiplyKernel : public NewICpuKernel<CpuGemmMatrixMultiplyKernel>
+class CpuGemmMatrixMultiplyKernel : public ICpuKernel<CpuGemmMatrixMultiplyKernel>
{
public:
CpuGemmMatrixMultiplyKernel() = default;
diff --git a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
index 2acda35947..0ca92641b7 100644
--- a/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
+++ b/src/cpu/kernels/CpuGemmTranspose1xWKernel.h
@@ -68,7 +68,7 @@ namespace kernels
* @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
*
*/
-class CpuGemmTranspose1xWKernel : public NewICpuKernel<CpuGemmTranspose1xWKernel>
+class CpuGemmTranspose1xWKernel : public ICpuKernel<CpuGemmTranspose1xWKernel>
{
public:
CpuGemmTranspose1xWKernel() = default;
diff --git a/src/cpu/kernels/CpuIm2ColKernel.h b/src/cpu/kernels/CpuIm2ColKernel.h
index d789adef95..8160310da6 100644
--- a/src/cpu/kernels/CpuIm2ColKernel.h
+++ b/src/cpu/kernels/CpuIm2ColKernel.h
@@ -58,7 +58,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuIm2ColKernel : public NewICpuKernel<CpuIm2ColKernel>
+class CpuIm2ColKernel : public ICpuKernel<CpuIm2ColKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index 3ab198510f..85fcf88a96 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform multiplication between two tensors */
-class CpuMulKernel : public NewICpuKernel<CpuMulKernel>
+class CpuMulKernel : public ICpuKernel<CpuMulKernel>
{
public:
CpuMulKernel() = default;
@@ -118,7 +118,7 @@ private:
};
/** Interface for the complex pixelwise multiplication kernel. */
-class CpuComplexMulKernel : public NewICpuKernel<CpuComplexMulKernel>
+class CpuComplexMulKernel : public ICpuKernel<CpuComplexMulKernel>
{
public:
CpuComplexMulKernel() = default;
diff --git a/src/cpu/kernels/CpuPermuteKernel.h b/src/cpu/kernels/CpuPermuteKernel.h
index aae28582b1..9e1b93318e 100644
--- a/src/cpu/kernels/CpuPermuteKernel.h
+++ b/src/cpu/kernels/CpuPermuteKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel to perform tensor permutation given a permutation vector */
-class CpuPermuteKernel : public NewICpuKernel<CpuPermuteKernel>
+class CpuPermuteKernel : public ICpuKernel<CpuPermuteKernel>
{
public:
CpuPermuteKernel() = default;
diff --git a/src/cpu/kernels/CpuPool2dKernel.cpp b/src/cpu/kernels/CpuPool2dKernel.cpp
index 953a9ffb67..d0ca2d285d 100644
--- a/src/cpu/kernels/CpuPool2dKernel.cpp
+++ b/src/cpu/kernels/CpuPool2dKernel.cpp
@@ -315,7 +315,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
{
// Configure kernel window
Window win = calculate_max_window(*dst, Steps());
- NewICpuKernel::configure(win);
+ ICpuKernel::configure(win);
}
else
{
@@ -323,7 +323,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Poolin
auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration,
pool_size.x(), pool_size.y());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- NewICpuKernel::configure(win_config.second);
+ ICpuKernel::configure(win_config.second);
}
}
@@ -356,7 +356,7 @@ void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const T
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(NewICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
diff --git a/src/cpu/kernels/CpuPool2dKernel.h b/src/cpu/kernels/CpuPool2dKernel.h
index 7fd3247d6d..c952ea839d 100644
--- a/src/cpu/kernels/CpuPool2dKernel.h
+++ b/src/cpu/kernels/CpuPool2dKernel.h
@@ -35,7 +35,7 @@ namespace cpu
namespace kernels
{
/** Interface for the pooling layer kernel */
-class CpuPool2dKernel : public NewICpuKernel<CpuPool2dKernel>
+class CpuPool2dKernel : public ICpuKernel<CpuPool2dKernel>
{
private:
using PoolingKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;
diff --git a/src/cpu/kernels/CpuQuantizeKernel.h b/src/cpu/kernels/CpuQuantizeKernel.h
index 709e1c89c7..28690bea54 100644
--- a/src/cpu/kernels/CpuQuantizeKernel.h
+++ b/src/cpu/kernels/CpuQuantizeKernel.h
@@ -37,7 +37,7 @@ namespace kernels
*
* @note The implementation supports only 3D input tensors
*/
-class CpuQuantizeKernel : public NewICpuKernel<CpuQuantizeKernel>
+class CpuQuantizeKernel : public ICpuKernel<CpuQuantizeKernel>
{
public:
CpuQuantizeKernel() = default;
diff --git a/src/cpu/kernels/CpuReshapeKernel.h b/src/cpu/kernels/CpuReshapeKernel.h
index 6a5c528ecd..17302c6731 100644
--- a/src/cpu/kernels/CpuReshapeKernel.h
+++ b/src/cpu/kernels/CpuReshapeKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform tensor reshaping */
-class CpuReshapeKernel : public NewICpuKernel<CpuReshapeKernel>
+class CpuReshapeKernel : public ICpuKernel<CpuReshapeKernel>
{
public:
CpuReshapeKernel() = default;
diff --git a/src/cpu/kernels/CpuScaleKernel.h b/src/cpu/kernels/CpuScaleKernel.h
index 94bbdb72a0..e0e9e387bd 100644
--- a/src/cpu/kernels/CpuScaleKernel.h
+++ b/src/cpu/kernels/CpuScaleKernel.h
@@ -35,7 +35,7 @@ namespace cpu
namespace kernels
{
/** Arm(R) Neon(TM) kernel to perform scaling on a tensor */
-class CpuScaleKernel : public NewICpuKernel<CpuScaleKernel>
+class CpuScaleKernel : public ICpuKernel<CpuScaleKernel>
{
private:
/** Scale function to use for the particular function to use */
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.cpp b/src/cpu/kernels/CpuSoftmaxKernel.cpp
index 054adfa23c..6766b10120 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.cpp
+++ b/src/cpu/kernels/CpuSoftmaxKernel.cpp
@@ -22,7 +22,6 @@
* SOFTWARE.
*/
#include "src/cpu/kernels/CpuSoftmaxKernel.h"
-
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -30,12 +29,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/common/Registrars.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
-#include "src/core/common/Registrars.h"
#include "src/cpu/kernels/softmax/list.h"
-
namespace arm_compute
{
namespace cpu
@@ -44,164 +41,60 @@ namespace kernels
{
namespace
{
-struct SoftmaxSelectorData
-{
- DataType dt;
- const CPUInfo &ci;
-};
-using SoftmaxSelectorPtr = std::add_pointer<bool(const SoftmaxSelectorData &data)>::type;
-using SoftmaxLogits1DMaxKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &)>::type;
-using SoftmaxLogits1DKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, void *const, ITensor *, float, bool, const Window &)>::type;
-
-struct SoftmaxLogits1DKernel
-{
- const char *name;
- const SoftmaxSelectorPtr is_selected;
- SoftmaxLogits1DKernelPtr ukernel;
-};
-
-struct SoftmaxLogits1DMaxKernel
-{
- const char *name;
- const SoftmaxSelectorPtr is_selected;
- SoftmaxLogits1DMaxKernelPtr ukernel;
-};
-
-static const SoftmaxLogits1DKernel available_logits_1d_kernels[] =
-{
-#if defined(ARM_COMPUTE_ENABLE_SVE)
- {
- "sve_fp32_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); },
- REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_softmax)
- },
- {
- "sve_fp16_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); },
- REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_softmax)
- },
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-
-#if defined(ARM_COMPUTE_ENABLE_NEON)
- {
- "neon_fp32_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); },
- REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_softmax)
- },
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
- {
- "neon_fp16_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); },
- REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_softmax)
- },
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
-
-#if defined(ARM_COMPUTE_ENABLE_SVE2)
- {
- "sve2_qu8_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve2(); },
- REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_qasymm8_softmax)
- },
- {
- "sve2_qs8_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2(); },
- REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::sve2_qasymm8_signed_softmax)
- },
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
-#if defined(ARM_COMPUTE_ENABLE_NEON)
- {
- "neon_qu8_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); },
- REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_softmax)
- },
- {
- "neon_qs8_softmax_logits_1d",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
- REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_softmax)
- },
-#endif //defined(ARM_COMPUTE_ENABLE_NEON)
-};
-
-static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] =
+/* Softmax Logits 1D Max - identifying the max value of 1D Logits */
+static const std::vector<CpuLogits1DMaxKernel::SoftmaxLogits1DMaxKernel> available_kernels_max_logits =
{
#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_fp32_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32) && data.isa.sve; },
REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_logits)
},
{
"sve_fp16_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16) && data.isa.sve; },
REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_logits)
},
{
"sve_qu8_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8) && data.isa.sve; },
REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_qasymm8_logits)
},
{
"sve_qs8_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve(); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve; },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_qasymm8_signed_logits)
},
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp32_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_logits)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"neon_fp16_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16); },
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_logits)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_qu8_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_logits)
},
{
"neon_qs8_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_singed_logits)
},
#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
-
-const SoftmaxLogits1DKernel *get_implementation_logits(const SoftmaxSelectorData &data)
-{
- for(const auto &uk : available_logits_1d_kernels)
- {
- if(uk.is_selected({ data.dt, CPUInfo::get() }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
-const SoftmaxLogits1DMaxKernel *get_implementation_logits_max(const SoftmaxSelectorData &data)
-{
- for(const auto &uk : available_logits_1d_max_kernels)
- {
- if(uk.is_selected({ data.dt, CPUInfo::get() }))
- {
- return &uk;
- }
- }
- return nullptr;
-}
-
Status validate_arguments_logits_1d_max(const ITensorInfo &input, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
-
// Validate in case of configured output
if(output.total_size() != 0)
{
@@ -209,58 +102,104 @@ Status validate_arguments_logits_1d_max(const ITensorInfo &input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&input, &output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output.tensor_shape(), TensorShape(input.tensor_shape()).set(0, 1));
}
-
return Status{};
}
-
-} // namespace
-
+} //namespace
+const std::vector<CpuLogits1DMaxKernel::SoftmaxLogits1DMaxKernel> &CpuLogits1DMaxKernel::get_available_kernels()
+{
+ return available_kernels_max_logits;
+}
void CpuLogits1DMaxKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_max(*src, *dst));
-
// Softmax across the x dimension
const TensorShape output_shape = TensorShape(src->tensor_shape()).set(0, 1);
// Output auto initialization if not yet initialized
auto_init_if_empty(*dst, output_shape, 1, src->data_type(), src->quantization_info());
-
- const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
-
_run_method = uk->ukernel;
_name = std::string("CpuLogits1DMaxKernel").append("/").append(uk->name);
-
- Window win = calculate_max_window(*src, Steps());
+ Window win = calculate_max_window(*src, Steps());
ICpuKernel::configure(win);
}
-
Status CpuLogits1DMaxKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_max(*src, *dst));
-
return Status{};
}
-
void CpuLogits1DMaxKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
-
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
-
_run_method(src, dst, window);
}
-
const char *CpuLogits1DMaxKernel::name() const
{
return _name.c_str();
}
+/* Softmax Logits 1D - computation for QASYMM8 with pre-computed max. */
+template <bool IS_LOG>
+static const std::vector<typename CpuLogits1DSoftmaxKernel<IS_LOG>::SoftmaxLogits1DKernel> available_kernels_logits =
+{
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "sve_fp32_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32) && data.isa.sve; },
+ REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_softmax)
+ },
+ {
+ "sve_fp16_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16) && data.isa.sve; },
+ REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_softmax)
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
+ {
+ "neon_fp32_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
+ REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_softmax)
+ },
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+ {
+ "neon_fp16_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16); },
+ REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_softmax)
+ },
+#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+ {
+ "sve2_qu8_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8) && data.isa.sve2; },
+ REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_qasymm8_softmax)
+ },
+ {
+ "sve2_qs8_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2; },
+ REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::sve2_qasymm8_signed_softmax)
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
+ {
+ "neon_qu8_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_softmax)
+ },
+ {
+ "neon_qs8_softmax_logits_1d",
+ [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_softmax)
+ },
+#endif //defined(ARM_COMPUTE_ENABLE_NEON)
+};
namespace
{
Status validate_arguments_logits_softmax(const ITensorInfo &src, const ITensorInfo &max,
@@ -270,14 +209,11 @@ Status validate_arguments_logits_softmax(const ITensorInfo &src, const ITensorIn
// Check input
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
-
const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(src.data_type());
-
// Check max
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &max);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(TensorShape(src.tensor_shape()).set(0, 1), max.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&src, &max);
-
// Check output if configured
if(dst.total_size() != 0)
{
@@ -286,7 +222,6 @@ Status validate_arguments_logits_softmax(const ITensorInfo &src, const ITensorIn
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&src, &dst);
ARM_COMPUTE_RETURN_ERROR_ON(dst.quantization_info() != output_quantization);
}
-
// Check tmp if configured
if(tmp.total_size() != 0)
{
@@ -296,84 +231,69 @@ Status validate_arguments_logits_softmax(const ITensorInfo &src, const ITensorIn
// on the maximum number of threads that will run in parallel.
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&src, &tmp);
}
-
return Status{};
}
} // namespace
-
+template <bool IS_LOG>
+const std::vector<typename CpuLogits1DSoftmaxKernel<IS_LOG>::SoftmaxLogits1DKernel> &CpuLogits1DSoftmaxKernel<IS_LOG>::get_available_kernels()
+{
+ return available_kernels_logits<IS_LOG>;
+}
template <bool IS_LOG>
void CpuLogits1DSoftmaxKernel<IS_LOG>::configure(const ITensorInfo *src, const ITensorInfo *max, ITensorInfo *dst, const float beta, ITensorInfo *tmp)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, max, dst, tmp);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_softmax(*src, *max, *dst, beta, *tmp, IS_LOG));
-
// Configure kernel window
const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(src->data_type());
-
// Output auto initialization if not yet initialized
const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(src->data_type(), IS_LOG) : dst->quantization_info();
auto_init_if_empty(*dst, TensorInfo(*src).set_quantization_info(output_quantization).reset_padding());
-
// Tmp auto initialization if not yet initialized
const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : src->data_type();
auto_init_if_empty(*tmp, TensorInfo(*src).set_data_type(tmp_data_type).reset_padding());
-
- const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->data_type(), CPUInfo::get() });
+ const auto *uk = CpuLogits1DSoftmaxKernel<IS_LOG>::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
-
std::string kernel_name = IS_LOG ? std::string("CpuLogits1DLogSoftmaxKernel") : std::string("CpuLogits1DSoftmaxKernel");
-
- _beta = beta;
- _run_method = uk->ukernel;
- _name = kernel_name.append("/").append(uk->name);
-
+ _beta = beta;
+ _run_method = uk->ukernel;
+ _name = kernel_name.append("/").append(uk->name);
// Configure kernel window
Window win = calculate_max_window(*max, Steps());
-
- ICpuKernel::configure(win);
+ ICPPKernel::configure(win);
}
-
template <bool IS_LOG>
Status CpuLogits1DSoftmaxKernel<IS_LOG>::validate(const ITensorInfo *src, const ITensorInfo *max,
const ITensorInfo *dst, const float beta, const ITensorInfo *tmp)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, max, dst, tmp);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_softmax(*src, *max, *dst, beta, *tmp, IS_LOG));
-
return Status{};
}
-
template <bool IS_LOG>
void CpuLogits1DSoftmaxKernel<IS_LOG>::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
-
- const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
- auto max = tensors.get_tensor(TensorType::ACL_SRC_1);
- auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
- auto tmp = tensors.get_tensor(TensorType::ACL_DST_1);
-
+ const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ auto max = tensors.get_tensor(TensorType::ACL_SRC_1);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
+ auto tmp = tensors.get_tensor(TensorType::ACL_DST_1);
const unsigned int num_elems_processed_per_iteration = src->info()->valid_region().shape.x();
const unsigned int tmp_size_for_thread = tmp->info()->element_size() * num_elems_processed_per_iteration;
-
ARM_COMPUTE_ERROR_ON(tmp->info()->total_size() < (info.num_threads * tmp_size_for_thread));
-
void *tmp_for_thread = tmp->buffer() + (info.thread_id * tmp_size_for_thread);
_run_method(src, max, tmp_for_thread, dst, _beta, IS_LOG, window);
}
-
template <bool IS_LOG>
const char *CpuLogits1DSoftmaxKernel<IS_LOG>::name() const
{
return _name.c_str();
}
-
template class CpuLogits1DSoftmaxKernel<true>;
template class CpuLogits1DSoftmaxKernel<false>;
-
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.h b/src/cpu/kernels/CpuSoftmaxKernel.h
index f317662620..df7d3f7d9b 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.h
+++ b/src/cpu/kernels/CpuSoftmaxKernel.h
@@ -23,10 +23,8 @@
*/
#ifndef ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H
#define ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H
-
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
-
namespace arm_compute
{
namespace cpu
@@ -34,8 +32,11 @@ namespace cpu
namespace kernels
{
/** Interface for the identifying the max value of 1D Logits */
-class CpuLogits1DMaxKernel : public NewICpuKernel<CpuLogits1DMaxKernel>
+class CpuLogits1DMaxKernel : public ICpuKernel<CpuLogits1DMaxKernel>
{
+private:
+ using SoftmaxLogits1DMaxKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &)>::type;
+
public:
CpuLogits1DMaxKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DMaxKernel);
@@ -52,27 +53,31 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-
-private:
- using SoftmaxLogits1DMaxKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &)>::type;
+ struct SoftmaxLogits1DMaxKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ SoftmaxLogits1DMaxKernelPtr ukernel;
+ };
+ static const std::vector<SoftmaxLogits1DMaxKernel> &get_available_kernels();
private:
SoftmaxLogits1DMaxKernelPtr _run_method{ nullptr };
std::string _name{};
};
-
/** Interface for softmax computation for QASYMM8 with pre-computed max. */
template <bool IS_LOG = false>
-class CpuLogits1DSoftmaxKernel : public NewICpuKernel<CpuLogits1DSoftmaxKernel<IS_LOG>>
+class CpuLogits1DSoftmaxKernel : public ICpuKernel<CpuLogits1DSoftmaxKernel<IS_LOG>>
{
+private:
+ using SoftmaxLogits1DKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, void *const, ITensor *, float, bool, const Window &)>::type;
+
public:
CpuLogits1DSoftmaxKernel() = default;
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DSoftmaxKernel);
-
/** Set the input and output tensors.
*
* @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -92,13 +97,16 @@ public:
*/
static Status validate(const ITensorInfo *src, const ITensorInfo *max,
const ITensorInfo *dst, const float beta, const ITensorInfo *tmp);
-
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
-
-private:
- using SoftmaxLogits1DKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, void *const, ITensor *, float, bool, const Window &)>::type;
+ struct SoftmaxLogits1DKernel
+ {
+ const char *name;
+ const DataTypeISASelectorPtr is_selected;
+ SoftmaxLogits1DKernelPtr ukernel;
+ };
+ static const std::vector<SoftmaxLogits1DKernel> &get_available_kernels();
private:
float _beta{ 1.0f };
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 70f34b1b57..323a3f1316 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Interface for the kernel to perform subtraction between two tensors */
-class CpuSubKernel : public NewICpuKernel<CpuSubKernel>
+class CpuSubKernel : public ICpuKernel<CpuSubKernel>
{
private:
using SubKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
diff --git a/src/cpu/kernels/CpuTransposeKernel.h b/src/cpu/kernels/CpuTransposeKernel.h
index 7e1ee5f73d..cb85daeb40 100644
--- a/src/cpu/kernels/CpuTransposeKernel.h
+++ b/src/cpu/kernels/CpuTransposeKernel.h
@@ -34,7 +34,7 @@ namespace cpu
namespace kernels
{
/** Kernel which transposes the elements of a matrix */
-class CpuTransposeKernel : public NewICpuKernel<CpuTransposeKernel>
+class CpuTransposeKernel : public ICpuKernel<CpuTransposeKernel>
{
public:
CpuTransposeKernel() = default;
diff --git a/src/cpu/kernels/CpuWeightsReshapeKernel.h b/src/cpu/kernels/CpuWeightsReshapeKernel.h
index 6c2d7ef6f9..1a260edc96 100644
--- a/src/cpu/kernels/CpuWeightsReshapeKernel.h
+++ b/src/cpu/kernels/CpuWeightsReshapeKernel.h
@@ -56,7 +56,7 @@ namespace kernels
* \end{array} \right)
* @f]
*/
-class CpuWeightsReshapeKernel : public NewICpuKernel<CpuWeightsReshapeKernel>
+class CpuWeightsReshapeKernel : public ICpuKernel<CpuWeightsReshapeKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/CpuWinogradConv2dKernel.h b/src/cpu/kernels/CpuWinogradConv2dKernel.h
index 0c4e28c394..6909216d94 100644
--- a/src/cpu/kernels/CpuWinogradConv2dKernel.h
+++ b/src/cpu/kernels/CpuWinogradConv2dKernel.h
@@ -35,7 +35,7 @@ namespace arm_compute
namespace cpu
{
/** Interface for the kernel to perform Winograd input transform. */
-class ICpuWinogradConv2dTransformInputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformInputKernel>
+class ICpuWinogradConv2dTransformInputKernel : public ICpuKernel<ICpuWinogradConv2dTransformInputKernel>
{
public:
/** Get the working space required to perform the transformation.
@@ -216,7 +216,7 @@ private:
};
/** Interface for the kernel to perform Winograd output transform. */
-class ICpuWinogradConv2dTransformOutputKernel : public NewICpuKernel<ICpuWinogradConv2dTransformOutputKernel>
+class ICpuWinogradConv2dTransformOutputKernel : public ICpuKernel<ICpuWinogradConv2dTransformOutputKernel>
{
public:
/** Get the working space required to perform the transformation.
@@ -418,7 +418,7 @@ private:
};
/** Interface for the kernel to perform Winograd weights transform. */
-class ICpuWinogradConv2dTransformWeightsKernel : public NewICpuKernel<ICpuWinogradConv2dTransformWeightsKernel>
+class ICpuWinogradConv2dTransformWeightsKernel : public ICpuKernel<ICpuWinogradConv2dTransformWeightsKernel>
{
public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
index ea51d5d54d..a32a7a3ec8 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
@@ -45,7 +45,7 @@ namespace cpu
namespace kernels
{
/** This class is a wrapper for the depthwise convolution assembly kernels. */
-class CpuDepthwiseConv2dAssemblyWrapperKernel final : public NewICpuKernel<CpuDepthwiseConv2dAssemblyWrapperKernel>
+class CpuDepthwiseConv2dAssemblyWrapperKernel final : public ICpuKernel<CpuDepthwiseConv2dAssemblyWrapperKernel>
{
public:
/** Default constructor */
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
index daa3168beb..8713d5c54d 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
@@ -46,7 +46,7 @@ namespace kernels
* execute a single assembly kernel in the context of an NEFunction.
*
*/
-class CpuPool2dAssemblyWrapperKernel final : public NewICpuKernel<CpuPool2dAssemblyWrapperKernel>
+class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel<CpuPool2dAssemblyWrapperKernel>
{
public:
/** Constructor
diff --git a/src/cpu/operators/CpuConcatenate.h b/src/cpu/operators/CpuConcatenate.h
index 001ac68162..eb11926b48 100644
--- a/src/cpu/operators/CpuConcatenate.h
+++ b/src/cpu/operators/CpuConcatenate.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,7 +67,7 @@ public:
void run(ITensorPack &tensors) override;
private:
- std::vector<std::unique_ptr<ICpuKernel>> _concat_kernels{};
+ std::vector<std::unique_ptr<ICPPKernel>> _concat_kernels{};
unsigned int _num_srcs{ 0 };
unsigned int _axis{ 0 };
};
diff --git a/src/cpu/operators/CpuSoftmax.h b/src/cpu/operators/CpuSoftmax.h
index 20f3f006d3..64df8704f9 100644
--- a/src/cpu/operators/CpuSoftmax.h
+++ b/src/cpu/operators/CpuSoftmax.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -92,8 +92,8 @@ private:
CpuPermute _permute_input;
CpuPermute _permute_output;
- std::unique_ptr<ICpuKernel> _max_kernel;
- std::unique_ptr<ICpuKernel> _softmax_kernel;
+ std::unique_ptr<ICPPKernel> _max_kernel;
+ std::unique_ptr<ICPPKernel> _softmax_kernel;
TensorInfo _max;
TensorInfo _tmp;