From 4a95bba6ca61ce99995ece6fd237b5498c9f322c Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Mon, 28 Jun 2021 11:00:27 +0100 Subject: Set up the framework to choose the default LWS Resolve COMPMID-4486 Signed-off-by: Giorgio Arena Change-Id: Ib38b7943bd776a6d75d1da163908724c49eae73d Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5864 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- src/core/gpu/cl/kernels/ClActivationKernel.cpp | 1 + src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp | 1 + src/core/gpu/cl/kernels/ClCastKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClCastKernel.h | 2 +- .../cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp | 5 +++++ .../gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h | 2 +- src/core/gpu/cl/kernels/ClCopyKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClCopyKernel.h | 2 +- src/core/gpu/cl/kernels/ClCropKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClCropKernel.h | 2 +- src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp | 1 + src/core/gpu/cl/kernels/ClDequantizeKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClDequantizeKernel.h | 2 +- src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClDirectConv2dKernel.h | 2 +- src/core/gpu/cl/kernels/ClElementwiseKernel.cpp | 7 ++++++- src/core/gpu/cl/kernels/ClElementwiseKernel.h | 14 ++++++++++---- src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h | 2 +- src/core/gpu/cl/kernels/ClFillKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClFillKernel.h | 2 +- src/core/gpu/cl/kernels/ClFloorKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClFloorKernel.h | 2 +- src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h | 2 +- .../gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h | 2 +- .../gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp | 5 +++++ .../gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h | 2 +- .../kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp | 5 +++++ .../cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h | 2 +- src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h | 2 +- src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h | 2 +- src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp | 1 + src/core/gpu/cl/kernels/ClMulKernel.cpp | 12 +++++++++++- src/core/gpu/cl/kernels/ClMulKernel.h | 4 ++-- src/core/gpu/cl/kernels/ClPermuteKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClPermuteKernel.h | 2 +- src/core/gpu/cl/kernels/ClPool2dKernel.cpp | 1 + src/core/gpu/cl/kernels/ClQuantizeKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClQuantizeKernel.h | 2 +- src/core/gpu/cl/kernels/ClReshapeKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClReshapeKernel.h | 2 +- src/core/gpu/cl/kernels/ClScaleKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClScaleKernel.h | 2 +- src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp | 10 ++++++++++ src/core/gpu/cl/kernels/ClSoftmaxKernel.h | 4 ++-- src/core/gpu/cl/kernels/ClTransposeKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClTransposeKernel.h | 2 +- .../gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h | 2 +- .../gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp | 1 + src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp | 1 + .../gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h | 2 +- src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp | 1 + .../gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp | 5 +++++ src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h | 2 +- 60 files changed, 187 insertions(+), 33 deletions(-) (limited to 'src/core/gpu/cl/kernels') diff --git a/src/core/gpu/cl/kernels/ClActivationKernel.cpp b/src/core/gpu/cl/kernels/ClActivationKernel.cpp index 17a8c6498d..e892d6a778 100644 --- a/src/core/gpu/cl/kernels/ClActivationKernel.cpp +++ b/src/core/gpu/cl/kernels/ClActivationKernel.cpp @@ -89,6 +89,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const ClActivationKernel::ClActivationKernel() : _run_in_place(false) { + _type = CLKernelType::ELEMENTWISE; } void ClActivationKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo act_info) diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp index 26f5113822..dbc628d6d5 100644 --- a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp @@ -62,6 +62,7 @@ Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, con ClBatchConcatenateKernel::ClBatchConcatenateKernel() : _batch_offset(0) { + _type = CLKernelType::ELEMENTWISE; } void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst) diff --git a/src/core/gpu/cl/kernels/ClCastKernel.cpp b/src/core/gpu/cl/kernels/ClCastKernel.cpp index 7a1d5c2824..fac9ebe5cf 100644 --- a/src/core/gpu/cl/kernels/ClCastKernel.cpp +++ b/src/core/gpu/cl/kernels/ClCastKernel.cpp @@ -72,6 +72,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, Conver } } // namespace +ClCastKernel::ClCastKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClCastKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClCastKernel.h b/src/core/gpu/cl/kernels/ClCastKernel.h index 451aa9c1ab..6bf3cd9e50 100644 --- a/src/core/gpu/cl/kernels/ClCastKernel.h +++ b/src/core/gpu/cl/kernels/ClCastKernel.h @@ -41,7 +41,7 @@ namespace kernels class ClCastKernel : public IClKernel { public: - ClCastKernel() = default; + ClCastKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCastKernel); /** Set the src and dst of the kernel. * diff --git a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp index 49f2f68a76..d1abd274d6 100644 --- a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp @@ -40,6 +40,11 @@ namespace opencl { namespace kernels { +ClConvertFullyConnectedWeightsKernel::ClConvertFullyConnectedWeightsKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClConvertFullyConnectedWeightsKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout) { diff --git a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h index 11ab4d2a0d..6f4f09dc32 100644 --- a/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h +++ b/src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h @@ -47,7 +47,7 @@ namespace kernels class ClConvertFullyConnectedWeightsKernel : public IClKernel { public: - ClConvertFullyConnectedWeightsKernel() = default; + ClConvertFullyConnectedWeightsKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClConvertFullyConnectedWeightsKernel); /** Set the src and dst tensor. * diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.cpp b/src/core/gpu/cl/kernels/ClCopyKernel.cpp index d6c87f8fad..98c6f34e60 100644 --- a/src/core/gpu/cl/kernels/ClCopyKernel.cpp +++ b/src/core/gpu/cl/kernels/ClCopyKernel.cpp @@ -68,6 +68,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, Window } // namespace +ClCopyKernel::ClCopyKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClCopyKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Window *dst_window) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.h b/src/core/gpu/cl/kernels/ClCopyKernel.h index b1b9672bcb..f3eb0aab62 100644 --- a/src/core/gpu/cl/kernels/ClCopyKernel.h +++ b/src/core/gpu/cl/kernels/ClCopyKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClCopyKernel : public IClKernel { public: - ClCopyKernel() = default; + ClCopyKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCopyKernel); /** Initialize the kernel's src, dst. * diff --git a/src/core/gpu/cl/kernels/ClCropKernel.cpp b/src/core/gpu/cl/kernels/ClCropKernel.cpp index 1d322eefa1..ef2e48b45d 100644 --- a/src/core/gpu/cl/kernels/ClCropKernel.cpp +++ b/src/core/gpu/cl/kernels/ClCropKernel.cpp @@ -46,6 +46,11 @@ void ClCropKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Coordinat configure(CLKernelLibrary::get().get_compile_context(), src, dst, start, end, batch_index, extrapolation_value, dst_window); } +ClCropKernel::ClCropKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClCropKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window) { diff --git a/src/core/gpu/cl/kernels/ClCropKernel.h b/src/core/gpu/cl/kernels/ClCropKernel.h index ec0f8e58da..7120dbbc81 100644 --- a/src/core/gpu/cl/kernels/ClCropKernel.h +++ b/src/core/gpu/cl/kernels/ClCropKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClCropKernel : public IClKernel { public: - ClCropKernel() = default; + ClCropKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCropKernel); /** Configure kernel * diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp index 4039570da4..e3e384f748 100644 --- a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp @@ -61,6 +61,7 @@ Status validate_arguments(const ITensorInfo *src, unsigned int depth_offset, con ClDepthConcatenateKernel::ClDepthConcatenateKernel() : _depth_offset(0) { + _type = CLKernelType::ELEMENTWISE; } void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst) diff --git a/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp b/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp index f2758b759f..d69da8716c 100644 --- a/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDequantizeKernel.cpp @@ -61,6 +61,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) } } // namespace +ClDequantizeKernel::ClDequantizeKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClDequantizeKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClDequantizeKernel.h b/src/core/gpu/cl/kernels/ClDequantizeKernel.h index 33e0164cc9..2460674067 100644 --- a/src/core/gpu/cl/kernels/ClDequantizeKernel.h +++ b/src/core/gpu/cl/kernels/ClDequantizeKernel.h @@ -39,7 +39,7 @@ class ClDequantizeKernel : public IClKernel { public: /** Default constructor */ - ClDequantizeKernel() = default; + ClDequantizeKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDequantizeKernel); /** Initialise the kernel's input and output * diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp index 94c4044bff..7b98671da2 100644 --- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -377,6 +377,11 @@ BorderSize ClDirectConv2dKernel::border_size() const return _border_size; } +ClDirectConv2dKernel::ClDirectConv2dKernel() +{ + _type = CLKernelType::DIRECT; +} + void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h index e76666fd36..b592a2191b 100644 --- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h +++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClDirectConv2dKernel : public IClKernel { public: - ClDirectConv2dKernel() = default; + ClDirectConv2dKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDirectConv2dKernel); /** Set the src, weights, biases and dst tensors info. * diff --git a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp index 335ee9c392..7bfdb9efdd 100644 --- a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp +++ b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp @@ -98,7 +98,7 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &src return Status{}; } -Status validate_arguments_divide_operation(const ITensorInfo* src1, const ITensorInfo* src2, const ITensorInfo* dst) +Status validate_arguments_divide_operation(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); @@ -271,6 +271,11 @@ std::pair validate_and_configure_window_for_division(ITensorInfo } } // namespace +ClElementwiseKernel::ClElementwiseKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClElementwiseKernel::configure_common(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst) { configure_common(CLKernelLibrary::get().get_compile_context(), src1, src2, dst); diff --git a/src/core/gpu/cl/kernels/ClElementwiseKernel.h b/src/core/gpu/cl/kernels/ClElementwiseKernel.h index 4ed8ae73ab..7f55151a87 100644 --- a/src/core/gpu/cl/kernels/ClElementwiseKernel.h +++ b/src/core/gpu/cl/kernels/ClElementwiseKernel.h @@ -45,7 +45,7 @@ class ClElementwiseKernel : public IClKernel { public: /** Default constructor */ - ClElementwiseKernel() = default; + ClElementwiseKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClElementwiseKernel); // Inherited methods overridden: @@ -89,9 +89,15 @@ protected: ActivationLayerInfo _act_info{}; private: - const ITensorInfo *_src1{ nullptr }; /**< Source tensor info 1 */ - const ITensorInfo *_src2{ nullptr }; /**< Source tensor info 2 */ - ITensorInfo *_dst{ nullptr }; /**< Destination tensor info */ + const ITensorInfo *_src1 + { + nullptr + }; /**< Source tensor info 1 */ + const ITensorInfo *_src2 + { + nullptr + }; /**< Source tensor info 2 */ + ITensorInfo *_dst{ nullptr }; /**< Destination tensor info */ }; class ClLogicalBinaryKernel : public ClElementwiseKernel diff --git a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp index 5cbb3f2e38..1525c0fe54 100644 --- a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp +++ b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp @@ -66,6 +66,11 @@ Status validate_arguments(const ITensorInfo &src, const ITensorInfo &dst, const } } // namespace +ClElementWiseUnaryKernel::ClElementWiseUnaryKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClElementWiseUnaryKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const ElementWiseUnary &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h index 7e5edef3ee..225869b58b 100644 --- a/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h +++ b/src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClElementWiseUnaryKernel : public IClKernel { public: - ClElementWiseUnaryKernel() = default; + ClElementWiseUnaryKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClElementWiseUnaryKernel); /** Initialise the kernel's srcs, dst. * diff --git a/src/core/gpu/cl/kernels/ClFillKernel.cpp b/src/core/gpu/cl/kernels/ClFillKernel.cpp index b194ee549b..526a466a00 100644 --- a/src/core/gpu/cl/kernels/ClFillKernel.cpp +++ b/src/core/gpu/cl/kernels/ClFillKernel.cpp @@ -42,6 +42,11 @@ namespace opencl { namespace kernels { +ClFillKernel::ClFillKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClFillKernel::configure(ITensorInfo *tensor, const PixelValue &constant_value, Window *window) diff --git a/src/core/gpu/cl/kernels/ClFillKernel.h b/src/core/gpu/cl/kernels/ClFillKernel.h index b439eac0de..9542c20508 100644 --- a/src/core/gpu/cl/kernels/ClFillKernel.h +++ b/src/core/gpu/cl/kernels/ClFillKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClFillKernel : public IClKernel { public: - ClFillKernel() = default; + ClFillKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClFillKernel); /** Initialise the kernel's tensor and filling value * diff --git a/src/core/gpu/cl/kernels/ClFloorKernel.cpp b/src/core/gpu/cl/kernels/ClFloorKernel.cpp index 7296d40eaf..2047128963 100644 --- a/src/core/gpu/cl/kernels/ClFloorKernel.cpp +++ b/src/core/gpu/cl/kernels/ClFloorKernel.cpp @@ -61,6 +61,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) } } // namespace +ClFloorKernel::ClFloorKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClFloorKernel::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClFloorKernel.h b/src/core/gpu/cl/kernels/ClFloorKernel.h index 646dfb30d8..3bc648b7be 100644 --- a/src/core/gpu/cl/kernels/ClFloorKernel.h +++ b/src/core/gpu/cl/kernels/ClFloorKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClFloorKernel : public IClKernel { public: - ClFloorKernel() = default; + ClFloorKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClFloorKernel); /** Configure kernel for a given list of arguments * diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp index 817a105b14..6079644935 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp @@ -262,6 +262,11 @@ inline std::pair validate_and_configure_window(ITensorInfo *src0 } } // namespace +ClGemmMatrixMultiplyKernel::ClGemmMatrixMultiplyKernel() +{ + _type = CLKernelType::GEMM; +} + void ClGemmMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha, float beta, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info) diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h index c1601335ee..c303f78b07 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h @@ -45,7 +45,7 @@ namespace kernels class ClGemmMatrixMultiplyKernel : public IClKernel { public: - ClGemmMatrixMultiplyKernel() = default; + ClGemmMatrixMultiplyKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyKernel); /** Initialise the kernel's input, output and alpha * diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp index 97d64c433c..5ae55ab04a 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp @@ -194,6 +194,11 @@ std::pair validate_and_configure_window(ITensorInfo *src0, ITens } } // namespace +ClGemmMatrixMultiplyNativeKernel::ClGemmMatrixMultiplyNativeKernel() +{ + _type = CLKernelType::GEMM; +} + void ClGemmMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h index 4770b18b8e..c3bdc7589e 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h @@ -39,7 +39,7 @@ namespace kernels class ClGemmMatrixMultiplyNativeKernel : public IClKernel { public: - ClGemmMatrixMultiplyNativeKernel() = default; + ClGemmMatrixMultiplyNativeKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyNativeKernel); /** Initialise the kernel's input and dst. * diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp index 27409b66ac..591834f762 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp @@ -184,6 +184,11 @@ std::pair validate_and_configure_window(ITensorInfo *src0, ITens } } // namespace +ClGemmMatrixMultiplyReshapedKernel::ClGemmMatrixMultiplyReshapedKernel() +{ + _type = CLKernelType::GEMM; +} + void ClGemmMatrixMultiplyReshapedKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h index ab648f15ae..b8ae4b9ae3 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h @@ -45,7 +45,7 @@ namespace kernels class ClGemmMatrixMultiplyReshapedKernel : public IClKernel { public: - ClGemmMatrixMultiplyReshapedKernel() = default; + ClGemmMatrixMultiplyReshapedKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyReshapedKernel); /** Initialise the kernel's input and output. * diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp index 4eea2c6f76..32ee0f9705 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp @@ -181,6 +181,11 @@ std::pair validate_and_configure_window(ITensorInfo *src0, ITens } } // namespace +ClGemmMatrixMultiplyReshapedOnlyRhsKernel::ClGemmMatrixMultiplyReshapedOnlyRhsKernel() +{ + _type = CLKernelType::GEMM; +} + void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info) diff --git a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h index ff6c391e15..3d6164eca9 100644 --- a/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h +++ b/src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h @@ -43,7 +43,7 @@ namespace kernels class ClGemmMatrixMultiplyReshapedOnlyRhsKernel : public ICLKernel { public: - ClGemmMatrixMultiplyReshapedOnlyRhsKernel() = default; + ClGemmMatrixMultiplyReshapedOnlyRhsKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmMatrixMultiplyReshapedOnlyRhsKernel); /** Initialise the kernel's input and output. * diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp index 98161edfff..f92945e2a4 100644 --- a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp +++ b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp @@ -111,6 +111,11 @@ std::pair validate_and_configure_window(ITensorInfo *src, ITenso } } // namespace +ClGemmReshapeLhsMatrixKernel::ClGemmReshapeLhsMatrixKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClGemmReshapeLhsMatrixKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h index b830ba02b4..73d811f3c3 100644 --- a/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h +++ b/src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h @@ -41,7 +41,7 @@ namespace kernels class ClGemmReshapeLhsMatrixKernel : public ICLKernel { public: - ClGemmReshapeLhsMatrixKernel() = default; + ClGemmReshapeLhsMatrixKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmReshapeLhsMatrixKernel); /** Initialise the kernel's input and output. * diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp index e1ef7c61aa..3a6f3c7e8f 100644 --- a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp +++ b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp @@ -107,6 +107,11 @@ std::pair validate_and_configure_window(ITensorInfo *src, ITenso } } // namespace +ClGemmReshapeRhsMatrixKernel::ClGemmReshapeRhsMatrixKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClGemmReshapeRhsMatrixKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const GEMMRHSMatrixInfo &rhs_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h index e877d87408..27f80d3428 100644 --- a/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h +++ b/src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h @@ -40,7 +40,7 @@ namespace kernels class ClGemmReshapeRhsMatrixKernel : public ICLKernel { public: - ClGemmReshapeRhsMatrixKernel() = default; + ClGemmReshapeRhsMatrixKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClGemmReshapeRhsMatrixKernel); /** Initialise the kernel's input and output. * diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp index 4436e98fe3..9ff30eedcd 100644 --- a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp @@ -64,6 +64,7 @@ Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, co ClHeightConcatenateKernel::ClHeightConcatenateKernel() : _height_offset(0) { + _type = CLKernelType::ELEMENTWISE; } Status ClHeightConcatenateKernel::validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst) diff --git a/src/core/gpu/cl/kernels/ClMulKernel.cpp b/src/core/gpu/cl/kernels/ClMulKernel.cpp index b8081bbacf..65f3bec099 100644 --- a/src/core/gpu/cl/kernels/ClMulKernel.cpp +++ b/src/core/gpu/cl/kernels/ClMulKernel.cpp @@ -92,6 +92,11 @@ Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, cons } } // namespace +ClMulKernel::ClMulKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) { @@ -128,7 +133,7 @@ void ClMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo else { if(src1->element_size() == 4 || src2->element_size() == 4) - { + { // use 64 bit accumulator for 32-bit input acc_type = "long"; } @@ -316,6 +321,11 @@ Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *sr } } // namespace +ClComplexMulKernel::ClComplexMulKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClComplexMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst); diff --git a/src/core/gpu/cl/kernels/ClMulKernel.h b/src/core/gpu/cl/kernels/ClMulKernel.h index 44162f3db3..9c70301d89 100644 --- a/src/core/gpu/cl/kernels/ClMulKernel.h +++ b/src/core/gpu/cl/kernels/ClMulKernel.h @@ -39,7 +39,7 @@ class ClMulKernel : public IClKernel { public: /** Default constructor */ - ClMulKernel() = default; + ClMulKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClMulKernel); /** Initialise the kernel's src and dst. * @@ -88,7 +88,7 @@ class ClComplexMulKernel : public ICLKernel { public: /** Default constructor */ - ClComplexMulKernel() = default; + ClComplexMulKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClComplexMulKernel); /** Initialise the kernel's src and dst. * diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp index ffc13060a8..722bf454f2 100644 --- a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp @@ -77,6 +77,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const } } // namespace +ClPermuteKernel::ClPermuteKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClPermuteKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.h b/src/core/gpu/cl/kernels/ClPermuteKernel.h index b844214595..326110a27c 100644 --- a/src/core/gpu/cl/kernels/ClPermuteKernel.h +++ b/src/core/gpu/cl/kernels/ClPermuteKernel.h @@ -42,7 +42,7 @@ class ClPermuteKernel : public IClKernel { public: /** Default constructor */ - ClPermuteKernel() = default; + ClPermuteKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPermuteKernel); /** Set the src and dst of the kernel. * diff --git a/src/core/gpu/cl/kernels/ClPool2dKernel.cpp b/src/core/gpu/cl/kernels/ClPool2dKernel.cpp index 0e15bffd14..9d5a24fdf2 100644 --- a/src/core/gpu/cl/kernels/ClPool2dKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPool2dKernel.cpp @@ -205,6 +205,7 @@ std::tuple validate_and_configure_window(ITenso ClPool2dKernel::ClPool2dKernel() : _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1) { + _type = CLKernelType::POOL; } BorderSize ClPool2dKernel::border_size() const diff --git a/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp b/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp index 48d351d536..7900489db7 100644 --- a/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp +++ b/src/core/gpu/cl/kernels/ClQuantizeKernel.cpp @@ -61,6 +61,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) } } // namespace +ClQuantizeKernel::ClQuantizeKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClQuantizeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClQuantizeKernel.h b/src/core/gpu/cl/kernels/ClQuantizeKernel.h index 8d37f33032..cd972987f5 100644 --- a/src/core/gpu/cl/kernels/ClQuantizeKernel.h +++ b/src/core/gpu/cl/kernels/ClQuantizeKernel.h @@ -42,7 +42,7 @@ class ClQuantizeKernel : public IClKernel { public: /** Default constructor */ - ClQuantizeKernel() = default; + ClQuantizeKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClQuantizeKernel); /** Set the input, output. * diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp index 923b9cb264..fcda061930 100644 --- a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp +++ b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp @@ -62,6 +62,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) } } // namespace +ClReshapeKernel::ClReshapeKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClReshapeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.h b/src/core/gpu/cl/kernels/ClReshapeKernel.h index 0501b93f40..3cd8369012 100644 --- a/src/core/gpu/cl/kernels/ClReshapeKernel.h +++ b/src/core/gpu/cl/kernels/ClReshapeKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClReshapeKernel : public IClKernel { public: - ClReshapeKernel() = default; + ClReshapeKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClReshapeKernel); /** Set the src and dst of the kernel * diff --git a/src/core/gpu/cl/kernels/ClScaleKernel.cpp b/src/core/gpu/cl/kernels/ClScaleKernel.cpp index 7fb5d2a5d3..57ca331539 100644 --- a/src/core/gpu/cl/kernels/ClScaleKernel.cpp +++ b/src/core/gpu/cl/kernels/ClScaleKernel.cpp @@ -140,6 +140,11 @@ Status ClScaleKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, c return Status{}; } +ClScaleKernel::ClScaleKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info) { ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, info)); diff --git a/src/core/gpu/cl/kernels/ClScaleKernel.h b/src/core/gpu/cl/kernels/ClScaleKernel.h index ad7632c713..826c4821b5 100644 --- a/src/core/gpu/cl/kernels/ClScaleKernel.h +++ b/src/core/gpu/cl/kernels/ClScaleKernel.h @@ -42,7 +42,7 @@ class ClScaleKernel : public IClKernel { public: /** Default constructor */ - ClScaleKernel() = default; + ClScaleKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClScaleKernel); /** Initialise the kernel's inputs, output and interpolation policy diff --git a/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp b/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp index 000c9ad04d..1dd905d66e 100644 --- a/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp +++ b/src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp @@ -154,6 +154,11 @@ const unsigned int ClLogits1DMaxShiftExpSumKernel::_serial_vector_size = 8; /**< Vector size in the parallel case (obtained through auto-tuning, enables the best memory access pattern for Bifrost) .*/ const unsigned int ClLogits1DMaxShiftExpSumKernel::_parallel_vector_size = 4; +ClLogits1DMaxShiftExpSumKernel::ClLogits1DMaxShiftExpSumKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClLogits1DMaxShiftExpSumKernel::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &max, ITensorInfo &dst, ITensorInfo &sum, const SoftmaxKernelInfo &info) { auto padding_info = get_padding_info({ &src, &max, &dst, &sum }); @@ -273,6 +278,11 @@ void ClLogits1DMaxShiftExpSumKernel::run_op(ITensorPack &tensors, const Window & while(window_collapsed.slide_window_slice_3D(slice)); } +ClLogits1DNormKernel::ClLogits1DNormKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClLogits1DNormKernel::configure(const CLCompileContext &compile_context, const ITensorInfo &src, const ITensorInfo &sum, ITensorInfo &dst, const SoftmaxKernelInfo &info) { auto padding_info = get_padding_info({ &src, &dst, &sum }); diff --git a/src/core/gpu/cl/kernels/ClSoftmaxKernel.h b/src/core/gpu/cl/kernels/ClSoftmaxKernel.h index af980eaa8e..db1aca3b9b 100644 --- a/src/core/gpu/cl/kernels/ClSoftmaxKernel.h +++ b/src/core/gpu/cl/kernels/ClSoftmaxKernel.h @@ -51,7 +51,7 @@ public: using ParallelReductionInfo = std::tuple; /** Default constructor */ - ClLogits1DMaxShiftExpSumKernel() = default; + ClLogits1DMaxShiftExpSumKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClLogits1DMaxShiftExpSumKernel); /** Configure the kernel using the given information about tensors * @@ -94,7 +94,7 @@ class ClLogits1DNormKernel : public IClKernel { public: /** Default constructor */ - ClLogits1DNormKernel() = default; + ClLogits1DNormKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClLogits1DNormKernel); /** Set the input and output tensors. diff --git a/src/core/gpu/cl/kernels/ClTransposeKernel.cpp b/src/core/gpu/cl/kernels/ClTransposeKernel.cpp index 704d0152cf..40bd4b034a 100644 --- a/src/core/gpu/cl/kernels/ClTransposeKernel.cpp +++ b/src/core/gpu/cl/kernels/ClTransposeKernel.cpp @@ -43,6 +43,11 @@ namespace opencl { namespace kernels { +ClTransposeKernel::ClTransposeKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClTransposeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClTransposeKernel.h b/src/core/gpu/cl/kernels/ClTransposeKernel.h index 21d4fd41f5..7d1226cc0d 100644 --- a/src/core/gpu/cl/kernels/ClTransposeKernel.h +++ b/src/core/gpu/cl/kernels/ClTransposeKernel.h @@ -38,7 +38,7 @@ namespace kernels class ClTransposeKernel : public IClKernel { public: - ClTransposeKernel() = default; + ClTransposeKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClTransposeKernel); /** Set the src and dst of the kernel. * diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp index 9f970719ed..8607620e92 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp @@ -68,6 +68,11 @@ Status ClWidthConcatenate2TensorsKernel::validate(const ITensorInfo *src1, const return Status{}; } +ClWidthConcatenate2TensorsKernel::ClWidthConcatenate2TensorsKernel() +{ + _type = CLKernelType::ELEMENTWISE; +} + void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst); diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h index ddade29113..56202bad2e 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h @@ -41,7 +41,7 @@ class ClWidthConcatenate2TensorsKernel : public IClKernel { public: /** Default constructor */ - ClWidthConcatenate2TensorsKernel() = default; + ClWidthConcatenate2TensorsKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate2TensorsKernel); /** Initialise the kernel's sources and destination * diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp index 281d190381..edbc23c1d3 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp @@ -66,6 +66,7 @@ Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, cons ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel() { + _type = CLKernelType::ELEMENTWISE; } Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp index d188a5226b..5510c746f8 100644 --- a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp @@ -63,6 +63,7 @@ Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, con ClWidthConcatenateKernel::ClWidthConcatenateKernel() { + _type = CLKernelType::ELEMENTWISE; } Status ClWidthConcatenateKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst) diff --git a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp index 381b4bcae9..ae43fed12d 100644 --- a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp @@ -91,6 +91,11 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace +ClWinogradFilterTransformKernel::ClWinogradFilterTransformKernel() +{ + _type = CLKernelType::WINOGRAD; +} + void ClWinogradFilterTransformKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const WinogradInfo &winograd_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); diff --git a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h index 2bc2ceb36e..13200dc419 100644 --- a/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h +++ b/src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h @@ -40,7 +40,7 @@ class ClWinogradFilterTransformKernel : public IClKernel { public: /** Default constructor */ - ClWinogradFilterTransformKernel() = default; + ClWinogradFilterTransformKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWinogradFilterTransformKernel); /** Set the input and output tensor. * diff --git a/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp b/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp index 17f0eb9e2c..62db2282e0 100644 --- a/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp @@ -103,6 +103,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen ClWinogradInputTransformKernel::ClWinogradInputTransformKernel() : _border_size(0), _data_layout(DataLayout::UNKNOWN), _num_tiles_x(0), _num_tiles_y(0), _step_z(1) { + _type = CLKernelType::WINOGRAD; } BorderSize ClWinogradInputTransformKernel::border_size() const diff --git a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp index a6c05420ed..f6ade57e5d 100644 --- a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp @@ -122,6 +122,11 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace +ClWinogradOutputTransformKernel::ClWinogradOutputTransformKernel() +{ + _type = CLKernelType::WINOGRAD; +} + void ClWinogradOutputTransformKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *bias, ITensorInfo *dst, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info) { diff --git a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h index 48b27e658c..2948d3f181 100644 --- a/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h +++ b/src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h @@ -40,7 +40,7 @@ class ClWinogradOutputTransformKernel : public IClKernel { public: /** Default constructor */ - ClWinogradOutputTransformKernel() = default; + ClWinogradOutputTransformKernel(); ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWinogradOutputTransformKernel); /** Set the input and output tensor. * -- cgit v1.2.1