From 7e20e29904c98adae5a91c6492fd78da88b7a9bf Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Tue, 2 Feb 2021 11:49:34 +0000 Subject: Make memset/copy functions state-less Port following functions: - CLCopy - CLFill - CLPermute - CLReshapeLayer - CLCropResize Resolves: COMPMID-4002 Signed-off-by: Sheri Zhang Change-Id: I8392aa515aaeb5b44dab6122be6a795d08376d5f Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5003 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- arm_compute/runtime/CL/CLFunctions.h | 3 +- arm_compute/runtime/CL/functions/CLCopy.h | 48 +++++++--- arm_compute/runtime/CL/functions/CLCrop.h | 106 +++++++++++++++++++++ arm_compute/runtime/CL/functions/CLCropResize.h | 17 ++-- .../CL/functions/CLDeconvolutionLayerUpsample.h | 9 +- arm_compute/runtime/CL/functions/CLFill.h | 57 ++++++++--- .../CL/functions/CLGenerateProposalsLayer.h | 6 +- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 12 +-- .../runtime/CL/functions/CLMaxUnpoolingLayer.h | 8 +- arm_compute/runtime/CL/functions/CLPadLayer.h | 9 +- arm_compute/runtime/CL/functions/CLPermute.h | 29 +++++- arm_compute/runtime/CL/functions/CLQLSTMLayer.h | 12 +-- arm_compute/runtime/CL/functions/CLRNNLayer.h | 24 ++--- arm_compute/runtime/CL/functions/CLReshapeLayer.h | 30 +----- .../runtime/CL/functions/CLSpaceToBatchLayer.h | 8 +- 15 files changed, 267 insertions(+), 111 deletions(-) create mode 100644 arm_compute/runtime/CL/functions/CLCrop.h (limited to 'arm_compute') diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index 26c2670cbc..b2bdb9a3e7 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -49,6 +49,7 @@ #include "arm_compute/runtime/CL/functions/CLConvolution.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLCopy.h" +#include "arm_compute/runtime/CL/functions/CLCrop.h" #include "arm_compute/runtime/CL/functions/CLCropResize.h" #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h index f1a091df84..795a183e1f 100644 --- a/arm_compute/runtime/CL/functions/CLCopy.h +++ b/arm_compute/runtime/CL/functions/CLCopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,9 +25,9 @@ #define ARM_COMPUTE_CLCOPY_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { @@ -35,32 +35,54 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; -class CLCopy : public ICLSimpleFunction +/** Basic function to run @ref opencl::kernels::ClCopyKernel */ +class CLCopy : public IFunction { public: + /** Constructor */ + CLCopy(); + /** Destructor */ + ~CLCopy(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopy(const CLCopy &) = delete; + /** Default move constructor */ + CLCopy(CLCopy &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopy &operator=(const CLCopy &) = delete; + /** Default move assignment operator */ + CLCopy &operator=(CLCopy &&); /** Initialise the function's source and destination. * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Output tensor. 
Data types supported: Same as @p input. - * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. */ - void configure(ICLTensor *input, ICLTensor *output); + void configure(ICLTensor *input, ICLTensor *output, Window *dst_window = nullptr); /** Initialise the function's source and destination. * * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. * */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, Window *dst_window = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLCopy * - * @param[in] input Source tensor. Data types supported: All. - * @param[in] output Output tensor. Data types supported: Same as @p input. + * @param[in] input Source tensor. Data types supported: All. + * @param[in] output Output tensor. Data types supported: Same as @p input. + * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. 
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *dst_window = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLCOPY_H */ diff --git a/arm_compute/runtime/CL/functions/CLCrop.h b/arm_compute/runtime/CL/functions/CLCrop.h new file mode 100644 index 0000000000..dc509b5b84 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLCrop.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CL_CROP_H +#define ARM_COMPUTE_CL_CROP_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/IFunction.h" +#include + +namespace arm_compute +{ +class CLCompileContext; +class ICLTensor; +class ITensorInfo; + +/** Basic function to run @ref opencl::kernels::ClCropKernel */ +class CLCrop : public IFunction +{ +public: + /** Constructor */ + CLCrop(); + /** Destructor */ + ~CLCrop(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCrop(const CLCrop &) = delete; + /** Default move constructor */ + CLCrop(CLCrop &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCrop &operator=(const CLCrop &) = delete; + /** Default move assignment operator */ + CLCrop &operator=(CLCrop &&); + /** Configure function + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. + * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr); + /** Configure function + * + * @note Supported tensor rank: up to 4 + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. 
+ * @param[out] output Destination tensor. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. + */ + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); + + /** Static function to check if given info will lead to a valid configuration of @ref CLCrop + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC. + * @param[in] output Destination tensor info. Data type supported: F32 + * @param[in] start Coordinates of where to start cropping the image. + * @param[in] end Coordinates of where to end cropping the image. + * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. + * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. + * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. 
+ */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, + Window *output_window = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CL_CROP_H */ diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h index e781cfe61f..0dc3c48b32 100644 --- a/arm_compute/runtime/CL/functions/CLCropResize.h +++ b/arm_compute/runtime/CL/functions/CLCropResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,9 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLCopy.h" +#include "arm_compute/runtime/CL/functions/CLCrop.h" +#include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/CL/functions/CLScale.h" #include @@ -36,8 +39,6 @@ namespace arm_compute { // Forward Declarations class CLCompileContext; -class CLCopyKernel; -class CLCropKernel; class ITensor; class ITensorInfo; @@ -125,12 +126,12 @@ public: InterpolationPolicy _method; float _extrapolation_value; - std::vector> _scale; - std::vector> _copy; - std::vector> _crop_results; - std::vector> _scaled_results; + std::vector> _scale; + std::vector> _copy; + std::vector> _crop_results; + std::vector> _scaled_results; - std::vector> _internal_kernels; + std::vector> _internal_functions; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CL_CROP_RESIZE_H */ diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index 3ebc858d32..6c1302fbf7 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ 
b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/IFunction.h" #include @@ -35,13 +35,12 @@ namespace arm_compute // Forward declarations class CLDeconvolutionLayerUpsampleKernel; class CLCompileContext; -class CLMemsetKernel; class ICLTensor; class ITensorInfo; /** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions: * - * -# @ref CLMemsetKernel + * -# @ref CLFill * -# @ref CLDeconvolutionLayerUpsampleKernel */ class CLDeconvolutionLayerUpsample : public IFunction @@ -90,7 +89,7 @@ public: private: std::unique_ptr _upsample; - std::unique_ptr _memset; + CLFill _fill; ICLTensor *_output; }; } // namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h index fef8324432..9a27d158a6 100644 --- a/arm_compute/runtime/CL/functions/CLFill.h +++ b/arm_compute/runtime/CL/functions/CLFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,32 +24,63 @@ #ifndef ARM_COMPUTE_CLFILL_H #define ARM_COMPUTE_CLFILL_H -#include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { class CLCompileContext; class ICLTensor; -/** Function to run @ref CLMemsetKernel to fill a tensor with a scalar value */ -class CLFill : public ICLSimpleFunction +/** Basic function to run @ref opencl::kernels::ClFillKernel */ +class CLFill : public IFunction { public: - /** Initialize the function + /** Constructor */ + CLFill(); + /** Destructor */ + ~CLFill(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFill(const CLFill &) = delete; + /** Default move constructor */ + CLFill(CLFill &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFill &operator=(const CLFill &) = delete; + /** Default move assignment operator */ + CLFill &operator=(CLFill &&); + /** Initialize the kernel's tensor and filling value * - * @param[in,out] tensor Source tensor. Data types supported: All. - * @param[in] constant_value Constant value to use to fill tensor. + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. */ - void configure(ICLTensor *tensor, PixelValue constant_value); - /** Initialize the function + void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Initialise the kernel's tensor and filling value * * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Source tensor. Data types supported: All. 
- * @param[in] constant_value Constant value to use to fill tensor. + * @param[in,out] tensor Input tensor to fill. Supported data types: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. */ - void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value); + void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLFill + * + * @param[in] tensor Source tensor info. Data types supported: All. + * @param[in] constant_value The value used to fill the planes of the tensor + * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. + * + * @return a status + */ + static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLFILL_H */ diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h index cf5fd500a0..4d6bc66487 100644 --- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -27,6 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" @@ -42,7 +43,6 @@ class CLBoundingBoxTransformKernel; class 
CLDequantizationLayerKernel; class CLComputeAllAnchorsKernel; class CLPadLayerKernel; -class CLPermuteKernel; class CLQuantizationLayerKernel; class ICLTensor; class ITensorInfo; @@ -137,9 +137,9 @@ private: MemoryGroup _memory_group; // OpenCL kernels - std::unique_ptr _permute_deltas_kernel; + CLPermute _permute_deltas; CLReshapeLayer _flatten_deltas; - std::unique_ptr _permute_scores_kernel; + CLPermute _permute_scores; CLReshapeLayer _flatten_scores; std::unique_ptr _compute_anchors_kernel; std::unique_ptr _bounding_box_kernel; diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 017f26aa1e..20b068316c 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,9 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" +#include "arm_compute/runtime/CL/functions/CLCopy.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" +#include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h" @@ -44,8 +46,6 @@ namespace arm_compute { class CLCompileContext; -class CLCopyKernel; -class CLMemsetKernel; class CLTransposeKernel; class ICLTensor; @@ -239,14 +239,14 @@ private: CLPixelWiseMultiplication _pixelwise_mul_output_state2; CLFullyConnectedLayer _fully_connected_output_state; CLActivationLayer _projection_clip; - std::unique_ptr _copy_cell_state; - std::unique_ptr _copy_output; + CLCopy _copy_cell_state; + CLCopy _copy_output; CLConcatenateLayer _concat_scratch_buffer; 
CLConcatenateLayer _concat_inputs_forget_gate; CLConcatenateLayer _concat_weights_forget_gate; CLConcatenateLayer _concat_weights_input_gate; CLConcatenateLayer _concat_weights_output; - std::unique_ptr _ones_memset_kernel; + CLFill _ones_fill; CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; CLArithmeticAddition _accum_input_gate_bias; diff --git a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h index 693862fb89..24d620d372 100644 --- a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H #include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/IFunction.h" #include @@ -35,12 +36,11 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; class CLMaxUnpoolingLayerKernel; -class CLMemsetKernel; struct PoolingLayerInfo; /** Function to perform MaxUnpooling. This function calls the following OpenCL kernels: * - * -# @ref CLMemsetKernel + * -# @ref CLFill * -# @ref CLMaxUnpoolingLayerKernel */ class CLMaxUnpoolingLayer : public IFunction @@ -99,7 +99,7 @@ public: void run() override; private: - std::unique_ptr _memset_kernel; + CLFill _fill; std::unique_ptr _unpooling_layer_kernel; }; } diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h index 2bbde30fc2..dae95f63e6 100644 --- a/arm_compute/runtime/CL/functions/CLPadLayer.h +++ b/arm_compute/runtime/CL/functions/CLPadLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,19 +26,20 @@ #include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLCopy.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" namespace arm_compute { class CLCompileContext; class CLPadLayerKernel; -class CLCopyKernel; class ICLTensor; /** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels: * * -# @ref CLPadLayerKernel if there is padding to be added - * -# @ref CLCopyKernel otherwise + * -# @ref CLCopy otherwise */ class CLPadLayer : public IFunction { @@ -100,7 +101,7 @@ private: void configure_reflect_mode(ICLTensor *input, ICLTensor *output); std::unique_ptr _pad_kernel; - std::unique_ptr _copy_kernel; + CLCopy _copy; bool _perform_pad; }; } // namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h index 50e81da7c4..bcd9566fbf 100644 --- a/arm_compute/runtime/CL/functions/CLPermute.h +++ b/arm_compute/runtime/CL/functions/CLPermute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,9 +25,9 @@ #define ARM_COMPUTE_CLPERMUTE_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" -#include +#include namespace arm_compute { @@ -35,10 +35,22 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; -/** Basic function to execute an @ref CLPermuteKernel. */ -class CLPermute : public ICLSimpleFunction +/** Basic function to execute an @ref opencl::kernels::ClPermuteKernel. 
*/ +class CLPermute : public IFunction { public: + /** Constructor */ + CLPermute(); + /** Destructor */ + ~CLPermute(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermute(const CLPermute &) = delete; + /** Default move constructor */ + CLPermute(CLPermute &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermute &operator=(const CLPermute &) = delete; + /** Default move assignment operator */ + CLPermute &operator=(CLPermute &&); /** Set the input and output tensors. * * @note Arbitrary permutation vectors are supported with rank not greater than 4 @@ -69,6 +81,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLPERMUTE_H */ diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h index a8f9221b3d..954f224424 100644 --- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLCopy.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" @@ -38,7 +39,6 @@ namespace arm_compute { // Forward declarations class CLCompileContext; -class CLCopyKernel; class ICLTensor; class CLGEMMLowpMatrixAReductionKernel; class CLQLSTMLayerNormalizationKernel; @@ -49,12 +49,12 @@ class ITensorInfo; * This function calls the following CL functions/kernels: * * -# @ref CLActivationLayer Activation functions (tanh and logistic) - * -# @ref CLCopyKernel Copy kernel for copying output_state_out to output - * -# @ref CLArithmeticAddition Elementwise addition and subtraction + * -# @ref CLCopy Copy function for copying output_state_out to output + * -# @ref CLArithmeticAddition Elementwise addition and subtraction * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. 
Accumulators are 32-bit integers * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 * -# @ref CLGEMMLowpMatrixAReductionKernel For precomputing effective biases to use - * -# @ref CLPixelWiseMultiplication Elementwise multiplication + * -# @ref CLPixelWiseMultiplication Elementwise multiplication * -# @ref CLTranspose Transpose function for reshaping the weights * */ class CLQLSTMLayer : public IFunction @@ -354,7 +354,7 @@ private: CLArithmeticAddition _accumulate_projection{}; CLActivationLayer _projection_clip{}; std::array, _layer_norm_count> _layer_norms; - std::unique_ptr _copy_output; + CLCopy _copy_output; TensorCopyKernel _projection_bias_copy{}; TensorCopyKernel _projection_output_to_accumulate_copy{}; diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h index ff3fb5449b..50575daaa3 100644 --- a/arm_compute/runtime/CL/functions/CLRNNLayer.h +++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLCopy.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" @@ -34,7 +35,6 @@ namespace arm_compute { -class CLCopyKernel; class ICLTensor; /** Basic function to run @ref CLRNNLayer */ @@ -93,16 +93,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLGEMM _gemm_state_f; - CLArithmeticAddition _add_kernel; - CLActivationLayer _activation; - CLFullyConnectedLayer _fully_connected_kernel; - std::unique_ptr _copy_kernel; - CLTensor _fully_connected_out; - CLTensor _gemm_output; - CLTensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + CLGEMM _gemm_state_f; + CLArithmeticAddition _add_kernel; + CLActivationLayer _activation; + CLFullyConnectedLayer _fully_connected_kernel; + CLCopy _copy; + CLTensor _fully_connected_out; + CLTensor _gemm_output; + CLTensor _add_output; + bool _is_prepared; }; } #endif /* ARM_COMPUTE_CLRNN_LAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h index b4d52ec8cf..60ed81680e 100644 --- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h +++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include namespace arm_compute { @@ -33,7 +34,7 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; -/** Basic function to run @ref CLReshapeLayerKernel */ +/** Basic function to run @ref opencl::kernels::ClReshapeKernel */ class CLReshapeLayer : public IFunction { public: @@ -79,30 +80,5 @@ private: struct Impl; std::unique_ptr _impl; }; - -namespace experimental -{ -/** Basic function to run @ref CLReshapeLayerKernel */ -class CLReshape : public ICLOperator -{ -public: - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor info. Data type supported: All - * @param[out] output Output info. Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayer - * - * @param[in] input Input tensor info. Data type supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace experimental } // namespace arm_compute #endif /*ARM_COMPUTE_CLRESHAPELAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h index 5c5e5bed9a..dc02fa1363 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/IFunction.h" #include @@ -33,14 +34,13 @@ namespace arm_compute { class CLCompileContext; -class CLMemsetKernel; class CLSpaceToBatchLayerKernel; class ICLTensor; class ITensorInfo; /** Basic function to spatial divide a tensor. This function calls the following OpenCL kernels/functions: * - * -# @ref CLMemsetKernel + * -# @ref CLFill * -# @ref CLSpaceToBatchLayerKernel */ class CLSpaceToBatchLayer : public IFunction @@ -125,7 +125,7 @@ public: private: std::unique_ptr _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - std::unique_ptr _memset_kernel; /**< Memset kernel to run */ + CLFill _fill; /**< Fill function to run */ bool _has_padding; /**< Flag to check if the output has padding */ }; } // namespace arm_compute -- cgit v1.2.1