diff options
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h')
-rw-r--r-- | arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h | 71 |
1 file changed, 45 insertions, 26 deletions
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h index c1de5f15ce..efea9a1550 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,30 +24,31 @@ #ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H #define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLGEMM.h" -#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to execute Winograd-based convolution on OpenCL. 
This function calls the following OpenCL functions/kernels: * - * -# @ref CLWinogradInputTransform - * -# @ref CLWinogradFilterTransformKernel (only once) - * -# @ref CLGEMM - * -# @ref CLWinogradOutputTransformKernel + * -# @ref opencl::ClWinogradConv2d * */ class CLWinogradConvolutionLayer : public IFunction { public: - /** Default constructor */ + /** Default Constructor */ CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Default Destructor */ + ~CLWinogradConvolutionLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLWinogradConvolutionLayer(const CLWinogradConvolutionLayer &) = delete; /** Default move constructor */ @@ -58,6 +59,16 @@ public: CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default; /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout * @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true * @@ -73,8 +84,13 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. 
Default is false */ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + void configure(ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); /** Set the input and output tensors. * * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout @@ -93,8 +109,14 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradConvolutionLayer * * @note: This function only works with 3x3,3x1,1x3,5x5,5x1 and 1x5 kernels along with unit strides for both NCHW and NHWC data layout @@ -114,24 +136,21 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo 
&act_info = ActivationLayerInfo(), bool enable_fast_math = false); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - CLGEMM _batched_mm; - CLWinogradInputTransform _input_transform; - CLWinogradFilterTransformKernel _filter_transform; - CLWinogradOutputTransformKernel _output_transform; - CLTensor _input0; - CLTensor _input1; - CLTensor _batched_mm_output; - const ICLTensor *_original_weights; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H */ |