Diffstat (limited to 'arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h')
-rw-r--r-- arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h | 71
1 file changed, 45 insertions(+), 26 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index c1de5f15ce..efea9a1550 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,30 +24,31 @@
#ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
*
- * -# @ref CLWinogradInputTransform
- * -# @ref CLWinogradFilterTransformKernel (only once)
- * -# @ref CLGEMM
- * -# @ref CLWinogradOutputTransformKernel
+ * -# @ref opencl::ClWinogradConv2d
*
*/
class CLWinogradConvolutionLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Default Constructor */
CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLWinogradConvolutionLayer();
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLWinogradConvolutionLayer(const CLWinogradConvolutionLayer &) = delete;
/** Default move constructor */
@@ -58,6 +59,16 @@ public:
CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
* @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
*
@@ -73,8 +84,13 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
*/
- void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ void configure(ICLTensor *input,
+ const ICLTensor *weights,
+ const ICLTensor *biases,
+ ICLTensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
/** Set the input and output tensors.
*
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
@@ -93,8 +109,14 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ const ICLTensor *weights,
+ const ICLTensor *biases,
+ ICLTensor *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLWinogradConvolutionLayer
*
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1 and 1x5 kernels along with unit strides for both NCHW and NHWC data layout
@@ -114,24 +136,21 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _batched_mm;
- CLWinogradInputTransform _input_transform;
- CLWinogradFilterTransformKernel _filter_transform;
- CLWinogradOutputTransformKernel _output_transform;
- CLTensor _input0;
- CLTensor _input1;
- CLTensor _batched_mm_output;
- const ICLTensor *_original_weights;
- bool _is_prepared;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H */
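
The private section now hides all member state behind a forward-declared Impl struct (the pimpl idiom), which is also why the patch adds an explicit destructor: std::unique_ptr<Impl> requires Impl to be a complete type at the point the destructor is compiled, so the destructor has to live in the .cpp. A minimal sketch of what the corresponding source file might look like follows; the Impl member list and how the backend operator (opencl::ClWinogradConv2d) is driven are illustrative assumptions, not the library's actual definitions.

// Hypothetical sketch of the pimpl side in CLWinogradConvolutionLayer.cpp.
// Member names below are assumptions for illustration only.
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"

#include <memory>
#include <utility>

namespace arm_compute
{
struct CLWinogradConvolutionLayer::Impl
{
    std::shared_ptr<IMemoryManager> memory_manager{};
    // In the real implementation this also owns the backend operator
    // (opencl::ClWinogradConv2d) and the tensors it reads and writes at run time.
};

CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _impl(std::make_unique<Impl>())
{
    _impl->memory_manager = std::move(memory_manager);
}

// Must be defined here, where Impl is a complete type; relying on the implicit
// destructor in the header would fail to compile for users of the class.
CLWinogradConvolutionLayer::~CLWinogradConvolutionLayer() = default;
} // namespace arm_compute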
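For reference, a minimal usage sketch of the public interface declared above. The tensor shapes, layout and activation are illustrative assumptions; only the validate()/configure()/run() calls mirror the signatures in this header.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // Initialise the default OpenCL context and queue used by CL functions.
    CLScheduler::get().default_init();

    // Hypothetical NCHW shapes (the library's default layout):
    // 1x64x56x56 input, 64 3x3 filters, unit strides, 1-pixel padding.
    CLTensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(56U, 56U, 64U, 1U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 64U, 64U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(56U, 56U, 64U, 1U), 1, DataType::F32));

    const PadStrideInfo       conv_info(1, 1, 1, 1);
    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

    // Check the configuration before committing any resources.
    const Status status = CLWinogradConvolutionLayer::validate(
        src.info(), weights.info(), biases.info(), dst.info(), conv_info, act_info);
    if(status.error_code() != ErrorCode::OK)
    {
        return 1;
    }

    // Configure once, then allocate the backing OpenCL buffers.
    CLWinogradConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, conv_info, act_info, /* enable_fast_math */ false);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/biases (e.g. via map()/unmap()) ...

    conv.run();                // the first run also performs one-off preparation (weight transform)
    CLScheduler::get().sync(); // wait for the OpenCL queue to drain
    return 0;
}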