From 2d9de0a3fa6ad858e70040124f362799a962bb6a Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Thu, 15 Mar 2018 17:58:20 +0000 Subject: COMPMID-1009 Support 4x4 output tile for Winograd Filter Transform on OpenCL. Change-Id: I68c6453e0f192de659582404f109a89616b9fbb9 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124811 Tested-by: Jenkins Reviewed-by: Georgios Pinitas Reviewed-by: Gian Marco Iodice --- .../core/CL/kernels/CLWinogradFilterTransformKernel.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h') diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h index ec5e51482a..c4ae5745b8 100644 --- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h +++ b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h @@ -48,20 +48,22 @@ public: ~CLWinogradFilterTransformKernel() = default; /** Set the input and output tensor. * - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). - * kernel_x must be 3 and equal to kernel_y. Data types supported: F32. - * @param[out] output Destination tensor. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input + * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). + * kernel_x must be 3 and equal to kernel_y. Data types supported: F32. + * @param[out] output Destination tensor. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input + * @param[in] output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported. */ - void configure(const ICLTensor *input, ICLTensor *output); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &output_tile); /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel * - * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). - * kernel_x must be 3 and equal to kernel_y. Data types supported: F32. - * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input + * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). + * kernel_x must be 3 and equal to kernel_y. Data types supported: F32. + * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input + * @param[in] output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &output_tile); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; -- cgit v1.2.1