From 247f52cfe337f7b2542b900e3d8cf122e9d4f11c Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Thu, 22 Mar 2018 11:24:56 +0000
Subject: COMPMID-1013 - Create WinogradInfo data structure
 COMPMID-1014 - Refactoring Winograd's dataset

Change-Id: I6abdcbf9a90d663f4db666cd410afece9f1d034d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125899
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 .../CL/kernels/CLWinogradFilterTransformKernel.h | 28 +++++++-----
 .../CL/kernels/CLWinogradInputTransformKernel.h  | 30 ++++++++-----
 .../CL/kernels/CLWinogradOutputTransformKernel.h | 36 ++++++++++------
 arm_compute/core/Types.h                         | 23 ++++++++++
 arm_compute/core/utils/misc/ShapeCalculator.h    | 50 ++++++++++++++--------
 .../CL/functions/CLWinogradInputTransform.h      | 28 +++++++-----
 6 files changed, 132 insertions(+), 63 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
index c4ae5745b8..7115710d59 100644
--- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
@@ -48,22 +48,30 @@ public:
     ~CLWinogradFilterTransformKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input       Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
-     *                         kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
-     * @param[out] output      Destination tensor. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input
-     * @param[in]  output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported.
+     * @note Winograd filter transform supports the following configurations:
+     *       Output tile size: 2x2, 4x4
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). Data types supported: F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const Size2D &output_tile);
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
      *
-     * @param[in] input       Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
-     *                        kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
-     * @param[in] output      Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input
-     * @param[in] output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported.
+     * @note Winograd filter transform supports the following configurations:
+     *       Output tile size: 2x2, 4x4
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). Data types supported: F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &output_tile);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
index 15cd6e2649..2d1eadf3cf 100644
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
@@ -46,28 +46,38 @@ public:
     CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default;
     /** Set the input and output of the kernel.
      *
-     * @param[in] input       The input tensor to permute. Data types supported: F32
-     * @param[in] output      The output tensor. Data types supported: Same as @p input
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
      *
-     * @param[in] input       First tensor input info. Data types supported: F32.
-     * @param[in] output      Output tensor info. Data types supported: same as @p input.
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
     BorderSize border_size() const override;

 private:
+    using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
     BorderSize       _border_size;
     const ICLTensor *_input;
     ICLTensor       *_output;
diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
index 35117c65db..b0d0bbeeaa 100644
--- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
@@ -48,31 +48,39 @@ public:
     ~CLWinogradOutputTransformKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input                 Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
-     * @param[in]  bias                  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
-     * @param[out] output                Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
-     * @param[in]  kernel_dims           Kernel dimensions (Width and height). Currently only supported 3x3 kernels
-     * @param[in]  output_convolved_dims Output dimensions after the convolution (Width and height)
-     * @param[in]  num_tiles             Number of tiles of size 2x2 in the output tensor along the X and Y direction
+     * @note Winograd output transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
+     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
      *
-     * @param[in]  input                 Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
-     * @param[in]  bias                  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
-     * @param[out] output                Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
-     * @param[in]  kernel_dims           Kernel dimensions (Width and height). Currently only supported 3x3 kernels
-     * @param[in]  output_convolved_dims Output dimensions after the convolution (Width and height)
-     * @param[in]  num_tiles             Number of tiles of size 2x2 in the output tensor along the X and Y direction
+     * @note Winograd output transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
+     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;

 private:
+    using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
     const ICLTensor *_input;
     const ICLTensor *_bias;
     ICLTensor       *_output;
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 73baf78918..46e6dba1a0 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1136,6 +1136,29 @@ private:
     GEMMReshapeInfo _reshape_info;
 };

+/** Winograd information */
+struct WinogradInfo
+{
+    /** Default constructor
+     *
+     * @param[in] output_tile_sz Width and height of the output tile
+     * @param[in] kernel_sz      Width and height of the kernel
+     * @param[in] input_dims     Width and height of the input tensor before the convolution is applied
+     * @param[in] conv_info      Convolution info (Pads, strides)
+     * @param[in] data_layout    Data layout to use for the output tensor once the convolution has been applied
+     */
+    WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+        : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
+    {
+    }
+
+    Size2D        output_tile_size{};                     /**< Width and height of the output tile */
+    Size2D        kernel_size{};                          /**< Width and height of the kernel*/
+    Size2D        input_dimensions{};                     /**< Width and height of the input tensor before the convolution is applied */
+    PadStrideInfo convolution_info{};                     /**< Convolution info (Pads, strides,...) */
+    DataLayout    output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+};
+
 /** IO formatting information class*/
 struct IOFormatInfo
 {
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 8816819bcd..c3d5b64a92 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -196,31 +196,35 @@ inline TensorShape compute_fully_connected_reshaped_weights_shape(const ITensorI
     return output_shape;
 }

-inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const Size2D &output_tile)
+inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
     TensorShape tensor_shape{ input.tensor_shape() };

-    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
-    tensor_shape.set(Window::DimY, input.dimension(2));
-    tensor_shape.set(Window::DimZ, (output_tile.width == 2) ? 16 : 36);
+    const Size2D kernel_size      = winograd_info.kernel_size;
+    const Size2D output_tile_size = winograd_info.output_tile_size;
+    const Size2D input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);

-    if(input.data_layout() == DataLayout::NCHW)
-    {
-        tensor_shape.set(Window::DimX, input.dimension(3));
-    }
+    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
+    tensor_shape.set(Window::DimX, input.dimension(3));
+    tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
+    tensor_shape.set(Window::DimZ, input_tile_size.area());

     return tensor_shape;
 }
-
-inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const PadStrideInfo &conv_info, const Size2D &kernel_size)
+inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+
     // Compute height
-    const unsigned int num_tiles_x = std::ceil((input.tensor_shape().x() - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
-    const unsigned int num_tiles_y = std::ceil((input.tensor_shape().y() - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
+    const unsigned int num_tiles_x = std::ceil((input.tensor_shape().x() - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
+    const unsigned int num_tiles_y = std::ceil((input.tensor_shape().y() - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));

     const unsigned int width  = input.tensor_shape()[get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)];
     const unsigned int height = num_tiles_x * num_tiles_y;
-    const unsigned int depth  = 16; // COMPMID-990
+    const unsigned int depth  = input_tile_size.area();

     TensorShape output_shape{ input.tensor_shape() };
     output_shape.set(0, width);
@@ -229,14 +233,24 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp

     return output_shape;
 }
-
-inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const Size2D &output_convolved_dims, DataLayout data_layout)
+inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        input_dimensions = winograd_info.input_dimensions;
+    const DataLayout    data_layout      = winograd_info.output_data_layout;
+
+    // Compute output shape
+    unsigned int output_width  = 0;
+    unsigned int output_height = 0;
+    std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
+                                                              kernel_size.width, kernel_size.height, conv_info);
+
     TensorShape tensor_shape{ input.tensor_shape() };

     // Output dimension
-    const unsigned int out_w = output_convolved_dims.width;
-    const unsigned int out_h = output_convolved_dims.height;
+    const unsigned int out_w = output_width;
+    const unsigned int out_h = output_height;
     const unsigned int out_c = input.dimension(0);

     tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH), out_w);
@@ -245,7 +259,6 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in

     return tensor_shape;
 }
-
 inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
 {
     const TensorShape input_shape{ input.tensor_shape() };
@@ -271,7 +284,6 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons

     return output_shape;
 }
-
 inline TensorShape compute_min_max_shape(const ITensorInfo *input)
 {
     TensorShape output_shape{ input->tensor_shape() };
diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index 54b8bdecba..0e0d6bf284 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
@@ -39,22 +39,30 @@ class CLWinogradInputTransform : public ICLSimpleFunction
 public:
     /** Set the input and output tensors.
      *
-     * @param[in] input       The input tensor to transform. Data types supported: F32
-     * @param[in] output      The output tensor. Data types supported: Same as @p input
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
-    void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    void configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransform.
      *
-     * @param[in] input       First tensor input info. Data types supported: F32.
-     * @param[in] output      Output tensor info. Data types supported: same as @p input.
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
 };
 }
 #endif /*__ARM_COMPUTE_CLWINOGRADINPUTTRANSFORM_H__ */
--
cgit v1.2.1
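
Usage sketch of the refactored API (illustrative only, not taken from the patch): a single WinogradInfo now carries the output tile size, kernel size, original input dimensions, convolution info and output data layout, and the ShapeCalculator helpers derive the transformed shapes from it. The tensor sizes, the zero padding, the standalone main() and the assumption that TensorInfo defaults to NCHW are hypothetical choices made for this sketch; the kernel/tile configuration follows the 3x3 kernel, 2x2 output tile, unit-stride case documented above.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

#include <iostream>

using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;

int main()
{
    // Hypothetical convolution: 10x10 input with 16 IFM and 32 OFM, 3x3 kernel,
    // unit strides, no padding, 2x2 output tile (the configuration documented above).
    const TensorInfo src(TensorShape(10U, 10U, 16U), 1, DataType::F32);        // [W, H, IFM] (NCHW)
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32); // [kernel_x, kernel_y, IFM, OFM]

    // One struct now carries what used to travel as separate arguments.
    const WinogradInfo winograd_info(Size2D(2U, 2U),            // output tile size
                                     Size2D(3U, 3U),            // kernel size
                                     Size2D(10U, 10U),          // input width/height before the convolution
                                     PadStrideInfo(1, 1, 0, 0), // unit strides, no padding
                                     DataLayout::NCHW);         // layout of the convolved output

    // Transformed shapes are derived from WinogradInfo. With the numbers above:
    //   input tile  = (2 + 3 - 1) x (2 + 3 - 1) = 4x4, i.e. 16 elements per tile
    //   num_tiles_x = ceil((10 - 2 + 0 + 0) / 2) = 4, likewise num_tiles_y = 4 -> 16 tiles
    //   input transform shape  = [IFM, num_tiles, tile area] = [16, 16, 16]
    //   filter transform shape = [OFM, IFM, tile area]       = [32, 16, 16]
    const TensorShape input_transform_shape  = compute_winograd_input_transform_shape(src, winograd_info);
    const TensorShape filter_transform_shape = compute_winograd_filter_transform_shape(weights, winograd_info);

    std::cout << input_transform_shape.x() << " " << input_transform_shape.y() << " " << input_transform_shape.z() << std::endl;
    std::cout << filter_transform_shape.x() << " " << filter_transform_shape.y() << " " << filter_transform_shape.z() << std::endl;

    // The same winograd_info object is what CLWinogradInputTransform(Kernel),
    // CLWinogradFilterTransformKernel and CLWinogradOutputTransformKernel now take
    // in configure() and validate().
    return 0;
}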
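
The call-site migration for the runtime function follows the same pattern: the old conv_info and kernel_dims arguments are packed into the struct together with the output tile size, input dimensions and output layout. The wrapper function name and the concrete stride/pad values below are invented for illustration; only the validate() signature comes from the patch.

#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"

using namespace arm_compute;

// Before: CLWinogradInputTransform::validate(input, output, PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U));
// After:  the padding/strides and kernel size travel inside WinogradInfo together with the
//         output tile size, the original input dimensions and the output data layout.
Status validate_winograd_input_transform(const ITensorInfo *input, const ITensorInfo *output)
{
    const WinogradInfo winograd_info(Size2D(2U, 2U),            // output tile size
                                     Size2D(3U, 3U),            // kernel size
                                     Size2D(input->dimension(0), input->dimension(1)),
                                     PadStrideInfo(1, 1, 1, 1), // strides and pads of the convolution
                                     DataLayout::NCHW);
    return CLWinogradInputTransform::validate(input, output, winograd_info);
}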