From 247f52cfe337f7b2542b900e3d8cf122e9d4f11c Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Thu, 22 Mar 2018 11:24:56 +0000
Subject: COMPMID-1013 - Create WinogradInfo data structure
 COMPMID-1014 - Refactoring Winograd's dataset

Change-Id: I6abdcbf9a90d663f4db666cd410afece9f1d034d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125899
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 .../CL/kernels/CLWinogradFilterTransformKernel.h | 28 +++++++-----
 .../CL/kernels/CLWinogradInputTransformKernel.h  | 30 ++++++++-----
 .../CL/kernels/CLWinogradOutputTransformKernel.h | 36 ++++++++++------
 arm_compute/core/Types.h                         | 23 ++++++++++
 arm_compute/core/utils/misc/ShapeCalculator.h    | 50 ++++++++++++++--------
 .../CL/functions/CLWinogradInputTransform.h      | 28 +++++++-----
 6 files changed, 132 insertions(+), 63 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
index c4ae5745b8..7115710d59 100644
--- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
@@ -48,22 +48,30 @@ public:
     ~CLWinogradFilterTransformKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input       Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
-     *                         kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
-     * @param[out] output      Destination tensor. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input
-     * @param[in]  output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported.
+     * @note Winograd filter transform supports the following configurations:
+     *       Output tile size: 2x2, 4x4
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). Data types supported: F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const Size2D &output_tile);
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
      *
-     * @param[in] input       Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
-     *                        kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
-     * @param[in] output      Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input
-     * @param[in] output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported.
+     * @note Winograd filter transform supports the following configurations:
+     *       Output tile size: 2x2, 4x4
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). Data types supported: F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &output_tile);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
index 15cd6e2649..2d1eadf3cf 100644
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
@@ -46,28 +46,38 @@ public:
     CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default;
     /** Set the input and output of the kernel.
      *
-     * @param[in] input       The input tensor to permute. Data types supported: F32
-     * @param[in] output      The output tensor. Data types supported: Same as @p input
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
      *
-     * @param[in] input       First tensor input info. Data types supported: F32.
-     * @param[in] output      Output tensor info. Data types supported: same as @p input.
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
     BorderSize border_size() const override;

 private:
+    using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
     BorderSize       _border_size;
     const ICLTensor *_input;
     ICLTensor       *_output;
diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
index 35117c65db..b0d0bbeeaa 100644
--- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
@@ -48,31 +48,39 @@ public:
     ~CLWinogradOutputTransformKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input                 Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
-     * @param[in]  bias                  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
-     * @param[out] output                Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
-     * @param[in]  kernel_dims           Kernel dimensions (Width and height). Currently only supported 3x3 kernels
-     * @param[in]  output_convolved_dims Output dimensions after the convolution (Width and height)
-     * @param[in]  num_tiles             Number of tiles of size 2x2 in the output tensor along the X and Y direction
+     * @note Winograd output transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
+     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
      *
-     * @param[in]  input                 Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
-     * @param[in]  bias                  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
-     * @param[out] output                Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
-     * @param[in]  kernel_dims           Kernel dimensions (Width and height). Currently only supported 3x3 kernels
-     * @param[in]  output_convolved_dims Output dimensions after the convolution (Width and height)
-     * @param[in]  num_tiles             Number of tiles of size 2x2 in the output tensor along the X and Y direction
+     * @note Winograd output transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
+     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;

 private:
+    using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
     const ICLTensor *_input;
     const ICLTensor *_bias;
     ICLTensor       *_output;
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 73baf78918..46e6dba1a0 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1136,6 +1136,29 @@ private:
     GEMMReshapeInfo _reshape_info;
 };

+/** Winograd information */
+struct WinogradInfo
+{
+    /** Default constructor
+     *
+     * @param[in] output_tile_sz Width and height of the output tile
+     * @param[in] kernel_sz      Width and height of the kernel
+     * @param[in] input_dims     Width and height of the input tensor before the convolution is applied
+     * @param[in] conv_info      Convolution info (Pads, strides)
+     * @param[in] data_layout    Data layout to use for the output tensor once the convolution has been applied
+     */
+    WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+        : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
+    {
+    }
+
+    Size2D        output_tile_size{};                     /**< Width and height of the output tile */
+    Size2D        kernel_size{};                          /**< Width and height of the kernel*/
+    Size2D        input_dimensions{};                     /**< Width and height of the input tensor before the convolution is applied */
+    PadStrideInfo convolution_info{};                     /**< Convolution info (Pads, strides,...) */
+    DataLayout    output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+};
+
 /** IO formatting information class*/
 struct IOFormatInfo
 {
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 8816819bcd..c3d5b64a92 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -196,31 +196,35 @@ inline TensorShape compute_fully_connected_reshaped_weights_shape(const ITensorI
     return output_shape;
 }

-inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const Size2D &output_tile)
+inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
     TensorShape tensor_shape{ input.tensor_shape() };

-    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
-    tensor_shape.set(Window::DimY, input.dimension(2));
-    tensor_shape.set(Window::DimZ, (output_tile.width == 2) ? 16 : 36);
+    const Size2D kernel_size      = winograd_info.kernel_size;
+    const Size2D output_tile_size = winograd_info.output_tile_size;
+    const Size2D input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);

-    if(input.data_layout() == DataLayout::NCHW)
-    {
-        tensor_shape.set(Window::DimX, input.dimension(3));
-    }
+    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
+    tensor_shape.set(Window::DimX, input.dimension(3));
+    tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
+    tensor_shape.set(Window::DimZ, input_tile_size.area());

     return tensor_shape;
 }
-
-inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const PadStrideInfo &conv_info, const Size2D &kernel_size)
+inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+
     // Compute height
-    const unsigned int num_tiles_x = std::ceil((input.tensor_shape().x() - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
-    const unsigned int num_tiles_y = std::ceil((input.tensor_shape().y() - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
+    const unsigned int num_tiles_x = std::ceil((input.tensor_shape().x() - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
+    const unsigned int num_tiles_y = std::ceil((input.tensor_shape().y() - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));

     const unsigned int width  = input.tensor_shape()[get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)];
     const unsigned int height = num_tiles_x * num_tiles_y;
-    const unsigned int depth  = 16; // COMPMID-990
+    const unsigned int depth  = input_tile_size.area();

     TensorShape output_shape{ input.tensor_shape() };
     output_shape.set(0, width);
@@ -229,14 +233,24 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp

     return output_shape;
 }
-
-inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const Size2D &output_convolved_dims, DataLayout data_layout)
+inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        input_dimensions = winograd_info.input_dimensions;
+    const DataLayout    data_layout      = winograd_info.output_data_layout;
+
+    // Compute output shape
+    unsigned int output_width  = 0;
+    unsigned int output_height = 0;
+    std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
+                                                              kernel_size.width, kernel_size.height, conv_info);
+
     TensorShape tensor_shape{ input.tensor_shape() };

     // Output dimension
-    const unsigned int out_w = output_convolved_dims.width;
-    const unsigned int out_h = output_convolved_dims.height;
+    const unsigned int out_w = output_width;
+    const unsigned int out_h = output_height;
     const unsigned int out_c = input.dimension(0);

     tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH), out_w);
@@ -245,7 +259,6 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in

     return tensor_shape;
 }
-
 inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
 {
     const TensorShape input_shape{ input.tensor_shape() };
@@ -271,7 +284,6 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons

     return output_shape;
 }
-
 inline TensorShape compute_min_max_shape(const ITensorInfo *input)
 {
     TensorShape output_shape{ input->tensor_shape() };
diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index 54b8bdecba..0e0d6bf284 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
@@ -39,22 +39,30 @@ class CLWinogradInputTransform : public ICLSimpleFunction
 public:
     /** Set the input and output tensors.
      *
-     * @param[in] input       The input tensor to transform. Data types supported: F32
-     * @param[in] output      The output tensor. Data types supported: Same as @p input
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
-    void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    void configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransform.
      *
-     * @param[in] input       First tensor input info. Data types supported: F32.
-     * @param[in] output      Output tensor info. Data types supported: same as @p input.
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
 };
 }
 #endif /*__ARM_COMPUTE_CLWINOGRADINPUTTRANSFORM_H__ */
--
cgit v1.2.1
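
Usage sketch of the refactored API (illustrative only, not taken from the patch): a single WinogradInfo now carries the output tile size, kernel size, original input dimensions, convolution info and output data layout, and the ShapeCalculator helpers derive the transformed shapes from it. The tensor sizes, the zero padding, the standalone main() and the assumption that TensorInfo defaults to NCHW are hypothetical choices made for this sketch; the kernel/tile configuration follows the 3x3 kernel, 2x2 output tile, unit-stride case documented above.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

#include <iostream>

using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;

int main()
{
    // Hypothetical convolution: 10x10 input with 16 IFM and 32 OFM, 3x3 kernel,
    // unit strides, no padding, 2x2 output tile (the configuration documented above).
    const TensorInfo src(TensorShape(10U, 10U, 16U), 1, DataType::F32);        // [W, H, IFM] (NCHW)
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32); // [kernel_x, kernel_y, IFM, OFM]

    // One struct now carries what used to travel as separate arguments.
    const WinogradInfo winograd_info(Size2D(2U, 2U),            // output tile size
                                     Size2D(3U, 3U),            // kernel size
                                     Size2D(10U, 10U),          // input width/height before the convolution
                                     PadStrideInfo(1, 1, 0, 0), // unit strides, no padding
                                     DataLayout::NCHW);         // layout of the convolved output

    // Transformed shapes are derived from WinogradInfo. With the numbers above:
    //   input tile  = (2 + 3 - 1) x (2 + 3 - 1) = 4x4, i.e. 16 elements per tile
    //   num_tiles_x = ceil((10 - 2 + 0 + 0) / 2) = 4, likewise num_tiles_y = 4 -> 16 tiles
    //   input transform shape  = [IFM, num_tiles, tile area] = [16, 16, 16]
    //   filter transform shape = [OFM, IFM, tile area]       = [32, 16, 16]
    const TensorShape input_transform_shape  = compute_winograd_input_transform_shape(src, winograd_info);
    const TensorShape filter_transform_shape = compute_winograd_filter_transform_shape(weights, winograd_info);

    std::cout << input_transform_shape.x() << " " << input_transform_shape.y() << " " << input_transform_shape.z() << std::endl;
    std::cout << filter_transform_shape.x() << " " << filter_transform_shape.y() << " " << filter_transform_shape.z() << std::endl;

    // The same winograd_info object is what CLWinogradInputTransform(Kernel),
    // CLWinogradFilterTransformKernel and CLWinogradOutputTransformKernel now take
    // in configure() and validate().
    return 0;
}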
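
The call-site migration for the runtime function follows the same pattern: the old conv_info and kernel_dims arguments are packed into the struct together with the output tile size, input dimensions and output layout. The wrapper function name and the concrete stride/pad values below are invented for illustration; only the validate() signature comes from the patch.

#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"

using namespace arm_compute;

// Before: CLWinogradInputTransform::validate(input, output, PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U));
// After:  the padding/strides and kernel size travel inside WinogradInfo together with the
//         output tile size, the original input dimensions and the output data layout.
Status validate_winograd_input_transform(const ITensorInfo *input, const ITensorInfo *output)
{
    const WinogradInfo winograd_info(Size2D(2U, 2U),            // output tile size
                                     Size2D(3U, 3U),            // kernel size
                                     Size2D(input->dimension(0), input->dimension(1)),
                                     PadStrideInfo(1, 1, 1, 1), // strides and pads of the convolution
                                     DataLayout::NCHW);
    return CLWinogradInputTransform::validate(input, output, winograd_info);
}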