From 247f52cfe337f7b2542b900e3d8cf122e9d4f11c Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice <gianmarco.iodice@arm.com>
Date: Thu, 22 Mar 2018 11:24:56 +0000
Subject: COMPMID-1013 - Create WinogradInfo data structure
 COMPMID-1014 - Refactoring Winograd's dataset

Change-Id: I6abdcbf9a90d663f4db666cd410afece9f1d034d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125899
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 .../CL/kernels/CLWinogradFilterTransformKernel.h   |  28 +-
 .../CL/kernels/CLWinogradInputTransformKernel.h    |  30 +-
 .../CL/kernels/CLWinogradOutputTransformKernel.h   |  36 +-
 arm_compute/core/Types.h                           |  23 +
 arm_compute/core/utils/misc/ShapeCalculator.h      |  50 +-
 .../CL/functions/CLWinogradInputTransform.h        |  28 +-
 .../CL/kernels/CLWinogradFilterTransformKernel.cpp |  46 +-
 .../CL/kernels/CLWinogradInputTransformKernel.cpp  |  91 ++--
 .../CL/kernels/CLWinogradOutputTransformKernel.cpp |  63 ++-
 .../CL/functions/CLWinogradConvolutionLayer.cpp    |  53 ++-
 .../CL/functions/CLWinogradInputTransform.cpp      |   8 +-
 tests/datasets/ShapeDatasets.h                     |  32 ++
 tests/datasets/WinogradFilterTransformDataset.h    | 128 ------
 tests/datasets/WinogradInputTransformDataset.h     |  57 +--
 tests/datasets/WinogradOutputTransformDataset.h    |  75 ++-
 tests/validation/CL/Winograd.cpp                   | 181 ++++----
 tests/validation/fixtures/WinogradLayerFixture.h   |  85 ++--
 tests/validation/reference/Winograd.cpp            | 504 +++++++++++----------
 tests/validation/reference/Winograd.h              |  14 +-
 utils/TypePrinter.h                                |  19 +
 20 files changed, 792 insertions(+), 759 deletions(-)
 delete mode 100644 tests/datasets/WinogradFilterTransformDataset.h

diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
index c4ae5745b8..7115710d59 100644
--- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
@@ -48,22 +48,30 @@ public:
     ~CLWinogradFilterTransformKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input       Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
-     *                         kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
-     * @param[out] output      Destination tensor. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input
-     * @param[in]  output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported.
+     * @note Winograd filter transform supports the following configurations:
+     *       Output tile size: 2x2, 4x4
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). Data types supported: F32.
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const Size2D &output_tile);
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
      *
-     * @param[in] input       Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
-     *                        kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
-     * @param[in] output      Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16]. Data type supported: same as @p input
-     * @param[in] output_tile Output tile. Currently only 2x2 and 4x4 tiles are supported.
+     * @note Winograd filter transform supports the following configurations:
+     *       Output tile size: 2x2, 4x4
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). Data types supported: F32.
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &output_tile);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
index 15cd6e2649..2d1eadf3cf 100644
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
@@ -46,28 +46,38 @@ public:
     CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default;
     /** Set the input and output of the kernel.
      *
-     * @param[in] input       The input tensor to permute. Data types supported: F32
-     * @param[in] output      The output tensor. Data types supported: Same as @p input
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         The input tensor to transform. Data types supported: F32
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
-    void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
      *
-     * @param[in] input       First tensor input info. Data types supported: F32.
-     * @param[in] output      Output tensor info. Data types supported: same as @p input.
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
     BorderSize border_size() const override;
 
 private:
+    using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
     BorderSize       _border_size;
     const ICLTensor *_input;
     ICLTensor       *_output;
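All three kernels now share the same configure/validate shape: a single WinogradInfo argument (the struct added to core/Types.h further down) replaces the loose conv_info/kernel_dims/num_tiles parameters. A minimal usage sketch of the refactored interface — the tensors and their initialization are hypothetical, not part of this patch:

    // Describe the Winograd problem once, then pass it to every stage.
    const WinogradInfo winograd_info(Size2D(2U, 2U),            /* output tile */
                                     Size2D(3U, 3U),            /* kernel size */
                                     Size2D(27U, 13U),          /* input width x height */
                                     PadStrideInfo(1, 1, 1, 1), /* unit strides, 1-pixel pads */
                                     DataLayout::NCHW);

    CLTensor src, dst; // assumed to be initialized and allocated elsewhere
    CLWinogradInputTransformKernel transform;
    if(bool(CLWinogradInputTransformKernel::validate(src.info(), dst.info(), winograd_info)))
    {
        transform.configure(&src, &dst, winograd_info);
    }
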
diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
index 35117c65db..b0d0bbeeaa 100644
--- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
@@ -48,31 +48,39 @@ public:
     ~CLWinogradOutputTransformKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input                 Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
-     * @param[in]  bias                  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
-     * @param[out] output                Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
-     * @param[in]  kernel_dims           Kernel dimensions (Width and height). Currently only supported 3x3 kernels
-     * @param[in]  output_convolved_dims Output dimensions after the convolution (Width and height)
-     * @param[in]  num_tiles             Number of tiles of size 2x2 in the output tensor along the X and Y direction
+     * @note Winograd output transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
+     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
      *
-     * @param[in]  input                 Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
-     * @param[in]  bias                  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
-     * @param[out] output                Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
-     * @param[in]  kernel_dims           Kernel dimensions (Width and height). Currently only supported 3x3 kernels
-     * @param[in]  output_convolved_dims Output dimensions after the convolution (Width and height)
-     * @param[in]  num_tiles             Number of tiles of size 2x2 in the output tensor along the X and Y direction
+     * @note Winograd output transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         Source tensor with shape [C, N, 16, batches]. Data types supported: F32.
+     * @param[in] bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
 
 private:
+    using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
     const ICLTensor *_input;
     const ICLTensor *_bias;
     ICLTensor       *_output;
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 73baf78918..46e6dba1a0 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1136,6 +1136,29 @@ private:
     GEMMReshapeInfo _reshape_info;
 };
 
+/** Winograd information */
+struct WinogradInfo
+{
+    /** Constructor
+     *
+     * @param[in] output_tile_sz Width and height of the output tile
+     * @param[in] kernel_sz      Width and height of the kernel
+     * @param[in] input_dims     Width and height of the input tensor before the convolution is applied
+     * @param[in] conv_info      Convolution info (Pads, strides)
+     * @param[in] data_layout    Data layout to use for the output tensor once the convolution has been applied
+     */
+    WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+        : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
+    {
+    }
+
+    Size2D        output_tile_size{};                     /**< Width and height of the output tile */
+    Size2D        kernel_size{};                          /**< Width and height of the kernel */
+    Size2D        input_dimensions{};                     /**< Width and height of the input tensor before the convolution is applied */
+    PadStrideInfo convolution_info{};                     /**< Convolution info (Pads, strides,...) */
+    DataLayout    output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+};
+
 /** IO formatting information class*/
 struct IOFormatInfo
 {
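Every transform below derives the Winograd input tile from this structure via input_tile = output_tile + kernel - 1, instead of hard-coding 4x4/16. A quick sanity check of that relation (a sketch, not part of the patch; the 56x56 input is arbitrary):

    const WinogradInfo info(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW);
    const Size2D input_tile(info.output_tile_size.width + info.kernel_size.width - 1,
                            info.output_tile_size.height + info.kernel_size.height - 1);
    // 2x2 tile, 3x3 kernel -> 4x4 input tile -> 16 transformed values per tile
    // 4x4 tile, 3x3 kernel -> 6x6 input tile -> 36 transformed values per tile (input_tile.area() == 36 here)
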
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 8816819bcd..c3d5b64a92 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -196,31 +196,35 @@ inline TensorShape compute_fully_connected_reshaped_weights_shape(const ITensorI
     return output_shape;
 }
 
-inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const Size2D &output_tile)
+inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
     TensorShape tensor_shape{ input.tensor_shape() };
 
-    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
-    tensor_shape.set(Window::DimY, input.dimension(2));
-    tensor_shape.set(Window::DimZ, (output_tile.width == 2) ? 16 : 36);
+    const Size2D kernel_size      = winograd_info.kernel_size;
+    const Size2D output_tile_size = winograd_info.output_tile_size;
+    const Size2D input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
 
-    if(input.data_layout() == DataLayout::NCHW)
-    {
-        tensor_shape.set(Window::DimX, input.dimension(3));
-    }
+    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
+    tensor_shape.set(Window::DimX, input.dimension(3));
+    tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
+    tensor_shape.set(Window::DimZ, input_tile_size.area());
 
     return tensor_shape;
 }
-
-inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const PadStrideInfo &conv_info, const Size2D &kernel_size)
+inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+
     // Compute height
-    const unsigned int num_tiles_x = std::ceil((input.tensor_shape().x() - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
-    const unsigned int num_tiles_y = std::ceil((input.tensor_shape().y() - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
+    const unsigned int num_tiles_x = std::ceil((input.tensor_shape().x() - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
+    const unsigned int num_tiles_y = std::ceil((input.tensor_shape().y() - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
 
     const unsigned int width  = input.tensor_shape()[get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)];
     const unsigned int height = num_tiles_x * num_tiles_y;
-    const unsigned int depth  = 16; // COMPMID-990
+    const unsigned int depth  = input_tile_size.area();
 
     TensorShape output_shape{ input.tensor_shape() };
     output_shape.set(0, width);
@@ -229,14 +233,24 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp
 
     return output_shape;
 }
-
-inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const Size2D &output_convolved_dims, DataLayout data_layout)
+inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        input_dimensions = winograd_info.input_dimensions;
+    const DataLayout    data_layout      = winograd_info.output_data_layout;
+
+    // Compute output shape
+    unsigned int output_width  = 0;
+    unsigned int output_height = 0;
+    std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
+                                                              kernel_size.width, kernel_size.height, conv_info);
+
     TensorShape tensor_shape{ input.tensor_shape() };
 
     // Output dimension
-    const unsigned int out_w = output_convolved_dims.width;
-    const unsigned int out_h = output_convolved_dims.height;
+    const unsigned int out_w = output_width;
+    const unsigned int out_h = output_height;
     const unsigned int out_c = input.dimension(0);
 
     tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH), out_w);
@@ -245,7 +259,6 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
 
     return tensor_shape;
 }
-
 inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
 {
     const TensorShape input_shape{ input.tensor_shape() };
@@ -271,7 +284,6 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
 
     return output_shape;
 }
-
 inline TensorShape compute_min_max_shape(const ITensorInfo *input)
 {
     TensorShape output_shape{ input->tensor_shape() };
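A worked example of the refactored input-transform shape calculation, with hypothetical dimensions — an FP32 input of 27x13 with 2 channels, 3x3 kernel, 2x2 output tile, no padding, unit strides (assumes a using-directive for arm_compute::misc::shape_calculator):

    const TensorInfo   src(TensorShape(27U, 13U, 2U), 1, DataType::F32);
    const WinogradInfo info(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW);
    // num_tiles_x = ceil((27 - 2 + 0) / 2) = 13, num_tiles_y = ceil((13 - 2 + 0) / 2) = 6
    // depth = input tile area = (2 + 3 - 1)^2 = 16
    const TensorShape dst_shape = compute_winograd_input_transform_shape(src, info);
    // dst_shape == [IFM, num_tiles_x * num_tiles_y, 16] == [2, 78, 16]
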
diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index 54b8bdecba..0e0d6bf284 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
@@ -39,22 +39,30 @@ class CLWinogradInputTransform : public ICLSimpleFunction
 public:
     /** Set the input and output tensors.
      *
-     * @param[in] input       The input tensor to transform. Data types supported: F32
-     * @param[in] output      The output tensor. Data types supported: Same as @p input
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in]  input         The input tensor to transform. Data types supported: F32
+     * @param[out] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
-    void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    void configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransform.
      *
-     * @param[in] input       First tensor input info. Data types supported: F32.
-     * @param[in] output      Output tensor info. Data types supported: same as @p input.
-     * @param[in] conv_info   Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] kernel_dims Kernel dimensions. Currently only 3x3 kernels are supported
+     * @note Winograd input transform supports the following configurations:
+     *       Output tile size: 2x2
+     *       Kernel size: 3x3
+     *       Strides: only unit strides
+     *
+     * @param[in] input         The input tensor to transform. Data types supported: F32
+     * @param[in] output        The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
 };
 }
 #endif /*__ARM_COMPUTE_CLWINOGRADINPUTTRANSFORM_H__ */
diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
index 5a03332e99..5b8921b8e4 100644
--- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
@@ -44,18 +44,26 @@ using namespace arm_compute::misc::shape_calculator;
 
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &output_tile)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != 3);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != input->dimension(1));
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
+
+    const Size2D kernel_size      = winograd_info.kernel_size;
+    const Size2D output_tile_size = winograd_info.output_tile_size;
+
+    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
+
+    ARM_COMPUTE_RETURN_ERROR_ON(kernel_size != Size2D(3U, 3U));
+    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_w) != kernel_size.width || input->dimension(idx_h) != kernel_size.height);
     ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
-    ARM_COMPUTE_RETURN_ERROR_ON(output_tile != Size2D(2U, 2U) && output_tile != Size2D(4U, 4U));
+    ARM_COMPUTE_RETURN_ERROR_ON(output_tile_size != Size2D(2U, 2U) && output_tile_size != Size2D(4U, 4U));
 
     // Checks performed when output is configured
     if(output->total_size() != 0)
     {
-        const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*input, output_tile));
+        const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*input, winograd_info));
 
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -64,9 +72,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     return Status{};
 }
 
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &output_tile)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
 {
-    ARM_COMPUTE_UNUSED(output_tile);
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     constexpr unsigned int num_elems_processed_per_iteration_x = 3;
@@ -92,36 +99,41 @@ CLWinogradFilterTransformKernel::CLWinogradFilterTransformKernel()
 {
 }
 
-void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &output_tile)
+void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
-    // Output tensor auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*input->info(), output_tile)));
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*input->info(), winograd_info)));
 
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), output_tile));
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), winograd_info));
+
+    const size_t idx_c = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
 
     // Set build options
     CLBuildOptions build_opts;
-    build_opts.add_option("-DNUM_CHANNELS=" + support::cpp11::to_string(input->info()->dimension(2)));
+    build_opts.add_option("-DNUM_CHANNELS=" + support::cpp11::to_string(input->info()->dimension(idx_c)));
+
+    const Size2D kernel_size      = winograd_info.kernel_size;
+    const Size2D output_tile_size = winograd_info.output_tile_size;
 
     // Create kernel
-    std::string kernel_name = std::string("winograd_filter_transform_") + output_tile.to_string() + std::string("_3x3_nchw");
+    std::string kernel_name = "winograd_filter_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_nchw";
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
     _input  = input;
     _output = output;
 
     // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), output->info(), output_tile);
+    auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     ICLKernel::configure(win_config.second);
 }
 
-Status CLWinogradFilterTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &output_tile)
+Status CLWinogradFilterTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, output_tile));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), output_tile).first);
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, winograd_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
 
     return Status{};
 }
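The OpenCL kernel name is now assembled from the tile and kernel sizes instead of being hard-coded. Assuming Size2D::to_string() prints "WxH" (as its use above suggests), the two supported filter-transform variants resolve as follows — an illustrative snippet, not code from this patch:

    const Size2D output_tile(4U, 4U);
    const Size2D kernel(3U, 3U);
    const std::string kernel_name = "winograd_filter_transform_" + output_tile.to_string() + "_" + kernel.to_string() + "_nchw";
    // -> "winograd_filter_transform_4x4_3x3_nchw"; with a 2x2 tile: "winograd_filter_transform_2x2_3x3_nchw"
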
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index 3b9350f9ba..df7ffe83a0 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -37,17 +37,25 @@ using namespace arm_compute;
 
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
+
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1, "Winograd input transform only supports unit strides");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_dims.width != 3 || kernel_dims.height != 3, "Winograd input transform only supports 3x3 kernels");
-    ARM_COMPUTE_UNUSED(kernel_dims);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_size != Size2D(3U, 3U), "Winograd input transform only supports 3x3 kernels");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_tile_size != Size2D(2U, 2U), "Winograd input transform only supports 2x2 output tile size");
+    ARM_COMPUTE_UNUSED(conv_info);
+    ARM_COMPUTE_UNUSED(output_tile_size);
+    ARM_COMPUTE_UNUSED(kernel_size);
 
     // Validate configured output
     if(output->total_size() != 0)
     {
-        const TensorShape output_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, conv_info, kernel_dims);
+        const TensorShape output_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, winograd_info);
 
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -56,15 +64,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     return Status{};
 }
 
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_UNUSED(output);
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_ERROR_ON(kernel_dims.width != 3 || kernel_dims.height != 3);
-    ARM_COMPUTE_UNUSED(kernel_dims);
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
 
-    constexpr unsigned int num_elems_read_per_iteration_x = 4u;
-    constexpr unsigned int num_elems_read_per_iteration_y = 4u;
+    const unsigned int num_elems_read_per_iteration_x = output_tile_size.width + kernel_size.width - 1;
+    const unsigned int num_elems_read_per_iteration_y = output_tile_size.height + kernel_size.height - 1;
 
     Window win = calculate_max_window(*input, Steps(1, 1));
 
@@ -87,28 +96,33 @@ BorderSize CLWinogradInputTransformKernel::border_size() const
     return _border_size;
 }
 
-void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
+void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), conv_info, kernel_dims));
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), winograd_info));
+
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
 
     // Compute number of elements to process in the X and Y direction
-    const int num_elements_x = input->info()->dimension(0) - 2 + conv_info.pad_left() + conv_info.pad_right();
-    const int num_elements_y = input->info()->dimension(1) - 2 + conv_info.pad_top() + conv_info.pad_bottom();
+    const int num_elements_x = input->info()->dimension(0) - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
+    const int num_elements_y = input->info()->dimension(1) - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();
 
     // Check if we need to extend the right or bottom border
-    const unsigned int extra_border_right  = (num_elements_x % 2 == 0) ? 0u : 1u;
-    const unsigned int extra_border_bottom = (num_elements_y % 2 == 0) ? 0u : 1u;
+    // FIXME: This actually is not needed. Added just for validating the result;
+    const unsigned int extra_border_right  = ((num_elements_x % output_tile_size.width) == 0) ? 0u : static_cast<unsigned int>(output_tile_size.width - 1);
+    const unsigned int extra_border_bottom = ((num_elements_y % output_tile_size.height) == 0) ? 0u : static_cast<unsigned int>(output_tile_size.height - 1);
 
     _input       = input;
     _output      = output;
     _border_size = BorderSize(conv_info.pad_top(), conv_info.pad_right() + extra_border_right, conv_info.pad_bottom() + extra_border_bottom, conv_info.pad_left());
-    _num_tiles_x = std::ceil(num_elements_x / 2.0f);
-    _num_tiles_y = std::ceil(num_elements_y / 2.0f);
+    _num_tiles_x = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width));
+    _num_tiles_y = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height));
 
-    const TensorShape output_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input->info(), conv_info, Size2D(3U, 3U));
+    const TensorShape output_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input->info(), winograd_info);
 
-    // Output auto inizialitation if not yet initialized
+    // Output auto initialization if not yet initialized
     auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
 
     ARM_COMPUTE_ERROR_ON(_num_tiles_x * _num_tiles_y != static_cast<int>(output->info()->dimension(1)));
@@ -119,24 +133,35 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     build_opts.add_option("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top()));
 
     // Create kernel
-    if((_input->info()->dimension(2) % 2) != 0)
-    {
-        _step_z = 1;
-        _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("winograd_input_transform_2x2_3x3_stepz1_nchw", build_opts.options()));
-    }
-    else
+    std::string kernel_name = "winograd_input_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string();
+
+    // Check optimized kernel if output_dims == 2x2
+    if(output_tile_size.width == 2 && output_tile_size.height == 2)
     {
-        _step_z = 2;
-        _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("winograd_input_transform_2x2_3x3_stepz2_nchw", build_opts.options()));
-        _lws_hint = cl::NDRange(1, 1, 8);
+        if((_input->info()->dimension(2) % 2) != 0)
+        {
+            _step_z = 1;
+        }
+        else
+        {
+            _step_z   = 2;
+            _lws_hint = cl::NDRange(1, 1, 8);
+        }
     }
 
+    // Append stepz and data layout
+    kernel_name += "_stepz";
+    kernel_name += support::cpp11::to_string(_step_z);
+    kernel_name += "_nchw";
+
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+
     // Create window and update padding
-    auto win_config = validate_and_configure_window(input->info(), output->info(), conv_info, kernel_dims);
+    auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     ICLKernel::configure(win_config.second);
 
-    _config_id = "winograd_transform_input_2x2_3x3_";
+    _config_id = kernel_name;
     _config_id += support::cpp11::to_string(input->info()->dimension(0));
     _config_id += "_";
     _config_id += support::cpp11::to_string(input->info()->dimension(1));
@@ -148,11 +173,11 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     _config_id += support::cpp11::to_string(conv_info.pad_top());
 }
 
-Status CLWinogradInputTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
+Status CLWinogradInputTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, conv_info, kernel_dims));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), conv_info, kernel_dims).first);
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, winograd_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), winograd_info).first);
 
     return Status{};
 }
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index c9823275eb..b59bc79327 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -46,13 +46,27 @@ using namespace arm_compute::misc::shape_calculator;
 
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims, const Size2D &num_tiles)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != num_tiles.area());
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_dims.width != 3 || kernel_dims.height != 3, "Only 3x3 kernels are supported");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<unsigned int>(std::sqrt(input->dimension(2))) != 4, "Only 2x2 output tile is supported");
-    ARM_COMPUTE_UNUSED(kernel_dims);
+    ARM_COMPUTE_RETURN_ERROR_ON(winograd_info.output_data_layout != DataLayout::NCHW);
+
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        input_dimensions = winograd_info.input_dimensions;
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_size != Size2D(3U, 3U), "Only 3x3 kernels are supported");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) != 16, "Only 2x2 output tile is supported");
+
+    // Compute number of elements to process in the X and Y direction
+    const int num_elements_x = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
+    const int num_elements_y = input_dimensions.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();
+    const int num_tiles_x    = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width));
+    const int num_tiles_y    = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height));
+
+    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != static_cast<unsigned int>((num_tiles_x * num_tiles_y)));
+    ARM_COMPUTE_UNUSED(output_tile_size);
 
     if(bias != nullptr)
     {
@@ -63,7 +77,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
     // Checks performed when output is configured
     if(output->total_size() != 0)
     {
-        const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_winograd_output_transform_shape(*input, output_convolved_dims, DataLayout::NCHW));
+        const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_winograd_output_transform_shape(*input, winograd_info));
 
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -72,7 +86,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
     return Status{};
 }
 
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output, const Size2D &output_tile_size)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
@@ -82,7 +96,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     bool   window_changed = false;
 
     AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration);
-    AccessWindowStatic    output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), 2), ceil_to_multiple(output->dimension(1), 2));
+    AccessWindowStatic    output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), output_tile_size.width), ceil_to_multiple(output->dimension(1), output_tile_size.height));
 
     if(bias != nullptr)
     {
@@ -105,36 +119,44 @@ CLWinogradOutputTransformKernel::CLWinogradOutputTransformKernel()
 {
 }
 
-void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims,
-                                                const Size2D &num_tiles)
+void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_UNUSED(kernel_dims);
 
     // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_winograd_output_transform_shape(*input->info(), output_convolved_dims, DataLayout::NCHW)));
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_winograd_output_transform_shape(*input->info(), winograd_info)));
 
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), kernel_dims, output_convolved_dims, num_tiles));
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info));
 
     _input  = input;
     _bias   = bias;
     _output = output;
 
+    // Compute num_tiles_x
+    const Size2D        input_dimensions = winograd_info.input_dimensions;
+    const Size2D        kernel_size      = winograd_info.kernel_size;
+    const Size2D        output_tile_size = winograd_info.output_tile_size;
+    const PadStrideInfo conv_info        = winograd_info.convolution_info;
+    const int           num_elements_x   = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
+    const int           num_tiles_x      = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width));
+
     // Set build options
     CLBuildOptions build_opts;
     build_opts.add_option_if(_bias != nullptr, std::string("-DHAS_BIAS"));
-    build_opts.add_option("-DNUM_TILES_X=" + support::cpp11::to_string(num_tiles.width));
+    build_opts.add_option("-DNUM_TILES_X=" + support::cpp11::to_string(num_tiles_x));
 
     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("winograd_output_transform_2x2_3x3_nchw", build_opts.options()));
+    std::string kernel_name = "winograd_output_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_nchw";
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
     // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info());
+    auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info.output_tile_size);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     ICLKernel::configure(win_config.second);
 
     // Set config_id for enabling LWS tuning
-    _config_id = "winograd_output_transform_2x2_3x3";
+    _config_id = kernel_name;
+    _config_id += "_";
     _config_id += lower_string(string_from_data_type(input->info()->data_type()));
     _config_id += "_";
     _config_id += support::cpp11::to_string(input->info()->dimension(0));
@@ -146,11 +168,10 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC
     _config_id += support::cpp11::to_string(output->info()->dimension(1));
 }
 
-Status CLWinogradOutputTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const Size2D &kernel_dims, const Size2D &output_convolved_dims,
-                                                 const Size2D &num_tiles)
+Status CLWinogradOutputTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, (bias != nullptr ? bias->clone().get() : nullptr), output, kernel_dims, output_convolved_dims, num_tiles));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (bias != nullptr ? bias->clone().get() : nullptr), output->clone().get()).first);
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, (bias != nullptr ? bias->clone().get() : nullptr), output, winograd_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (bias != nullptr ?
bias->clone().get() : nullptr), output->clone().get(), winograd_info.output_tile_size).first);
 
     return Status{};
 }
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 7af36bf06b..0aa7f8d1b5 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -39,21 +39,22 @@ CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
     const size_t idx_width  = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
     const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
 
+    // Input shape
+    const TensorShape input_shape = input->info()->tensor_shape();
+
     // Kernel size
     const unsigned int kernel_w = weights->info()->tensor_shape()[idx_width];
     const unsigned int kernel_h = weights->info()->tensor_shape()[idx_height];
 
-    // Number of tiles along the X and Y direction
-    const unsigned int num_tiles_x = std::ceil((input->info()->tensor_shape().x() - (kernel_w - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
-    const unsigned int num_tiles_y = std::ceil((input->info()->tensor_shape().y() - (kernel_h - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
-
-    // Compute output shape
-    const TensorShape output_convolved_shape = misc::shape_calculator::compute_deep_convolution_shape(*input->info(), *weights->info(), conv_info);
+    const WinogradInfo winograd_info = WinogradInfo(Size2D(2, 2),
+                                                    Size2D(kernel_w, kernel_h),
+                                                    Size2D(input_shape[idx_width], input_shape[idx_height]),
+                                                    conv_info,
+                                                    input->info()->data_layout());
 
     // Manage intermediate tensors
     _memory_group.manage(&_input0);
@@ -62,17 +63,16 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
     // Do not manage _input1 as it contains the weights
 
     // Configure input transform
-    _input_transform.configure(input, &_input0, conv_info, Size2D(kernel_w, kernel_h));
+    _input_transform.configure(input, &_input0, winograd_info);
 
     // Configure filter transform
-    _filter_transform.configure(weights, &_input1, Size2D(2U, 2U));
+    _filter_transform.configure(weights, &_input1, winograd_info);
 
     // Configure batched matrix multiply
     _batched_mm.configure(&_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/));
 
     // Configure output transform
-    _output_transform.configure(&_batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width], output_convolved_shape[idx_height]), Size2D(num_tiles_x,
-                                num_tiles_y));
+    _output_transform.configure(&_batched_mm_output, biases, output, winograd_info);
 
     // Configure activation layer
     _is_activationlayer_enabled = act_info.enabled();
@@ -90,31 +90,32 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
 Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             const ActivationLayerInfo &act_info)
 {
-    // TODO(COMPMID-1013): This part will be removed
     // Get indeces for the width and height
     const size_t idx_width  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
     const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
 
+    // Input shape
+    const TensorShape input_shape = input->tensor_shape();
+
     // Kernel size
     const unsigned int kernel_w = weights->tensor_shape()[idx_width];
     const unsigned int kernel_h = weights->tensor_shape()[idx_height];
 
-    // Number of tiles along the X and Y direction
-    const unsigned int num_tiles_x = std::ceil((input->tensor_shape().x() - (kernel_w - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
-    const unsigned int num_tiles_y = std::ceil((input->tensor_shape().y() - (kernel_h - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
-
-    // Compute output shape
-    const TensorShape output_convolved_shape = misc::shape_calculator::compute_deep_convolution_shape(*input, *weights, conv_info);
+    const WinogradInfo winograd_info = WinogradInfo(Size2D(2, 2),
+                                                    Size2D(kernel_w, kernel_h),
+                                                    Size2D(input_shape[idx_width], input_shape[idx_height]),
+                                                    conv_info,
+                                                    input->data_layout());
 
     // Validate input transform
-    const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, conv_info, Size2D(kernel_w, kernel_h));
+    const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, winograd_info);
     const TensorInfo  input0       = input->clone()->set_tensor_shape(input0_shape);
-    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradInputTransform::validate(input, &input0, conv_info, Size2D(kernel_w, kernel_h)));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradInputTransform::validate(input, &input0, winograd_info));
 
     // Validate filter transform
-    const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, Size2D(2U, 2U));
+    const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info);
     const TensorInfo  input1       = weights->clone()->set_tensor_shape(input1_shape);
-    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1, Size2D(2U, 2U)));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1, winograd_info));
 
     // Validate batched matrix multiply
     TensorShape batched_mm_output_shape = input0.tensor_shape();
@@ -122,10 +123,8 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen
     const TensorInfo  batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
     ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/)));
 
-    // Validate output transform
-    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradOutputTransformKernel::validate(&batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width],
-                                                                          output_convolved_shape[idx_height]),
-                                                                          Size2D(num_tiles_x, num_tiles_y)));
+    // Validate output transform
+    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradOutputTransformKernel::validate(&batched_mm_output, biases, output, winograd_info));
 
     // Validate Activation Layer
     if(act_info.enabled())
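At the function level nothing changes for callers: CLWinogradConvolutionLayer still takes a plain PadStrideInfo and builds the WinogradInfo internally. A hypothetical end-to-end use, with tensor allocation and CLScheduler setup omitted:

    CLWinogradConvolutionLayer conv;
    // src: [W, H, IFM, batches], weights: [3, 3, IFM, OFM], biases: [OFM], dst: convolution output
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
    conv.run();
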
diff --git a/src/runtime/CL/functions/CLWinogradInputTransform.cpp b/src/runtime/CL/functions/CLWinogradInputTransform.cpp
index 0499d4cd2f..09e84564e2 100644
--- a/src/runtime/CL/functions/CLWinogradInputTransform.cpp
+++ b/src/runtime/CL/functions/CLWinogradInputTransform.cpp
@@ -30,16 +30,16 @@
 
 using namespace arm_compute;
 
-void CLWinogradInputTransform::configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
+void CLWinogradInputTransform::configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLWinogradInputTransformKernel>();
-    k->configure(input, output, conv_info, kernel_dims);
+    k->configure(input, output, winograd_info);
     _kernel = std::move(k);
     _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
 }
 
-Status CLWinogradInputTransform::validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
+Status CLWinogradInputTransform::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradInputTransformKernel::validate(input, output, conv_info, kernel_dims));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradInputTransformKernel::validate(input, output, winograd_info));
     return Status{};
 }
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index e939a6f5a7..928ff736bc 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -372,6 +372,38 @@ public:
     }
 };
 
+/** Data set containing small 3x3 tensor shapes. */
+class Small3x3Shapes final : public ShapeDataset
+{
+public:
+    Small3x3Shapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 3U, 3U, 7U, 4U },
+        TensorShape{ 3U, 3U, 4U, 13U },
+        TensorShape{ 3U, 3U, 9U, 2U },
+        TensorShape{ 3U, 3U, 3U, 5U },
+    })
+    {
+    }
+};
+
+/** Data set containing large 3x3 tensor shapes. */
+class Large3x3Shapes final : public ShapeDataset
+{
+public:
+    Large3x3Shapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 3U, 3U, 32U, 64U },
+        TensorShape{ 3U, 3U, 51U, 13U },
+        TensorShape{ 3U, 3U, 53U, 47U },
+        TensorShape{ 3U, 3U, 128U, 384U },
+    })
+    {
+    }
+};
+
 /** Data set containing small tensor shapes for deconvolution. */
 class SmallDeconvolutionShapes final : public ShapeDataset
 {
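The new Small3x3Shapes/Large3x3Shapes datasets replace the dedicated filter-transform dataset deleted below; output tile sizes are now combined in at the test level instead of being baked into the dataset. A sketch of a fixture registration consuming them — fixture and tolerance names are illustrative, and the real changes to tests/validation/CL/Winograd.cpp sit outside this excerpt:

    FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixture, framework::DatasetMode::ALL,
                           combine(combine(datasets::Small3x3Shapes(),
                                           framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
                                   framework::dataset::make("DataType", { DataType::F32 })))
    {
        // Compare the CL result against the reference implementation.
        validate(CLAccessor(_target), _reference, tolerance_f32);
    }
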
diff --git a/tests/datasets/WinogradFilterTransformDataset.h b/tests/datasets/WinogradFilterTransformDataset.h
deleted file mode 100644
index 07d0283b55..0000000000
--- a/tests/datasets/WinogradFilterTransformDataset.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_WINOGRAD_FILTER_TRANSFORM_DATASET
-#define ARM_COMPUTE_TEST_WINOGRAD_FILTER_TRANSFORM_DATASET
-
-#include "utils/TypePrinter.h"
-
-#include "arm_compute/core/TensorShape.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace datasets
-{
-class WinogradFilterTransformDataset
-{
-public:
-    using type = std::tuple<TensorShape, bool>;
-
-    struct iterator
-    {
-        iterator(std::vector<TensorShape>::const_iterator a_it,
-                 std::vector<bool>::const_iterator        is_nchw_it)
-            : _a_it{ std::move(a_it) },
-              _is_nchw_it{ std::move(is_nchw_it) }
-        {
-        }
-
-        std::string description() const
-        {
-            std::stringstream description;
-            description << "Input=" << *_a_it << ":";
-            description << "IsNCHW=" << *_is_nchw_it << ":";
-            return description.str();
-        }
-
-        WinogradFilterTransformDataset::type operator*() const
-        {
-            return std::make_tuple(*_a_it, *_is_nchw_it);
-        }
-
-        iterator &operator++()
-        {
-            ++_a_it;
-            ++_is_nchw_it;
-
-            return *this;
-        }
-
-    private:
-        std::vector<TensorShape>::const_iterator _a_it;
-        std::vector<bool>::const_iterator        _is_nchw_it;
-    };
-
-    iterator begin() const
-    {
-        return iterator(_a_shapes.begin(), _is_nchw.begin());
-    }
-
-    int size() const
-    {
-        return std::min(_a_shapes.size(), _is_nchw.size());
-    }
-
-    void add_config(TensorShape a, bool is_nchw)
-    {
-        _a_shapes.emplace_back(std::move(a));
-        _is_nchw.emplace_back(std::move(is_nchw));
-    }
-
-protected:
-    WinogradFilterTransformDataset()                                  = default;
-    WinogradFilterTransformDataset(WinogradFilterTransformDataset &&) = default;
-
-private:
-    std::vector<TensorShape> _a_shapes{};
-    std::vector<bool>        _is_nchw{};
-};
-
-class SmallWinogradFilterTransformDataset final : public WinogradFilterTransformDataset
-{
-public:
-    SmallWinogradFilterTransformDataset()
-    {
-        add_config(TensorShape(3U, 3U, 7U, 4U), true);
-        add_config(TensorShape(3U, 3U, 4U, 13U), true);
-        add_config(TensorShape(3U, 3U, 9U, 2U), true);
-        add_config(TensorShape(3U, 3U, 3U, 5U), true);
-    }
-};
-
-class LargeWinogradFilterTransformDataset final : public WinogradFilterTransformDataset
-{
-public:
-    LargeWinogradFilterTransformDataset()
-    {
-        add_config(TensorShape(3U, 3U, 32U, 64U), true);
-        add_config(TensorShape(3U, 3U, 51U, 13U), true);
-        add_config(TensorShape(3U, 3U, 53U, 47U), true);
-        add_config(TensorShape(3U, 3U, 128U, 384U), true);
-    }
-};
-} // namespace datasets
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_WINOGRAD_FILTER_TRANSFORM_DATASET */
diff --git a/tests/datasets/WinogradInputTransformDataset.h b/tests/datasets/WinogradInputTransformDataset.h
index 07e41ebf7a..625daa0e6e 100644
--- a/tests/datasets/WinogradInputTransformDataset.h
+++ b/tests/datasets/WinogradInputTransformDataset.h
@@ -37,13 +37,12 @@ namespace datasets
 class WinogradInputTransformDataset
 {
 public:
-    using type = std::tuple<TensorShape, PadStrideInfo, Size2D, bool>;
+    using type = std::tuple<TensorShape, WinogradInfo>;
 
     struct iterator
     {
-        iterator(std::vector<TensorShape>::const_iterator in_it, std::vector<PadStrideInfo>::const_iterator info_it, std::vector<Size2D>::const_iterator kernel_dims_it,
-                 std::vector<bool>::const_iterator format_it)
-            : _in_it{ std::move(in_it) }, _info_it{ std::move(info_it) }, _kernel_dims_it{ std::move(kernel_dims_it) }, _format_it{ std::move(format_it) }
+        iterator(std::vector<TensorShape>::const_iterator in_it, std::vector<WinogradInfo>::const_iterator info_it)
+            : _in_it{ std::move(in_it) }, _info_it{ std::move(info_it) }
         {
         }
 
@@ -51,50 +50,42 @@ public:
         {
             std::stringstream description;
             description << "In=" << *_in_it << ":";
-            description << "Info=" << *_info_it;
-            description << "KernelDims=" << *_kernel_dims_it;
-            description << "IsNCHW=" << *_format_it;
+            description << "WinogradInfo=" << *_info_it;
             return description.str();
         }
 
         WinogradInputTransformDataset::type operator*() const
         {
-            return std::make_tuple(*_in_it, *_info_it, *_kernel_dims_it, *_format_it);
+            return std::make_tuple(*_in_it, *_info_it);
        }
 
         iterator &operator++()
         {
             ++_in_it;
             ++_info_it;
-            ++_kernel_dims_it;
-            ++_format_it;
 
             return *this;
         }
 
     private:
-        std::vector<TensorShape>::const_iterator   _in_it;
-        std::vector<PadStrideInfo>::const_iterator _info_it;
-        std::vector<Size2D>::const_iterator        _kernel_dims_it;
-        std::vector<bool>::const_iterator          _format_it;
+        std::vector<TensorShape>::const_iterator  _in_it;
+        std::vector<WinogradInfo>::const_iterator _info_it;
     };
 
     iterator begin() const
     {
-        return iterator(_in_shapes.begin(), _infos.begin(), _kernel_dims.begin(), _format.begin());
+        return iterator(_in_shapes.begin(), _infos.begin());
     }
 
     int size() const
     {
-        return std::min(_in_shapes.size(), std::min(_infos.size(), std::min(_kernel_dims.size(), _format.size())));
+        return std::min(_in_shapes.size(), _infos.size());
     }
 
-    void add_config(TensorShape in, PadStrideInfo info, Size2D kernel_dims, bool format)
+    void add_config(TensorShape in, WinogradInfo info)
     {
         _in_shapes.emplace_back(std::move(in));
         _infos.emplace_back(std::move(info));
-        _kernel_dims.emplace_back(std::move(kernel_dims));
-        _format.emplace_back(std::move(format));
     }
 
 protected:
@@ -102,10 +93,8 @@ protected:
     WinogradInputTransformDataset(WinogradInputTransformDataset &&) = default;
 
 private:
-    std::vector<TensorShape>   _in_shapes{};
-    std::vector<PadStrideInfo> _infos{};
-    std::vector<Size2D>        _kernel_dims{};
-    std::vector<bool>          _format{};
+    std::vector<TensorShape>  _in_shapes{};
+    std::vector<WinogradInfo> _infos{};
 };
 
 class SmallWinogradInputTransformDataset final : public WinogradInputTransformDataset
@@ -113,13 +102,13 @@ class SmallWinogradInputTransformDataset final : public WinogradInputTransformDa
 public:
     SmallWinogradInputTransformDataset()
     {
-        add_config(TensorShape(9U, 9U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U), true);
-        add_config(TensorShape(27U, 13U, 2U), PadStrideInfo(1, 1, 0, 0), Size2D(3U, 3U), true);
-        add_config(TensorShape(128U, 64U, 1U, 3U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U), true);
-        add_config(TensorShape(9U, 9U, 3U, 4U), PadStrideInfo(1, 1, 0, 0), Size2D(3U, 3U), true);
-        add_config(TensorShape(27U, 13U, 2U, 4U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U), true);
-        add_config(TensorShape(9U, 9U, 3U, 5U), PadStrideInfo(1, 1, 0, 0), Size2D(3U, 3U), true);
-        add_config(TensorShape(14U, 14U, 512U, 2U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U), true);
+        add_config(TensorShape(9U, 9U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(128U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW));
+        add_config(TensorShape(9U, 9U, 3U, 4U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW));
+        add_config(TensorShape(9U, 9U, 3U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW));
     }
 };
 
@@ -128,10 +117,10 @@ class LargeWinogradInputTransformDataset final
: public WinogradInputTransformDa public: LargeWinogradInputTransformDataset() { - add_config(TensorShape(42U, 37U, 8U, 15U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U), true); - add_config(TensorShape(57U, 60U, 13U, 8U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 3U), true); - add_config(TensorShape(128U, 64U, 21U, 13U), PadStrideInfo(1, 1, 0, 0), Size2D(3U, 3U), true); - add_config(TensorShape(83U, 72U, 14U, 5U), PadStrideInfo(1, 1, 0, 0), Size2D(3U, 3U), true); + add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(42U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(57U, 60U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); } }; } // namespace datasets diff --git a/tests/datasets/WinogradOutputTransformDataset.h b/tests/datasets/WinogradOutputTransformDataset.h index c42d6c8ebd..35fce952cd 100644 --- a/tests/datasets/WinogradOutputTransformDataset.h +++ b/tests/datasets/WinogradOutputTransformDataset.h @@ -37,20 +37,14 @@ namespace datasets class WinogradOutputTransformDataset { public: - using type = std::tuple; + using type = std::tuple; struct iterator { - iterator(std::vector::const_iterator a_it, - std::vector::const_iterator b_it, - std::vector::const_iterator c_it, - std::vector::const_iterator d_it, - std::vector::const_iterator data_layout_it) + iterator(std::vector::const_iterator a_it, + std::vector::const_iterator info_it) : _a_it{ std::move(a_it) }, - _b_it{ std::move(b_it) }, - _c_it{ std::move(c_it) }, - _d_it{ std::move(d_it) }, - _data_layout_it{ std::move(data_layout_it) } + _info_it{ std::move(info_it) } { } @@ -58,54 +52,42 @@ public: { std::stringstream description; description << "Input=" << *_a_it << ":"; - description << "KernelDims=" << *_b_it << ":"; - description << "OutputDims=" << *_c_it << ":"; - description << "NumTiles=" << *_d_it << ":"; - description << "DataLayout=" << *_data_layout_it; + description << "WinogradInfo=" << *_info_it << ":"; return description.str(); } WinogradOutputTransformDataset::type operator*() const { - return std::make_tuple(*_a_it, *_b_it, *_c_it, *_d_it, *_data_layout_it); + return std::make_tuple(*_a_it, *_info_it); } iterator &operator++() { ++_a_it; - ++_b_it; - ++_c_it; - ++_d_it; - ++_data_layout_it; + ++_info_it; return *this; } private: - std::vector::const_iterator _a_it; - std::vector::const_iterator _b_it; - std::vector::const_iterator _c_it; - std::vector::const_iterator _d_it; - std::vector::const_iterator _data_layout_it; + std::vector::const_iterator _a_it; + std::vector::const_iterator _info_it; }; iterator begin() const { - return iterator(_a_shapes.begin(), _b_dims.begin(), _c_dims.begin(), _d_dims.begin(), _data_layout.begin()); + return iterator(_a_shapes.begin(), _info.begin()); } int size() const { - return std::min(_a_shapes.size(), std::min(_b_dims.size(), std::min(_c_dims.size(), std::min(_d_dims.size(), _data_layout.size())))); + return std::min(_a_shapes.size(), _info.size()); } - void add_config(TensorShape a, Size2D b, Size2D c, Size2D d, DataLayout data_layout) + void add_config(TensorShape a, WinogradInfo b) { _a_shapes.emplace_back(std::move(a)); - 
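A note on the transformed shapes registered below: the fixed 16 in their Z dimension is the input-tile area for Winograd F(2x2, 3x3), since the input-tile side is output_tile + kernel - 1, the same formula the reference code later in this patch uses. A standalone sketch of that arithmetic (illustration only, not library code):

    // Input-tile side for F(2x2, 3x3): 2 + 3 - 1 = 4, hence 4 * 4 = 16
    // transform channels in every transformed tensor shape below.
    const Size2D output_tile(2U, 2U);
    const Size2D kernel(3U, 3U);
    const unsigned int tile_side    = output_tile.width + kernel.width - 1; // 4
    const unsigned int num_channels = tile_side * tile_side;                // 16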
_b_dims.emplace_back(std::move(b)); - _c_dims.emplace_back(std::move(c)); - _d_dims.emplace_back(std::move(d)); - _data_layout.emplace_back(std::move(data_layout)); + _info.emplace_back(std::move(b)); } protected: @@ -113,11 +95,8 @@ protected: WinogradOutputTransformDataset(WinogradOutputTransformDataset &&) = default; private: - std::vector _a_shapes{}; - std::vector _b_dims{}; - std::vector _c_dims{}; - std::vector _d_dims{}; - std::vector _data_layout{}; + std::vector _a_shapes{}; + std::vector _info{}; }; class SmallWinogradOutputTransformDataset final : public WinogradOutputTransformDataset @@ -125,12 +104,12 @@ class SmallWinogradOutputTransformDataset final : public WinogradOutputTransform public: SmallWinogradOutputTransformDataset() { - add_config(TensorShape(24U, 49U, 16U), Size2D(3, 3), Size2D(14U, 14U), Size2D(7U, 7U), DataLayout::NCHW); - add_config(TensorShape(13U, 6U, 16U), Size2D(3, 3), Size2D(5U, 4U), Size2D(3U, 2U), DataLayout::NCHW); - add_config(TensorShape(7U, 20U, 16U), Size2D(3, 3), Size2D(8U, 9U), Size2D(4U, 5U), DataLayout::NCHW); - add_config(TensorShape(24U, 49U, 16U, 3U), Size2D(3, 3), Size2D(14U, 14U), Size2D(7U, 7U), DataLayout::NCHW); - add_config(TensorShape(13U, 6U, 16U, 2U), Size2D(3, 3), Size2D(5U, 4U), Size2D(3U, 2U), DataLayout::NCHW); - add_config(TensorShape(7U, 20U, 16U, 5U), Size2D(3, 3), Size2D(8U, 9U), Size2D(4U, 5U), DataLayout::NCHW); + add_config(TensorShape(13U, 6U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 20U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(1U, 442U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(7U, 12U, 16U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(24U, 49U, 16U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(7U, 12U, 16U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); } }; @@ -139,12 +118,12 @@ class LargeWinogradOutputTransformDataset final : public WinogradOutputTransform public: LargeWinogradOutputTransformDataset() { - add_config(TensorShape(128U, 3136U, 16U), Size2D(3, 3), Size2D(112U, 112U), Size2D(56U, 56U), DataLayout::NCHW); - add_config(TensorShape(256U, 784U, 16U), Size2D(3, 3), Size2D(55U, 55U), Size2D(28U, 28U), DataLayout::NCHW); - add_config(TensorShape(512U, 169U, 16U), Size2D(3, 3), Size2D(26U, 26U), Size2D(13U, 13U), DataLayout::NCHW); - add_config(TensorShape(128U, 3136U, 16U, 3U), Size2D(3, 3), Size2D(112U, 112U), Size2D(56U, 56U), DataLayout::NCHW); - add_config(TensorShape(256U, 784U, 16U, 2U), Size2D(3, 3), Size2D(55U, 55U), Size2D(28U, 28U), DataLayout::NCHW); - add_config(TensorShape(512U, 169U, 16U, 5U), Size2D(3, 3), Size2D(26U, 26U), Size2D(13U, 13U), DataLayout::NCHW); + add_config(TensorShape(64U, 12544U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(224U, 224U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(32U, 3080U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(112U, 112U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 756U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 
3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(64U, 12544U, 16U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(224U, 224U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(32U, 3080U, 16U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(112U, 112U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 756U, 16U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); } }; } // namespace datasets diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 9aba8f776c..8fa5826470 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -35,7 +35,6 @@ #include "tests/datasets/LargeConvolutionLayerDataset.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/datasets/SmallConvolutionLayerDataset.h" -#include "tests/datasets/WinogradFilterTransformDataset.h" #include "tests/datasets/WinogradInputTransformDataset.h" #include "tests/datasets/WinogradOutputTransformDataset.h" #include "tests/framework/Asserts.h" @@ -64,7 +63,7 @@ TEST_SUITE(InputTransform) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("InputInfo",{ TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F16), // F16 not supported TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported @@ -83,44 +82,34 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(7U, 320U, 16U, 3U), 1, DataType::F32), TensorInfo(TensorShape(37U, 304U, 16U), 1, DataType::F32) })), - framework::dataset::make("PadStrideInfo", { - PadStrideInfo(1, 1, 1, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 1, 1), - PadStrideInfo(2, 1, 1, 1), - PadStrideInfo(1, 1, 0, 1), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 1, 1) - })), - framework::dataset::make("KernelDims", { - Size2D(3U, 3U), - Size2D(3U, 3U), - Size2D(5U, 5U), - Size2D(3U, 3U), - Size2D(3U, 3U), - Size2D(3U, 3U), - Size2D(3U, 3U) + framework::dataset::make("WinogradInfo", { + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW), + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW), + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(2, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(34U, 42U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW), + WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(31U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW) })), framework::dataset::make("Expected", { false, false, false, false, false, false, false })), - input_info, output_info, conv_info, kernel_dims, expected) + input_info, output_info, winograd_info, expected) { - ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, kernel_dims)) == expected, framework::LogLevel::ERRORS); + 
ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* using CLWinogradInputTransformFixture = WinogradInputTransformValidationFixture; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallWinogradInputTransformDataset(), datasets::LargeWinogradInputTransformDataset()), +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallWinogradInputTransformDataset(), datasets::LargeWinogradInputTransformDataset()), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("DataType", { DataType::F32 })), - shape_in, conv_info, kernel_dims, is_nchw_format, data_type) + shape_in, winograd_info, data_layout, data_type) { - ARM_COMPUTE_UNUSED(is_nchw_format); - - TensorShape shape_out = compute_winograd_input_transform_shape(TensorInfo(shape_in, 1, data_type), conv_info, kernel_dims); + TensorShape shape_out = compute_winograd_input_transform_shape(TensorInfo(shape_in, 1, data_type), winograd_info); // Create tensors - CLTensor in = create_tensor(shape_in, data_type); + CLTensor in = create_tensor(shape_in, data_type, 1, 0, QuantizationInfo(), data_layout); CLTensor out = create_tensor(shape_out, data_type); ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -130,15 +119,19 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da CLWinogradInputTransform winograd_input_transform; // Configure the function - winograd_input_transform.configure(&in, &out, conv_info, kernel_dims); + winograd_input_transform.configure(&in, &out, winograd_info); } -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallWinogradInputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallWinogradInputTransformDataset(), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 }))) { validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradInputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeWinogradInputTransformDataset(), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 }))) { validate(CLAccessor(_target), _reference); } @@ -166,19 +159,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(2U, 37U, 16U), 1, DataType::F32), TensorInfo(TensorShape(22U, 37U, 36U), 1, DataType::F32) })), - framework::dataset::make("OutputTile", { - Size2D(2U, 2U), - Size2D(2U, 2U), - Size2D(2U, 2U), - Size2D(3U, 3U), - Size2D(2U, 2U), - Size2D(2U, 2U), - Size2D(4U, 4U) - })), + framework::dataset::make("WinogradInfo", { + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), + WinogradInfo(Size2D(2U, 
2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), + WinogradInfo(Size2D(3U, 3U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), + WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ) + })), framework::dataset::make("Expected", { false, false, false, false, true, true, true })), - input_info, output_info, output_tile, expected) + input_info, output_info, winograd_info, expected) { - ARM_COMPUTE_EXPECT(bool(CLWinogradFilterTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), output_tile)) == expected, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bool(CLWinogradFilterTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* @@ -186,36 +179,40 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( using CLWinogradFilterTransform = CLSynthetizeFunctionWithZeroConstantBorder; using CLWinogradFilterTransformFixture = WinogradFilterTransformValidationFixture; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallWinogradFilterTransformDataset(), datasets::LargeWinogradFilterTransformDataset()), - framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::Small3x3Shapes(), datasets::Large3x3Shapes()), + framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("DataType", { DataType::F32 })), - shape_a, is_nchw_format, output_tile, data_type) + shape_a, output_tile, data_layout, data_type) { - ARM_COMPUTE_UNUSED(is_nchw_format); + WinogradInfo winograd_info(output_tile, Size2D(shape_a[0], shape_a[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */); - TensorShape shape_b = compute_winograd_filter_transform_shape(TensorInfo(shape_a, 1, data_type), output_tile); + TensorShape shape_b = compute_winograd_filter_transform_shape(TensorInfo(shape_a, 1, data_type), winograd_info); // Create tensors - CLTensor a = create_tensor(shape_a, data_type); - CLTensor b = create_tensor(shape_b, data_type); + CLTensor a = create_tensor(shape_a, data_type, 1, 0, QuantizationInfo(), data_layout); + CLTensor b = create_tensor(shape_b, data_type, 1, 0, QuantizationInfo(), data_layout); ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); // Create and configure function CLWinogradFilterTransform winograd_filter_transform; - winograd_filter_transform.configure(&a, &b, output_tile); + 
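The destination shape for the filter transform comes from compute_winograd_filter_transform_shape. The real ShapeCalculator code is not shown in this patch; the sketch below is a simplified stand-in for its assumed behaviour, consistent with the [OFM, IFM, 16] layout documented in the kernel header (the helper name is hypothetical):

    // Assumed mapping: [kernel_x, kernel_y, IFM, OFM] -> [OFM, IFM, (tile + kernel - 1)^2],
    // i.e. [OFM, IFM, 16] for a 2x2 output tile and [OFM, IFM, 36] for a 4x4 one.
    TensorShape winograd_filter_shape_sketch(const TensorShape &weights, const WinogradInfo &info)
    {
        const unsigned int side = info.output_tile_size.width + info.kernel_size.width - 1;
        return TensorShape(weights[3] /* OFM */, weights[2] /* IFM */, side * side);
    }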
winograd_filter_transform.configure(&a, &b, winograd_info); } -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallWinogradFilterTransformDataset(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small3x3Shapes(), + framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeWinogradFilterTransformDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output @@ -227,65 +224,47 @@ TEST_SUITE_END() // FilterTransform TEST_SUITE(OutputTransform) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo",{ - TensorInfo(TensorShape(24U, 49U, 16U, 5U), 1, DataType::F16), // F16 not supported - TensorInfo(TensorShape(128U, 3136U, 16U, 5U), 1, DataType::QASYMM8), // QASYMM8 not supported - TensorInfo(TensorShape(256U, 784U, 16U, 5U), 1, DataType::F32), // Kernel size not supported - TensorInfo(TensorShape(512U, 169U, 16U, 5U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(13U, 6U, 16U, 4U), 1, DataType::F32), // Padding needed - TensorInfo(TensorShape(7U, 16U, 16U, 7U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(1U, 442U, 16U, 37U), 1, DataType::F32) // Wrong number of tiles + TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F16), // F16 not supported + TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::QASYMM8), // QASYMM8 not supported + TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F32), // Kernel size not supported + TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(13U, 108U, 16U, 4U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(7U, 20U, 16U, 7U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(7U, 20U, 16U, 7U), 1, DataType::F32) // Wrong WinogradInfo }), framework::dataset::make("BiasInfo", { - TensorInfo(TensorShape(24U), 1, DataType::F16), - TensorInfo(TensorShape(128U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(256U), 1, DataType::F32), + TensorInfo(TensorShape(512U), 1, DataType::F16), + TensorInfo(TensorShape(512U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(512U), 1, DataType::F32), TensorInfo(TensorShape(512U), 1, DataType::F32), TensorInfo(TensorShape(13U), 1, DataType::F32), TensorInfo(TensorShape(7U), 1, DataType::F32), - TensorInfo(TensorShape(1U), 1, DataType::F32) + TensorInfo(TensorShape(7U), 1, DataType::F32) })), framework::dataset::make("OutputInfo", { - TensorInfo(TensorShape(14U, 14U, 24U, 5U), 1, DataType::F16), - TensorInfo(TensorShape(112U, 112U, 128U, 5U), 1, DataType::QASYMM8), - 
TensorInfo(TensorShape(55U, 55U, 256U, 5U), 1, DataType::F32), - TensorInfo(TensorShape(26U, 26U, 512U, 5U), 1, DataType::F32), - TensorInfo(TensorShape(5U, 4U, 13U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 8U, 7U, 7U), 1, DataType::F32), - TensorInfo(TensorShape(51U, 33U, 1U, 37U), 1, DataType::F32) - })), - framework::dataset::make("KernelDims", { - Size2D(3U, 3U), - Size2D(3U, 3U), - Size2D(5U, 5U), - Size2D(3U, 3U), - Size2D(3U, 3U), - Size2D(3U, 3U), - Size2D(3U, 3U) - })), - framework::dataset::make("OutputDims", { - Size2D(14U, 14U), - Size2D(112U, 112U), - Size2D(55U, 55U), - Size2D(26U, 26U), - Size2D(5U, 4U), - Size2D(8U, 8U), - Size2D(51U, 33U) + TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F16), + TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 23U, 13U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 10U, 7U, 7U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 9U, 7U, 7U), 1, DataType::F32) })), - framework::dataset::make("NumTiles", { - Size2D(7U, 7U), - Size2D(56U, 56U), - Size2D(28U, 28U), - Size2D(13U, 13U), - Size2D(3U, 2U), - Size2D(4U, 4U), - Size2D(26U, 16U) + framework::dataset::make("WinogradInfo", { + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2U, 2U), Size2D(5U, 5U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(17U, 23U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), + WinogradInfo(Size2D(2U, 3U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW), })), framework::dataset::make("Expected", { false, false, false, true, false, true, false })), - input_info, bias_info, output_info, kernel_dims, output_dims, num_tiles, expected) + input_info, bias_info, output_info, winograd_info, expected) { - ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), kernel_dims, output_dims, num_tiles)) == expected, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* @@ -295,9 +274,9 @@ using CLWinogradOutputTransformFixture = WinogradOutputTransformValidationFixtur DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallWinogradOutputTransformDataset(), datasets::LargeWinogradOutputTransformDataset()), framework::dataset::make("DataType", { DataType::F32 })), - shape_a, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type) + shape_a, winograd_info, data_type) { - TensorShape shape_b = compute_winograd_output_transform_shape(TensorInfo(shape_a, 1, data_type), output_convolved_dims, 
data_layout); + TensorShape shape_b = compute_winograd_output_transform_shape(TensorInfo(shape_a, 1, data_type), winograd_info); // Create tensors CLTensor a = create_tensor(shape_a, data_type); @@ -308,7 +287,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da // Create and configure function CLWinogradOutputTransform winograd_output_transform; - winograd_output_transform.configure(&a, nullptr, &b, kernel_dims, output_convolved_dims, num_tiles); + winograd_output_transform.configure(&a, nullptr, &b, winograd_info); } FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixture, framework::DatasetMode::ALL, combine(datasets::SmallWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h index 481eb93e80..17229cac25 100644 --- a/tests/validation/fixtures/WinogradLayerFixture.h +++ b/tests/validation/fixtures/WinogradLayerFixture.h @@ -142,8 +142,9 @@ protected: fill(bias, 2, 0.f, 0.f); } - return (act_info.enabled()) ? reference::activation_layer(reference::convolution_layer(src, weights, bias, output_shape, info), act_info) : reference::convolution_layer(src, weights, bias, - output_shape, info); + SimpleTensor conv_out = reference::convolution_layer(src, weights, bias, output_shape, info); + + return (act_info.enabled()) ? reference::activation_layer(conv_out, act_info) : conv_out; } TensorType _target{}; @@ -155,12 +156,12 @@ class WinogradInputTransformValidationFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, PadStrideInfo conv_info, Size2D kernel_dims, bool is_nchw_format, DataType data_type) + void setup(TensorShape input_shape, WinogradInfo winograd_info, DataLayout data_layout, DataType data_type) { - TensorShape output_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), conv_info, kernel_dims); + TensorShape output_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); - _target = compute_target(input_shape, output_shape, conv_info, kernel_dims, is_nchw_format, data_type); - _reference = compute_reference(input_shape, output_shape, conv_info, kernel_dims, is_nchw_format, data_type); + _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); + _reference = compute_reference(input_shape, output_shape, winograd_info, data_layout, data_type); } protected: @@ -184,16 +185,14 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims, bool is_nchw_format, DataType data_type) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const WinogradInfo &winograd_info, DataLayout data_layout, DataType data_type) { - ARM_COMPUTE_UNUSED(is_nchw_format); - - TensorType src = create_tensor(input_shape, data_type); - TensorType dst = create_tensor(output_shape, data_type); + TensorType src = create_tensor(input_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + TensorType dst = create_tensor(output_shape, data_type, 1, 0, QuantizationInfo(), data_layout); // Create and configure function FunctionType transf; - transf.configure(&src, &dst, conv_info, kernel_dims); + transf.configure(&src, &dst, winograd_info); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); 
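Callers can vet a whole configuration up front, as the Validate cases above do. A minimal usage sketch built from one SmallWinogradInputTransformDataset entry (every call below appears verbatim elsewhere in this patch):

    // Pre-flight validation of a Winograd input transform configuration.
    const WinogradInfo winograd_info(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(27U, 13U),
                                     PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW);
    const TensorInfo src_info(TensorShape(27U, 13U, 2U), 1, DataType::F32);
    const TensorInfo dst_info(compute_winograd_input_transform_shape(src_info, winograd_info), 1, DataType::F32);
    const Status     status = CLWinogradInputTransform::validate(&src_info, &dst_info, winograd_info);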
ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -208,23 +207,21 @@ protected: // Fill tensors fill(AccessorType(src), 0, -1.f, 1.f); - // Compute CLWinogradInputTransform function + // Compute Winograd input transform function transf.run(); return dst; } - SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims, bool is_nchw_format, DataType data_type) + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const WinogradInfo &winograd_info, DataLayout data_layout, DataType data_type) { - ARM_COMPUTE_UNUSED(is_nchw_format); - // Create reference - SimpleTensor src{ input_shape, data_type }; + SimpleTensor src{ input_shape, data_type, 1, 0, QuantizationInfo(), data_layout }; // Fill reference fill(src, 0, -1.f, 1.f); - return reference::winograd_input_transform(src, output_shape, conv_info, kernel_dims); + return reference::winograd_input_transform(src, output_shape, winograd_info); } TensorType _target{}; @@ -236,12 +233,13 @@ class WinogradFilterTransformValidationFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, bool is_nchw_format, Size2D output_tile, DataType data_type) + void setup(TensorShape input_shape, Size2D output_tile, DataLayout data_layout, DataType data_type) { - TensorShape output_shape = compute_winograd_filter_transform_shape(TensorInfo(input_shape, 1, data_type), output_tile); + WinogradInfo winograd_info(output_tile, Size2D(input_shape[0], input_shape[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */); + TensorShape output_shape = compute_winograd_filter_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); - _target = compute_target(input_shape, output_shape, is_nchw_format, output_tile, data_type); - _reference = compute_reference(input_shape, output_shape, is_nchw_format, output_tile, data_type); + _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); + _reference = compute_reference(input_shape, output_shape, winograd_info, data_layout, data_type); } protected: @@ -265,17 +263,15 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, bool is_nchw_format, const Size2D &output_tile, DataType data_type) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const WinogradInfo &winograd_info, DataLayout data_layout, DataType data_type) { - ARM_COMPUTE_UNUSED(is_nchw_format); - // Create tensors - TensorType src = create_tensor(input_shape, data_type, 1); - TensorType dst = create_tensor(output_shape, data_type, 1); + TensorType src = create_tensor(input_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + TensorType dst = create_tensor(output_shape, data_type, 1, 0, QuantizationInfo(), data_layout); // Create and configure function FunctionType filter_transform; - filter_transform.configure(&src, &dst, output_tile); + filter_transform.configure(&src, &dst, winograd_info); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -295,17 +291,15 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, bool is_nchw_format, const Size2D &output_tile, DataType data_type) + SimpleTensor 
compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const WinogradInfo &winograd_info, DataLayout data_layout, DataType data_type) { - ARM_COMPUTE_UNUSED(is_nchw_format); - // Create reference - SimpleTensor src{ input_shape, data_type, 1 }; + SimpleTensor src{ input_shape, data_type, 1, 0, QuantizationInfo(), data_layout }; // Fill reference fill(src, 0, -1.f, 1.f); - return reference::winograd_filter_transform(src, output_shape, output_tile); + return reference::winograd_filter_transform(src, output_shape, winograd_info); } TensorType _target{}; @@ -317,12 +311,12 @@ class WinogradOutputTransformValidationFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, Size2D kernel_dims, Size2D output_convolved_dims, Size2D num_tiles, DataLayout data_layout, DataType data_type) + void setup(TensorShape input_shape, WinogradInfo winograd_info, DataType data_type) { - TensorShape output_shape = compute_winograd_output_transform_shape(TensorInfo(input_shape, 1, data_type), output_convolved_dims, data_layout); + TensorShape output_shape = compute_winograd_output_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); - _target = compute_target(input_shape, output_shape, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type); - _reference = compute_reference(input_shape, output_shape, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type); + _target = compute_target(input_shape, output_shape, winograd_info, data_type); + _reference = compute_reference(input_shape, output_shape, winograd_info, data_type); } protected: @@ -346,16 +340,15 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &output_convolved_dims, Size2D &num_tiles, DataLayout data_layout, - DataType data_type) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const WinogradInfo &winograd_info, DataType data_type) { // Create tensors - TensorType src = create_tensor(input_shape, data_type, 1, 0, QuantizationInfo(), data_layout); - TensorType dst = create_tensor(output_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + TensorType src = create_tensor(input_shape, data_type); + TensorType dst = create_tensor(output_shape, data_type, 1, 0, QuantizationInfo(), winograd_info.output_data_layout); // Create and configure function FunctionType output_transform; - output_transform.configure(&src, nullptr, &dst, kernel_dims, output_convolved_dims, num_tiles); + output_transform.configure(&src, nullptr, &dst, winograd_info); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -375,17 +368,15 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &output_convolved_dims, Size2D &num_tiles, - DataLayout data_layout, - DataType data_type) + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const WinogradInfo &winograd_info, DataType data_type) { // Create reference - SimpleTensor src{ input_shape, data_type, 1, 0, QuantizationInfo(), data_layout }; + SimpleTensor src{ input_shape, data_type }; // Fill reference fill(src, 0, -1.f, 1.f); - return reference::winograd_output_transform(src, output_shape, kernel_dims, num_tiles); + return 
reference::winograd_output_transform(src, output_shape, winograd_info); } TensorType _target{}; diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp index ad0dcbd958..604e25214b 100644 --- a/tests/validation/reference/Winograd.cpp +++ b/tests/validation/reference/Winograd.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/Types.h" +#include + namespace arm_compute { namespace test @@ -39,153 +41,155 @@ namespace reference namespace { template -void winograd_filter_transform3x3(const SimpleTensor &in, SimpleTensor &out, const Size2D &output_tile) +void initialize_matrix_transform(SimpleTensor &src, const Size2D &output_tile_size, const Size2D &kernel_size, WinogradTransformType winograd_transform_type) { - const bool is_2x2 = (output_tile.width == 2); - const unsigned int transf_side = is_2x2 ? 4u : 6u; + ARM_COMPUTE_ERROR_ON((output_tile_size != Size2D(2U, 2U)) && (output_tile_size != Size2D(4U, 4U))); + ARM_COMPUTE_ERROR_ON(kernel_size != Size2D(3U, 3U)); - // Simple tensor for the 3x3 input tile - SimpleTensor input_tile{ TensorShape(3u, 3u), in.data_type(), 1 }; + // Winograd input transform matrices + static const float imatrix2x2_3x3[] = + { + 1.0f, 0.0f, -1.0f, 0.0f, + 0.0f, 1.0f, 1.0f, 0.0f, + 0.0f, -1.0f, 1.0f, 0.0f, + 0.0f, 1.0f, 0.0f, -1.0f + }; - // Simple tensor for the transformation matrix - SimpleTensor trans_matrix{ TensorShape(3u, transf_side), in.data_type(), 1 }; + static const float imatrix4x4_3x3[] = + { + 4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f, + 0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f, + 0.0f, 4.0f, -4.0f, -1.0f, 1.0f, 0.0f, + 0.0f, -2.0f, -1.0f, 2.0f, 1.0f, 0.0f, + 0.0f, 2.0f, -1.0f, -2.0f, 1.0f, 0.0f, + 0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f, + }; + + // ------------------------------------------ + + // Winograd filter transform matrices + static const float fmatrix2x2_3x3[] = + { + 1.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, + 0.5f, -0.5f, 0.5f, + 0.0f, 0.0f, 1.0f + }; - // Simple tensor for the transformation matrix transpose - SimpleTensor trans_matrix_transposed{ TensorShape(transf_side, 3u), in.data_type(), 1 }; + static const float fmatrix4x4_3x3[] = + { + 0.25f, 0.0f, 0.0f, + -1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f, + -1.0f / 6.0f, 1.0f / 6.0f, -1.0f / 6.0f, + 1.0f / 24.0f, 1.0f / 12.0f, 1.0f / 6.0f, + 1.0f / 24.0f, -1.0f / 12.0f, 1.0f / 6.0f, + 0.0f, 0.0f, 1.0f + }; + + // ------------------------------------------ + + // Winograd output transform matrices + static const float omatrix2x2_3x3[] = + { + 1.0f, 1.0f, 1.0f, 0.0f, + 0.0f, 1.0f, -1.0f, -1.0f + }; + + static const float omatrix4x4_3x3[] = + { + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, + 0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f, + 0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 0.0f, + 0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f + }; - // Simple tensor for the 3xSide temporary tile - SimpleTensor tmp_tile{ TensorShape(3u, transf_side), in.data_type(), 1 }; + // ------------------------------------------ - // Simple tensor for the SidexSide output tile - SimpleTensor transf_tile{ TensorShape(transf_side, transf_side), in.data_type(), 1 }; + using WinogradKey = std::tuple, std::pair, WinogradTransformType>; - if(is_2x2) + // Key = (Output tile size, Kernel size, Winograd transform type) + static std::map matrix_map = { - // Initialize 3x4 transformation matrix - // 1 | 0 | 0 - // 0.5 | 0.5 | 0.5 - // 0.5 |-0.5 | 0.5 - // 0 | 0 | 1 - trans_matrix[0 + 0 * 3] = 1.0f; - trans_matrix[1 + 0 * 3] = 0.0f; - trans_matrix[2 + 0 * 3] = 0.0f; - trans_matrix[0 + 1 * 3] = 0.5f; - trans_matrix[1 + 1 * 3] = 0.5f; - 
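For orientation, the three matrix families collected above are the B^T (input), G (filter) and A^T (output) matrices of the standard Winograd algorithm (Lavin & Gray), which evaluates each output tile as

    Y = A^T [ (G g G^T) .* (B^T d B) ] A

where d is the input tile of side output_tile + kernel - 1, g is the 3x3 kernel and .* is an element-wise product; imatrix2x2_3x3, fmatrix2x2_3x3 and omatrix2x2_3x3 are B^T, G and A^T for F(2x2, 3x3), and the 4x4 variants play the same roles for F(4x4, 3x3).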
trans_matrix[2 + 1 * 3] = 0.5f; - trans_matrix[0 + 2 * 3] = 0.5f; - trans_matrix[1 + 2 * 3] = -0.5f; - trans_matrix[2 + 2 * 3] = 0.5f; - trans_matrix[0 + 3 * 3] = 0.0f; - trans_matrix[1 + 3 * 3] = 0.0f; - trans_matrix[2 + 3 * 3] = 1.0f; + { WinogradKey(std::pair(2, 2), std::pair(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 }, + { WinogradKey(std::pair(4, 4), std::pair(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 }, + { WinogradKey(std::pair(2, 2), std::pair(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 }, + { WinogradKey(std::pair(4, 4), std::pair(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 }, + { WinogradKey(std::pair(2, 2), std::pair(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 }, + { WinogradKey(std::pair(4, 4), std::pair(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 }, + }; + + // Find input matrix transform + std::map::iterator it; + + it = matrix_map.find(WinogradKey(std::pair(output_tile_size.width, output_tile_size.height), + std::pair(kernel_size.width, kernel_size.height), + winograd_transform_type)); + + float const *matrix_values = nullptr; + if(it != matrix_map.end()) + { + // Get matrix pointer + matrix_values = it->second; } else { - // Initialize 3x6 transformation matrix - // 1/4 | 0 | 0 - // -1/6 | -1/6 | -1/6 - // -1/6 | 1/6 | -1/6 - // 1/24 | 1/12 | 1/6 - // 1/24 | -1/12 | 1/6 - // 0 | 0 | 1 - trans_matrix[0 + 0 * 3] = 1.0f / 4.0f; - trans_matrix[1 + 0 * 3] = 0.0f; - trans_matrix[2 + 0 * 3] = 0.0f; - trans_matrix[0 + 1 * 3] = -1.0f / 6.0f; - trans_matrix[1 + 1 * 3] = -1.0f / 6.0f; - trans_matrix[2 + 1 * 3] = -1.0f / 6.0f; - trans_matrix[0 + 2 * 3] = -1.0f / 6.0f; - trans_matrix[1 + 2 * 3] = 1.0f / 6.0f; - trans_matrix[2 + 2 * 3] = -1.0f / 6.0f; - trans_matrix[0 + 3 * 3] = 1.0f / 24.0f; - trans_matrix[1 + 3 * 3] = 1.0f / 12.0f; - trans_matrix[2 + 3 * 3] = 1.0f / 6.0f; - trans_matrix[0 + 4 * 3] = 1.0f / 24.0f; - trans_matrix[1 + 4 * 3] = -1.0f / 12.0f; - trans_matrix[2 + 4 * 3] = 1.0f / 6.0f; - trans_matrix[0 + 5 * 3] = 0.0f; - trans_matrix[1 + 5 * 3] = 0.0f; - trans_matrix[2 + 5 * 3] = 1.0f; + ARM_COMPUTE_ERROR("Winograd configuration not supported"); } - // Transpose the transformation matrix - transpose_matrix(trans_matrix, trans_matrix_transposed); + // Copy values + std::copy(&matrix_values[0], &matrix_values[0] + src.num_elements(), &src[0]); +} +} // namespace - const int num_channels = in.shape()[2]; - const int num_filters = in.shape()[3]; - const int num_batches = in.shape().total_size() / (9 * num_channels * num_filters); +template +SimpleTensor winograd_input_transform(const SimpleTensor &in, const TensorShape &output_shape, const WinogradInfo &winograd_info) +{ + ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW); - for(int n = 0; n < num_batches; ++n) - { - for(int w = 0; w < num_filters; ++w) - { - for(int z = 0; z < num_channels; ++z) - { - // Load the 3x3 tile from the input tensor - get_tile(in, input_tile, Coordinates(0, 0, z, w, n)); + const PadStrideInfo conv_info = winograd_info.convolution_info; + const Size2D output_tile_size = winograd_info.output_tile_size; + const Size2D kernel_size = winograd_info.kernel_size; - // First transformation - matrix_multiply(trans_matrix, input_tile, tmp_tile); + SimpleTensor out{ output_shape, in.data_type() }; - // Second transformation - matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile); + // Calculate dimensions for the tile + const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1; + const unsigned int tile_h = 
output_tile_size.height + kernel_size.height - 1; - // Store the 4x4 output tile across the 16 channels - const int output_offset = w + z * num_filters; + TensorShape tile_dims(tile_w, tile_h); - for(unsigned int out_h = 0, out_pos = 0; out_h < transf_side; ++out_h) - { - for(unsigned int out_w = 0; out_w < transf_side; ++out_w, ++out_pos) - { - out[output_offset + out_pos * num_filters * num_channels] = transf_tile[out_w + out_h * transf_side]; - } - } - } - } - } -} + // Simple tensor for the input tile + SimpleTensor src_tile{ tile_dims, in.data_type() }; -template -void winograd_input_transform3x3(const SimpleTensor &src, SimpleTensor &dst, const PadStrideInfo &conv_info) -{ - TensorShape shape4x4(4u, 4u); - - // Simple tensor for the 4x4 input tile - SimpleTensor src_tile{ shape4x4, src.data_type() }; + // Simple tensor for the temporary tile + SimpleTensor tmp_tile{ tile_dims, in.data_type() }; - // Simple tensor for the 4x4 temporary tile - SimpleTensor tmp_tile{ shape4x4, src.data_type() }; - - // Simple tensor for the 4x4 output tile - SimpleTensor dst_tile{ shape4x4, src.data_type() }; + // Simple tensor for the output tile + SimpleTensor dst_tile{ tile_dims, in.data_type() }; // Simple tensor for the transformation matrix - SimpleTensor matrix{ shape4x4, src.data_type() }; + SimpleTensor matrix{ tile_dims, in.data_type() }; // Simple tensor for the transformation matrix transposed - SimpleTensor matrix_transposed{ shape4x4, src.data_type() }; - - const float matrix_values[] = { 1.f, 0.f, -1.f, 0.f, - 0.f, 1.f, 1.f, 0.f, - 0.f, -1.f, 1.f, 0.f, - 0.f, 1.f, 0.f, -1.f - }; + SimpleTensor matrix_transposed{ tile_dims, in.data_type() }; - for(int i = 0; i < matrix.num_elements(); ++i) - { - matrix[i] = matrix_values[i]; - } + // Initialize matrix for the input transform + initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT); + // Transpose matrix transpose_matrix(matrix, matrix_transposed); - const int in_w = src.shape().x(); - const int in_h = src.shape().y(); - const int in_d = src.shape().z(); - const int num_batches = src.shape().total_size() / (in_w * in_h * in_d); - const int num_tiles_x = std::ceil((in_w - 2 + conv_info.pad_left() + conv_info.pad_right()) / 2.0f); - const int num_tiles_y = std::ceil((in_h - 2 + conv_info.pad_top() + conv_info.pad_bottom()) / 2.0f); + const int in_w = in.shape().x(); + const int in_h = in.shape().y(); + const int in_d = in.shape().z(); + const int out_d = out.shape().z(); + const int num_batches = in.shape().total_size() / (in_w * in_h * in_d); + const int num_tiles_x = std::ceil((in_w - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast(output_tile_size.width)); + const int num_tiles_y = std::ceil((in_h - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast(output_tile_size.height)); + const int step_x = output_tile_size.width; + const int step_y = output_tile_size.height; - ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast(dst.shape().y())); + ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast(out.shape().y())); for(int b = 0; b < num_batches; ++b) { @@ -195,61 +199,154 @@ void winograd_input_transform3x3(const SimpleTensor &src, SimpleTensor &ds { for(int x = 0; x < num_tiles_x; ++x) { - int xi = x * 2 - conv_info.pad_left(); - int yi = y * 2 - conv_info.pad_top(); + int xi = x * step_x - conv_info.pad_left(); + int yi = y * step_y - conv_info.pad_top(); - // Get the 4x4 tile from the input tensor - 
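The generalized tile count replaces the old hard-coded divide-by-2.0f. A worked instance of the formula, using values from the 9x9, pad-1 entry of SmallWinogradInputTransformDataset (standalone sketch; needs <cmath>):

    // num_tiles_x = ceil((in_w - (kernel - 1) + pad_left + pad_right) / tile_w)
    const int in_w        = 9, pad = 1, kernel = 3, tile = 2;
    const int num_tiles_x = static_cast<int>(std::ceil((in_w - (kernel - 1) + 2 * pad) / static_cast<float>(tile))); // ceil(4.5) = 5
    // Same along Y for the square 9x9 input, so the transformed tensor carries
    // 5 * 5 = 25 tiles on its Y dimension and 16 transform channels on Z.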
get_tile(src, src_tile, Coordinates(xi, yi, z, b)); + // Get the tile from the input tensor + get_tile(in, src_tile, Coordinates(xi, yi, z, b)); // Compute the transformation matrix_multiply(matrix, src_tile, tmp_tile); matrix_multiply(tmp_tile, matrix_transposed, dst_tile); - // Store the 4x4 output tile across the 16 channels - for(int i = 0; i < 16; ++i) + // Store the output tile across the channels + for(int i = 0; i < out_d; ++i) { int xo = z; int yo = x + y * num_tiles_x; - dst[coords2index(dst.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i]; + out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i]; } } } } } + + return out; } template -void winograd_output_transform3x3(const SimpleTensor &in, SimpleTensor &out, int num_tiles_x) +SimpleTensor winograd_filter_transform(const SimpleTensor &in, const TensorShape &output_shape, const WinogradInfo &winograd_info) { - ARM_COMPUTE_ERROR_ON(in.shape()[2] != 16); + ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format"); + + // Create reference + SimpleTensor out{ output_shape, in.data_type(), 1 }; + + const Size2D output_tile_size = winograd_info.output_tile_size; + const Size2D kernel_size = winograd_info.kernel_size; + + TensorShape kernel_tile_dims(kernel_size.width, kernel_size.height); + + // Calculate dimensions for the tile + const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1; + const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1; + const unsigned int input_tile_area = input_tile_w * input_tile_h; + + // Simple tensor for the input tile + SimpleTensor input_tile{ kernel_tile_dims, in.data_type(), 1 }; + + // Simple tensor for the transformation matrix + SimpleTensor trans_matrix{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 }; + + // Simple tensor for the transformation matrix transpose + SimpleTensor trans_matrix_transposed{ TensorShape(input_tile_w, kernel_tile_dims[0]), in.data_type(), 1 }; + + // Simple tensor for the temporary tile + SimpleTensor tmp_tile{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 }; + + // Simple tensor for the output tile + SimpleTensor transf_tile{ TensorShape(input_tile_w, input_tile_w), in.data_type(), 1 }; + + // Initialize matrix for the filter transform + initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER); + + // Transpose the transformation matrix + transpose_matrix(trans_matrix, trans_matrix_transposed); + + const int num_channels = in.shape()[2]; + const int num_filters = in.shape()[3]; + const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters); + + for(int n = 0; n < num_batches; ++n) + { + for(int w = 0; w < num_filters; ++w) + { + for(int z = 0; z < num_channels; ++z) + { + // Load the tile from the input tensor + get_tile(in, input_tile, Coordinates(0, 0, z, w, n)); + + // First transformation + matrix_multiply(trans_matrix, input_tile, tmp_tile); + + // Second transformation + matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile); + + // Store the output tile across the channels + const int output_offset = w + z * num_filters; + + // Store the values across the channels + for(unsigned int i = 0; i < input_tile_area; ++i) + { + out[output_offset + i * num_filters * num_channels] = transf_tile[i]; + } + } + } + } + + return out; +} + +template +SimpleTensor winograd_output_transform(const SimpleTensor &in, const TensorShape 
&output_shape, const WinogradInfo &winograd_info) +{ + ARM_COMPUTE_ERROR_ON_MSG(winograd_info.output_data_layout != DataLayout::NCHW, "Only supported NCHW data format"); + + const PadStrideInfo conv_info = winograd_info.convolution_info; + const Size2D input_dimensions = winograd_info.input_dimensions; + const Size2D output_tile_size = winograd_info.output_tile_size; + const Size2D kernel_size = winograd_info.kernel_size; + + // Create reference + SimpleTensor out{ output_shape, in.data_type(), 1 }; + + // Calculate dimensions for the tiles + const unsigned int in_tile_w = output_tile_size.width + kernel_size.width - 1; + const unsigned int in_tile_h = output_tile_size.height + kernel_size.height - 1; + const unsigned int out_tile_w = output_tile_size.width; + const unsigned int out_tile_h = output_tile_size.height; + + ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h)); ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]); - // Simple tensor for the 3x3 input tile - SimpleTensor input_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; + // Compute tile dimensions + // Input tile dimensions + TensorShape in_tile_dims(in_tile_w, in_tile_h); + + // Output tile dimensions + TensorShape out_tile_dims(output_tile_size.width, output_tile_size.height); + + // Transformation matrix dimensions + TensorShape tr_tile_dims(in_tile_w, output_tile_size.width); + + // Create tensors + // Simple tensor for the input tile + SimpleTensor input_tile{ in_tile_dims, in.data_type(), 1 }; // Simple tensor for the transformation matrix - SimpleTensor trans_matrix{ TensorShape(4u, 2u), in.data_type(), 1 }; + SimpleTensor trans_matrix{ tr_tile_dims, in.data_type(), 1 }; // Simple tensor for the transformation matrix transpose - SimpleTensor trans_matrix_transposed{ TensorShape(2u, 4u), in.data_type(), 1 }; - - // Simple tensor for the 4x3 temporary tile - SimpleTensor tmp_tile{ TensorShape(4u, 2u), in.data_type(), 1 }; - - // Simple tensor for the 4x4 output tile - SimpleTensor output_tile{ TensorShape(2u, 2u), in.data_type(), 1 }; - - // Initialize transformation matrix - // 1 | 1 | 1 | 1 - // 0 | 1 | -1 | -1 - trans_matrix[0 + 0 * 4] = 1.0f; - trans_matrix[1 + 0 * 4] = 1.0f; - trans_matrix[2 + 0 * 4] = 1.0f; - trans_matrix[3 + 0 * 4] = 0.0f; - trans_matrix[0 + 1 * 4] = 0.0f; - trans_matrix[1 + 1 * 4] = 1.0f; - trans_matrix[2 + 1 * 4] = -1.0f; - trans_matrix[3 + 1 * 4] = -1.0f; + SimpleTensor trans_matrix_transposed{ TensorShape(tr_tile_dims[1], tr_tile_dims[0]), in.data_type(), 1 }; + + // Simple tensor for the temporary tile + SimpleTensor tmp_tile{ tr_tile_dims, in.data_type(), 1 }; + + // Simple tensor for the output tile + SimpleTensor output_tile{ out_tile_dims, in.data_type(), 1 }; + + // Initialize matrix for the output transform + initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT); // Transpose the transformation matrix transpose_matrix(trans_matrix, trans_matrix_transposed); @@ -272,13 +369,22 @@ void winograd_output_transform3x3(const SimpleTensor &in, SimpleTensor &ou const int stridez_out = stridey_out * h_out; const int stridew_out = stridez_out * c_out; + // Compute number of elements to process in the X and Y direction + const int num_elements_x = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right(); + const int num_elements_y = input_dimensions.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom(); + const int num_tiles_x = std::ceil(num_elements_x / 
static_cast(output_tile_size.width)); + const int num_tiles_y = std::ceil(num_elements_y / static_cast(output_tile_size.height)); + + ARM_COMPUTE_UNUSED(num_tiles_y); + ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast(num_tiles_x * num_tiles_y)); + for(int n = 0; n < num_batches; ++n) { for(int y = 0; y < h_in; ++y) { for(int x = 0; x < w_in; ++x) { - // Load the 4x4 tile across the 16 channels of the input tensor + // Load the input tile tile across the channels of the input tensor for(int z = 0; z < c_in; ++z) { input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)]; @@ -290,102 +396,34 @@ void winograd_output_transform3x3(const SimpleTensor &in, SimpleTensor &ou // Second transformation matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile); - // Store the 2x2 output tile - const int xo = (y % num_tiles_x) * 2; - const int yo = (y / num_tiles_x) * 2; + // Store the output tile + const int xo = (y % num_tiles_x) * out_tile_w; + const int yo = (y / num_tiles_x) * out_tile_h; const int zo = x; - const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out); - out[output_offset + 0 * stridey_out + 0] = output_tile[0 + 0 * 2]; - - // Check out-of-bound writes - if(xo + 1 < w_out) - { - out[output_offset + 0 * stridey_out + 1] = output_tile[1 + 0 * 2]; - } - - if(yo + 1 < h_out) - { - out[output_offset + 1 * stridey_out + 0] = output_tile[0 + 1 * 2]; - } + const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out); - if((yo + 1 < h_out) && (xo + 1 < w_out)) + for(int yi = 0; yi < static_cast(out_tile_h); ++yi) { - out[output_offset + 1 * stridey_out + 1] = output_tile[1 + 1 * 2]; + for(int xi = 0; xi < static_cast(out_tile_w); ++xi) + { + // Check out-of-bound writes + if((xo + xi < w_out) && (yo + yi < h_out)) + { + out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * out_tile_w]; + } + } } } } } -} -} // namespace - -template -SimpleTensor winograd_input_transform(const SimpleTensor &src, const TensorShape &dst_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims) -{ - ARM_COMPUTE_ERROR_ON(kernel_dims.width != kernel_dims.height); - ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NCHW); - - SimpleTensor dst{ dst_shape, src.data_type() }; - - switch(kernel_dims.width) - { - case 3: - winograd_input_transform3x3(src, dst, conv_info); - break; - default: - ARM_COMPUTE_ERROR("Only 3x3 kernels are supported"); - } - - return dst; -} - -template -SimpleTensor winograd_filter_transform(const SimpleTensor &in, const TensorShape &output_shape, const Size2D &output_tile) -{ - ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format"); - - // Create reference - SimpleTensor out{ output_shape, in.data_type(), 1 }; - - switch(in.shape()[0]) - { - case 3: - winograd_filter_transform3x3(in, out, output_tile); - break; - default: - ARM_COMPUTE_ERROR("Only supported 3x3 kernel"); - break; - } - - return out; -} - -template -SimpleTensor winograd_output_transform(const SimpleTensor &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles) -{ - ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format"); - ARM_COMPUTE_ERROR_ON(kernel_dims.width != kernel_dims.height); - ARM_COMPUTE_ERROR_ON(in.shape()[1] != num_tiles.area()); - - // Create reference - SimpleTensor out{ output_shape, in.data_type(), 1 }; - - switch(kernel_dims.width) - { - case 3: - 
diff --git a/tests/validation/reference/Winograd.h b/tests/validation/reference/Winograd.h
index 62e136b09d..29181f1142 100644
--- a/tests/validation/reference/Winograd.h
+++ b/tests/validation/reference/Winograd.h
@@ -36,14 +36,22 @@ namespace validation
 {
 namespace reference
 {
+/** Winograd transform type */
+enum class WinogradTransformType
+{
+    INPUT,  /**< Winograd input transform */
+    FILTER, /**< Winograd filter transform */
+    OUTPUT  /**< Winograd output transform */
+};
+
 template <typename T>
-SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &src, const TensorShape &dst_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
+SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
 
 template <typename T>
-SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &output_tile);
+SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
 
 template <typename T>
-SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles);
+SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
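The new WinogradTransformType enum is what initialize_matrix_transform, called from the reference output transform above, dispatches on. A plausible sketch of its OUTPUT branch for F(2x2, 3x3) follows; the dispatch structure and assertions are assumptions, but the 2x4 A^T values are exactly the ones this patch deletes from winograd_output_transform3x3:

    #include <algorithm>
    #include <array>

    #include "arm_compute/core/Error.h"
    #include "tests/SimpleTensor.h"

    namespace sketch
    {
    using arm_compute::Size2D;
    using arm_compute::test::SimpleTensor;
    using arm_compute::test::validation::reference::WinogradTransformType;

    // Assumed dispatch shape; only the call site and the matrix values
    // appear in the patch itself.
    template <typename T>
    void initialize_matrix_transform(SimpleTensor<T> &dst, const Size2D &output_tile_size, const Size2D &kernel_size, WinogradTransformType type)
    {
        ARM_COMPUTE_ERROR_ON(type != WinogradTransformType::OUTPUT);
        ARM_COMPUTE_ERROR_ON(output_tile_size.width != 2 || output_tile_size.height != 2);
        ARM_COMPUTE_ERROR_ON(kernel_size.width != 3 || kernel_size.height != 3);

        // A^T (2 rows x 4 columns) for output tile 2x2 and kernel 3x3:
        //  1 |  1 |  1 |  0
        //  0 |  1 | -1 | -1
        static const std::array<float, 8> a_transposed = { { 1.f, 1.f, 1.f, 0.f,
                                                             0.f, 1.f, -1.f, -1.f } };

        ARM_COMPUTE_ERROR_ON(dst.num_elements() != static_cast<int>(a_transposed.size()));
        std::copy(a_transposed.begin(), a_transposed.end(), dst.data());
    }
    } // namespace sketch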
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 9ac4b343b3..e26b695809 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -1457,6 +1457,7 @@ inline std::string to_string(const GPUTarget &gpu_target)
     str << gpu_target;
     return str.str();
 }
+
 /** Formatted output of the DetectionWindow type. */
 inline ::std::ostream &operator<<(::std::ostream &os, const DetectionWindow &detection_window)
 {
@@ -1470,11 +1471,29 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DetectionWindow &detection_window)
     return os;
 }
 
+/** Formatted output of the WinogradInfo type. */
+inline ::std::ostream &operator<<(::std::ostream &os, const WinogradInfo &info)
+{
+    os << "{OutputTileSize=" << info.output_tile_size << ","
+       << "KernelSize=" << info.kernel_size << ","
+       << "PadStride=" << info.convolution_info << ","
+       << "OutputDataLayout=" << info.output_data_layout << "}";
+
+    return os;
+}
+
 inline std::string to_string(const DetectionWindow &type)
 {
     std::stringstream str;
     str << type;
     return str.str();
 }
+
+inline std::string to_string(const WinogradInfo &type)
+{
+    std::stringstream str;
+    str << type;
+    return str.str();
+}
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_TEST_TYPE_PRINTER_H__ */
\ No newline at end of file
--
cgit v1.2.1
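The printing support added above slots in next to the existing DetectionWindow printers, so a WinogradInfo can be logged like any other type. A small usage sketch (constructor field order assumed, as in the earlier sketches):

    #include <iostream>

    #include "arm_compute/core/Types.h"
    #include "utils/TypePrinter.h"

    int main()
    {
        using namespace arm_compute;

        // Assumed field order: output tile, kernel size, input dimensions,
        // convolution info, output data layout.
        const WinogradInfo info(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(224U, 224U),
                                PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW);

        std::cout << info << std::endl;       // streams via the new operator<<
        std::cout << to_string(info) << "\n"; // same output via to_string
        return 0;
    }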