From 7df27869aff38b07b50e4fe589f6b2cf51954a92 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 30 May 2018 11:44:26 +0100 Subject: COMPMID-1162: Enable NHWC data layout support for NEWinogradConvolutionLayer - part1 In this first part we reworked the configuration of the kernels as before we passed the raw pointer to the buffer within the configuration of the function Change-Id: I83d3cb64c562303093c7f0ae52395ecd080a5d52 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133560 Tested-by: Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Gian Marco Iodice --- .../kernels/NEWinogradConvolutionLayerKernel.h | 198 ++++++++++++--------- 1 file changed, 117 insertions(+), 81 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index 6b8866cb2e..68c133ee37 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -42,15 +42,15 @@ public: /** Determine how much memory (in units of TIn) to allocate for the * transformed input. * - * @param[in] n_batches Number of batches in the input tensor. - * @param[in] n_channels Number of feature maps in the input tensor. - * @param[in] n_rows Number of rows in each feature map. - * @param[in] n_cols Number of columns in each feature map. + * @param[in] num_batches Number of batches in the input tensor. + * @param[in] num_channels Number of feature maps in the input tensor. + * @param[in] num_rows Number of rows in each feature map. + * @param[in] num_cols Number of columns in each feature map. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". * * @return Storage size (in units of TIn) required. 
*/ - virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0; + virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0; /** Gets the stride between matrices in the input worspace * @@ -64,16 +64,17 @@ public: /** Configure the output transform kernel. * - * @param[in] input Input tensor data - * @param[in] n_batches Number of batches in input tensor. - * @param[in] n_rows Number of rows in input tensor. - * @param[in] n_cols Number of columns in input tensor. - * @param[in] n_channels Number of channels in input tensor. + * @param[in] input_nhwc Input tensor in NHWC data layout format. + * @param[in] num_batches Number of batches in input tensor. + * @param[in] num_rows Number of rows in input tensor. + * @param[in] num_cols Number of columns in input tensor. + * @param[in] num_channels Number of channels in input tensor. * @param[in] padding Padding type. * @param[out] output Base of output matrices. * @param[in] matrix_stride Stride between output matrices. 
*/ - virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0; + virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels, + const PaddingType padding, T *const output, const int matrix_stride) = 0; /** Destructor */ virtual ~INEWinogradLayerTransformInputKernel() @@ -86,22 +87,33 @@ template { public: + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default; + /** Default destructor */ + ~NEWinogradLayerTransformInputKernel() = default; + /** Determine how much memory (in units of TIn) to allocate for the * transformed input. * - * @param[in] n_batches Number of batches in the input tensor. - * @param[in] n_channels Number of feature maps in the input tensor. - * @param[in] n_rows Number of rows in each feature map. - * @param[in] n_cols Number of columns in each feature map. + * @param[in] num_batches Number of batches in the input tensor. + * @param[in] num_channels Number of feature maps in the input tensor. + * @param[in] num_rows Number of rows in each feature map. + * @param[in] num_cols Number of columns in each feature map. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". 
* * @return Storage size (in units of TIn) required. */ unsigned int get_input_storage_size( - int n_batches, - int n_channels, - int n_rows, - int n_cols, + int num_batches, + int num_channels, + int num_rows, + int num_cols, bool same_padding) const override; /** Gets the stride between matrices in the input worspace @@ -124,21 +136,21 @@ public: /** Configure the output transform kernel. * - * @param[in] input Input tensor data. Data types supported: F32. - * @param[in] n_batches Number of batches in input tensor. - * @param[in] n_rows Number of rows in input tensor. - * @param[in] n_cols Number of columns in input tensor. - * @param[in] n_channels Number of channels in input tensor. + * @param[in] input_nhwc Input tensor. Data types supported: F32. Layout supported NHWC. + * @param[in] num_batches Number of batches in input tensor. + * @param[in] num_rows Number of rows in input tensor. + * @param[in] num_cols Number of columns in input tensor. + * @param[in] num_channels Number of channels in input tensor. * @param[in] padding Padding type. * @param[out] output Base of output matrices. * @param[in] matrix_stride Stride between output matrices. */ void configure( - const T *const input, - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, + const ITensor *input_nhwc, + const int num_batches, + const int num_rows, + const int num_cols, + const int num_channels, const PaddingType padding, T *const output, const int matrix_stride) override; @@ -163,7 +175,14 @@ public: private: using InputTransform = typename WinogradBase::template InputTransform; - std::unique_ptr _transform; + const ITensor *_input_nhwc; + int _num_batches; /**< Number of batches in input tensor. */ + int _num_rows; /**< Number of rows in input tensor. */ + int _num_cols; /**< Number of columns in input tensor. */ + int _num_channels; /**< Number of channels in input tensor. */ + PaddingType _padding; /**< Padding type. 
*/ + T *_output; /**< Base of output matrices. */ + int _matrix_stride; /**< Stride between output matrices. */ }; /** Interface for the NEON kernel to perform Winograd output transform. */ @@ -174,15 +193,15 @@ public: /** Determine how much memory (in units of TOut) to allocate for the * (Winograd domain) output. * - * @param[in] n_batches Number of batches in the output tensor. - * @param[in] n_rows Number of rows in each feature map of the input tensor. - * @param[in] n_cols Number of columns in each feature map of the input tensor. - * @param[in] n_output_channels Number of feature maps in the output tensor. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * @param[in] num_batches Number of batches in the output tensor. + * @param[in] num_rows Number of rows in each feature map of the input tensor. + * @param[in] num_cols Number of columns in each feature map of the input tensor. + * @param[in] num_output_channels Number of feature maps in the output tensor. + * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". * * @return Storage size (in units of TOut) required. */ - virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0; + virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const = 0; /** Gets the stride between matrices in the output worspace * @@ -209,21 +228,21 @@ public: * @param[in] biases Pointer to the biases tensor. * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain. * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution::get_output_matrix_stride() - * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain. - * @param[in] n_batches Number of batches in the input tensor. 
- * @param[in] n_rows Number of rows in output tensor. - * @param[in] n_cols Number of columns in output tensor. - * @param[in] n_channels Number of feature maps in the output tensor. + * @param[out] output_nhwc Pointer to the output tensor in NHWC data layout, in the spatial domain. + * @param[in] num_batches Number of batches in the input tensor. + * @param[in] num_rows Number of rows in output tensor. + * @param[in] num_cols Number of columns in output tensor. + * @param[in] num_channels Number of feature maps in the output tensor. */ virtual void configure( const ITensor *biases, const T *const output_workingspace, const int matrix_stride, - T *const output, - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels) = 0; + ITensor *const output_nhwc, + const int num_batches, + const int num_rows, + const int num_cols, + const int num_channels) = 0; virtual ~INEWinogradLayerTransformOutputKernel() { @@ -257,15 +276,15 @@ public: /** Determine how much memory (in units of TOut) to allocate for the * (Winograd domain) output. * - * @param[in] n_batches Number of batches in the output tensor. - * @param[in] n_rows Number of rows in each feature map of the input tensor. - * @param[in] n_cols Number of columns in each feature map of the input tensor. - * @param[in] n_output_channels Number of feature maps in the output tensor. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * @param[in] num_batches Number of batches in the output tensor. + * @param[in] num_rows Number of rows in each feature map of the input tensor. + * @param[in] num_cols Number of columns in each feature map of the input tensor. + * @param[in] num_output_channels Number of feature maps in the output tensor. + * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". * * @return Storage size (in units of TOut) required. 
*/ - unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override; + unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const override; /** Gets the stride between matrices in the output worspace * @@ -291,21 +310,21 @@ public: * @param[in] biases Pointer to the biases tensor. * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain. * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution::get_output_matrix_stride() - * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain. - * @param[in] n_batches Number of batches in the input tensor. - * @param[in] n_rows Number of rows in output tensor. - * @param[in] n_cols Number of columns in output tensor. - * @param[in] n_channels Number of feature maps in the output tensor. + * @param[out] output_nhwc Pointer to a tensor with NHWC data layout, in the spatial domain. + * @param[in] num_batches Number of batches in the input tensor. + * @param[in] num_rows Number of rows in output tensor. + * @param[in] num_cols Number of columns in output tensor. + * @param[in] num_channels Number of feature maps in the output tensor. 
*/ void configure( const ITensor *biases, const T *const output_workingspace, const int matrix_stride, - T *const output, - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels) override; + ITensor *const output_nhwc, + const int num_batches, + const int num_rows, + const int num_cols, + const int num_channels) override; void run(const Window &window, const ThreadInfo &info) override; @@ -329,11 +348,11 @@ private: const T *_output_workspace; int _matrix_stride; int _matrix_row_stride; - T *_output; - int _n_batches; - int _n_rows; - int _n_cols; - int _n_channels; + ITensor *_output_nhwc; + int _num_batches; + int _num_rows; + int _num_cols; + int _num_channels; }; /** Interface for the NEON kernel to perform Winograd weights transform. */ @@ -344,12 +363,12 @@ public: /** Determine how much memory (in units of T) to allocate for the * transformed weights. * - * @param[in] n_output_channels Number of output feature maps. - * @param[in] n_input_channels Number of input feature maps. + * @param[in] num_output_channels Number of output feature maps. + * @param[in] num_input_channels Number of input feature maps. * * @return Storage size (in units of T) required. */ - virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0; + virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0; /** Gets the stride between matrices in the kernel worspace * * @param[in] kernel_shape The shape of the weights tensor. @@ -360,13 +379,14 @@ public: /** Configure the weights transform kernel. * - * @param[in] weights_hwio Pointer to the weights tensor - * @param[in] output Pointer to working space for the output tensor in the Winograd domain. - * @param[in] matrix_stride Stride across matrices in the output workspace. - * @param[in] n_output_channels Number of filters. - * @param[in] n_input_channels Number of channels in each filter. 
+ * @param[in] weights_hwio Pointer to the weights tensor + * @param[in] output Pointer to working space for the output tensor in the Winograd domain. + * @param[in] matrix_stride Stride across matrices in the output workspace. + * @param[in] num_output_channels Number of filters. + * @param[in] num_input_channels Number of channels in each filter. */ - virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0; + + virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0; virtual ~INEWinogradLayerTransformWeightsKernel() { @@ -378,6 +398,17 @@ template { public: + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default; + /** Default destructor */ + ~NEWinogradLayerTransformWeightsKernel() = default; + /** Default constructor. 
*/ NEWinogradLayerTransformWeightsKernel(); const char *name() const override @@ -397,8 +428,8 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); // Inherited methods overridden: - void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override; - unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override; + void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override; + unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override; int get_matrix_stride(const KernelShape &kernel_shape) const override; void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; @@ -407,7 +438,12 @@ private: using WinogradBase = winograd::WinogradGEMM; using WinogradConv = typename WinogradBase::template Convolution; using WeightsTransform = typename WinogradBase::template WeightsTransform; - std::unique_ptr _transform; + + const ITensor *_weights_hwio; + T *_output; + int _matrix_stride; + int _num_output_channels; + int _num_input_channels; }; /** Interface for the NEON kernel to perform Winograd. */ @@ -421,7 +457,7 @@ public: /** Initialise the kernel * * @param[in] n_gemms Number of GEMMs to compute. - * @param[in] M in_shape.n_batches * tile_rows * tile_cols. + * @param[in] M in_shape.num_batches * tile_rows * tile_cols. * @param[in] K Number of channels in the input tensor. * @param[in] N Number of channels in the output tensor. * @param[in] a_matrix_stride Stride between input matrices. @@ -498,7 +534,7 @@ public: /** Initialise the kernel * * @param[in] n_gemms Number of GEMMs to compute. - * @param[in] M in_shape.n_batches * tile_rows * tile_cols. 
+ * @param[in] M in_shape.num_batches * tile_rows * tile_cols. * @param[in] K Number of channels in the input tensor. * @param[in] N Number of channels in the output tensor. * @param[in] a_matrix_stride Stride between input matrices. -- cgit v1.2.1