diff options
Diffstat (limited to 'arm_compute/core/NEON/kernels/winograd')
5 files changed, 54 insertions, 71 deletions
diff --git a/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp b/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp index 725f6cab65..6a9984a24a 100644 --- a/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp +++ b/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp @@ -29,6 +29,7 @@ void direct_convolution( const Tensor4D<Tensor4DShape, float>& input, const Tensor4D<KernelShape, float>& kernel, + const Tensor4D<Tensor4DShape, float>& biases, Tensor4D<Tensor4DShape, float>& output, const PaddingType padding ); diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp index 39b444184e..075765a513 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp +++ b/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp @@ -71,7 +71,7 @@ namespace winograd const int row_offset = (tile_i == 0) ? 0 : ((padding_type == PADDING_VALID) ? 0 : 1); const T* const input_base_row = ( - input_base_batch + ((inner_tile_rows - 2)*tile_i - row_offset)*input_row_stride + input_base_batch + ((inner_tile_rows - (kernel_rows - 1))*tile_i - row_offset)*input_row_stride ); T* const outptr_base_row = outptr_base_batch + tile_i*output_row_stride; diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp b/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp index 7fa5ee9617..0dd719751b 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp +++ b/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp @@ -35,6 +35,7 @@ namespace winograd const T* const matrix_base, const int matrix_stride, const int matrix_row_stride, + const T* const biases, T* const output ) { @@ -69,8 +70,9 @@ namespace winograd // Process the row process_tile_row( tile_N, output_shape.n_channels, matrix_tile_row, matrix_stride, - matrix_row_stride, outptr_row, output_row_stride, - output_col_stride, row_pad_bottom, pad_right + 
matrix_row_stride, biases, + outptr_row, output_row_stride, output_col_stride, row_pad_bottom, + pad_right ); } } @@ -85,6 +87,7 @@ namespace winograd const T* const matrix_base, const int matrix_stride, const int matrix_row_stride, + const T* const biases, T* const output, const int output_row_stride, const int output_col_stride, @@ -102,7 +105,7 @@ namespace winograd // Perform the output transformation tile_fns[row_pad_bottom][tile_pad_right]( - n_channels, matrix_row, matrix_stride, + n_channels, matrix_row, matrix_stride, biases, outptr, output_row_stride, output_col_stride ); } @@ -131,14 +134,17 @@ namespace winograd const T* const matrix_base, const int matrix_stride, const int matrix_row_stride, + const T* const biases, T* const output, const int n_batches, const int n_rows, const int n_cols, const int n_channels - ) : _matrix_base(matrix_base), _matrix_stride(matrix_stride), _matrix_row_stride(matrix_row_stride), - _outptr(output), _n_batches(n_batches), _n_rows(n_rows), _n_cols(n_cols), _n_channels(n_channels), - _tile_M(iceildiv(n_rows, output_tile_rows)), _tile_N(iceildiv(n_cols, output_tile_cols)) + ) : _matrix_base(matrix_base), _biases(biases), + _matrix_stride(matrix_stride), _matrix_row_stride(matrix_row_stride), + _outptr(output), _n_batches(n_batches), _n_rows(n_rows), _n_cols(n_cols), + _n_channels(n_channels), _tile_M(iceildiv(n_rows, output_tile_rows)), + _tile_N(iceildiv(n_cols, output_tile_cols)) { } @@ -168,7 +174,8 @@ namespace winograd _n_batches, _n_rows, _n_cols, _n_channels, NHWC }; execute( - output_shape, _matrix_base, _matrix_stride, _matrix_row_stride, _outptr + output_shape, _matrix_base, _matrix_stride, _matrix_row_stride, _biases, + _outptr ); } } // namespace winograd diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp b/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp index adca48a6d6..2ea70f182b 100644 --- a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp +++ 
b/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp @@ -183,7 +183,7 @@ class WinogradGEMM const int row_pad_top, const int row_pad_left, const int row_pad_bottom, - const int row_pad_right + const int n_cols ); static constexpr int max_pad_bottom = inner_tile_rows - 1; @@ -225,6 +225,7 @@ class WinogradGEMM const T* const matrix_base, const int matrix_stride, const int matrix_row_stride, + const T* const biases, T* const output ); @@ -236,6 +237,7 @@ class WinogradGEMM const T* const matrix_base, /** Pointer to base of matrices. */ const int matrix_stride, /** Stride between matrices. */ const int matrix_row_stride, /** Stride within a matrix. */ + const T* const biases, /** Pointer to biases vector. */ T* const output, /** Pointer to output tensor. */ const int n_batches, /** Number of batches in output tensor. */ const int n_rows, /** Number of rows in output tensor. */ @@ -257,6 +259,7 @@ class WinogradGEMM const T* const matrix_base, const int matrix_stride, const int matrix_row_stride, + const T* const biases, T* const output, const int output_row_stride, const int output_col_stride, @@ -270,14 +273,15 @@ class WinogradGEMM /** Prepare a single tile of the output tensor. */ template <int pad_bottom, int pad_right> - static void process_tile(int, const T*, int, T*, int, int); + static void process_tile(int, const T*, int, const T*, T*, int, int); // Array of methods to produce tiles of output tensor. - typedef void (*TileFn)(int, const T*, int, T*, int, int); + typedef void (*TileFn)(int, const T*, int, const T*, T*, int, int); static const TileFn tile_fns[max_pad_bottom][max_pad_right]; /** Member constants for instances of the transform. 
*/ const T* const _matrix_base; + const T* const _biases; const int _matrix_stride, _matrix_row_stride; T* const _outptr; const int _n_batches, _n_rows, _n_cols, _n_channels, _tile_M, _tile_N; @@ -328,6 +332,7 @@ class WinogradGEMM void execute( TOut* const output, const TIn* const input, + const TOut* const biases, void* working_space=NULL, const int n_threads=1 ); @@ -336,6 +341,7 @@ class WinogradGEMM void execute( TOut* const output, const TIn* const input, + const TOut* const biases, const int n_threads ); diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp b/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp index 4559312df4..1db63d750b 100644 --- a/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp +++ b/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp @@ -74,87 +74,56 @@ class WinogradConvolutionLayer /** Determine how much memory (in units of TIn) to allocate for the * transformed weights. - * - * @param[in] n_output_channels Number of output feature maps. - * @param[in] n_input_channels Number of input feature maps. */ static unsigned int get_weight_storage_size( - const int n_output_channels, - const int n_input_channels + const int n_output_channels, /** Number of output feature maps. */ + const int n_input_channels /** Number of input feature maps. */ ); /** Determine how much memory (in units of TIn) to allocate for the * transformed input. - * - * @param[in] n_batches Number of batches in the input tensor. - * @param[in] n_channels Number of feature maps in the input tensor. - * @param[in] n_rows Number of rows in each feature map. - * @param[in] n_cols Number of columns in each feature map. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". */ static unsigned int get_input_storage_size( - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - const bool same_padding + const int n_batches, /** Number of batches in the input tensor. 
*/ + const int n_channels, /** Number of feature maps in the input tensor. */ + const int n_rows, /** Number of rows in each feature map. */ + const int n_cols, /** Number of columns in each feature map. */ + const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ ); /** Determine how much memory (in units of TOut) to allocate for the * (Winograd domain) output. - * - * @param[in] n_batches Number of batches in the output tensor. - * @param[in] n_rows Number of rows in each feature map of the input tensor. - * @param[in] n_cols Number of columns in each feature map of the input tensor. - * @param[in] n_output_channels Number of feature maps in the output tensor. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". */ static unsigned int get_output_storage_size( - const int n_batches, - const int n_rows, - const int n_cols, - const int n_output_channels, - const bool same_padding + const int n_batches, /** Number of batches in the output tensor. */ + const int n_rows, /** Number of rows in each feature map of the input tensor. */ + const int n_cols, /** Number of columns in each feature map of the input tensor. */ + const int n_output_channels, /** Number of feature maps in the output tensor. */ + const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ ); - /** Get the shape (rows, cols) of a feature map of the output tensor. - * - * @param[in] n_input_rows Number of rows in the input feature map. - * @param[in] n_input_cols Number of columns in the input feature map. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". - */ + /** Get the shape (rows, cols) of a feature map of the output tensor. */ static std::pair<int, int> get_output_feature_map_shape( - const int n_input_rows, - const int n_input_cols, - const bool same_padding + const int n_input_rows, /** Number of rows in the input feature map. */ + const int n_input_cols, /** Number of columns in the input feature map. 
*/ + const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ ); /** Create a new Winograd convolution layer. - * @param[in] n_batches Number of batches in the input and output tensors. - * @param[in] n_input_channels Number of feature maps in a batch of the input tensor. - * @param[in] n_input_rows Number of rows in a feature map of the input tensor. - * @param[in] n_input_cols Number of columns in a feature map of the input tensor. - * @param[in] n_output_channels Number of feature maps in the output tensor. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". - * @param[in] weights Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. - * @param[out] weights_storage Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size - * @param[in] input Pointer to NHWC ordered input tensor, in the spatial domain. - * @param[out] winograd_input Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. - * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain. - * @param[out] winograd_output Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */ WinogradConvolutionLayer( - const int n_batches, - const int n_input_channels, - const int n_input_rows, - const int n_input_cols, - const int n_output_channels, - const bool same_padding, - const TIn* const weights, - TIn* const weights_storage, - const TIn* const input, - TIn* const winograd_input, - TOut* const output, - TOut* const winograd_output + const int n_batches, /** Number of batches in the input and output tensors. */ + const int n_input_channels, /** Number of feature maps in a batch of the input tensor. 
*/ + const int n_input_rows, /** Number of rows in a feature map of the input tensor. */ + const int n_input_cols, /** Number of columns in a feature map of the input tensor. */ + const int n_output_channels, /** Number of feature maps in the output tensor. */ + const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */ + const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps". */ + TIn* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */ + const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */ + TIn* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */ + const TOut* const biases, /** Pointer to biases vector. */ + TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */ + TOut* const winograd_output /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */ + ); }; |