diff options
author | Pablo Tello <pablo.tello@arm.com> | 2018-10-29 13:13:23 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:55:45 +0000 |
commit | f718ce255508f74729bab40fe30c5dab0f3a978b (patch) | |
tree | 31974d3ffc95267838cb27718bf8eb8b31cf822c /src/runtime/NEON | |
parent | 4b90865ab985d571f70c60583cdfb8c7a65f1670 (diff) | |
download | ComputeLibrary-f718ce255508f74729bab40fe30c5dab0f3a978b.tar.gz |
COMPMID-1701: small cleanup NEWinogradConvolutionLayer.
Change-Id: I0cf221c706c3d957423941d3aa9a9262dcb00c00
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/155593
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r-- | src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp | 35 |
1 file changed, 12 insertions, 23 deletions
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 44ea3a0881..569db37bf6 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -448,6 +448,8 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * // Configure the InputTransform _memory_group.manage(&_input_workspace); + _memory_group.manage(&_output_workspace); + if(data_layout == DataLayout::NCHW) { // configure the kernel to transform the input tensor from NCHW -> NHWC @@ -455,47 +457,34 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * _input_nhwc.allocator()->allocate(); transform_input_kernel->configure(&_input_nhwc, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type, &_input_workspace, input_matrix_stride); - } - else - { - transform_input_kernel->configure(_input, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type, - &_input_workspace, input_matrix_stride); - } - // Configure WeightsTransform - if(data_layout == DataLayout::NCHW) - { // Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map] _permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 2U, 0U, 1U)); transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels); + + //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method + transform_output_kernel->configure(biases, &_output_workspace, + output_matrix_stride, &_output_nhwc, + in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels); } else { + transform_input_kernel->configure(_input, in_shape.n_batches, 
in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type, + &_input_workspace, input_matrix_stride); + // Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map] _permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 0U, 1U, 2U)); transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels); - } - _weights_hwio.allocator()->allocate(); - - // Configure OutputTransform - //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method - _memory_group.manage(&_output_workspace); - if(data_layout == DataLayout::NCHW) - { - transform_output_kernel->configure(biases, &_output_workspace, - output_matrix_stride, &_output_nhwc, - in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels); - } - else - { transform_output_kernel->configure(biases, &_output_workspace, output_matrix_stride, _output, in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels); } + _weights_hwio.allocator()->allocate(); + _asm_glue.configure(&_input_workspace, &_kernel_storage, &_output_workspace, 1.0f, 0.f, false); _input_workspace.allocator()->allocate(); _kernel_storage.allocator()->allocate(); |