aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2018-10-29 13:13:23 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit f718ce255508f74729bab40fe30c5dab0f3a978b (patch)
tree 31974d3ffc95267838cb27718bf8eb8b31cf822c /src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
parent 4b90865ab985d571f70c60583cdfb8c7a65f1670 (diff)
download ComputeLibrary-f718ce255508f74729bab40fe30c5dab0f3a978b.tar.gz
COMPMID-1701: small cleanup NEWinogradConvolutionLayer.
Change-Id: I0cf221c706c3d957423941d3aa9a9262dcb00c00
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/155593
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp 35
1 files changed, 12 insertions, 23 deletions
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index 44ea3a0881..569db37bf6 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -448,6 +448,8 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
// Configure the InputTransform
_memory_group.manage(&_input_workspace);
+ _memory_group.manage(&_output_workspace);
+
if(data_layout == DataLayout::NCHW)
{
// configure the kernel to transform the input tensor from NCHW -> NHWC
@@ -455,47 +457,34 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
_input_nhwc.allocator()->allocate();
transform_input_kernel->configure(&_input_nhwc, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
&_input_workspace, input_matrix_stride);
- }
- else
- {
- transform_input_kernel->configure(_input, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
- &_input_workspace, input_matrix_stride);
- }
- // Configure WeightsTransform
- if(data_layout == DataLayout::NCHW)
- {
// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
_permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 2U, 0U, 1U));
transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
+
+ //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
+ transform_output_kernel->configure(biases, &_output_workspace,
+ output_matrix_stride, &_output_nhwc,
+ in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
}
else
{
+ transform_input_kernel->configure(_input, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
+ &_input_workspace, input_matrix_stride);
+
// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
_permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 0U, 1U, 2U));
transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
- }
- _weights_hwio.allocator()->allocate();
-
- // Configure OutputTransform
- //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
- _memory_group.manage(&_output_workspace);
- if(data_layout == DataLayout::NCHW)
- {
- transform_output_kernel->configure(biases, &_output_workspace,
- output_matrix_stride, &_output_nhwc,
- in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
- }
- else
- {
transform_output_kernel->configure(biases, &_output_workspace,
output_matrix_stride, _output,
in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
}
+ _weights_hwio.allocator()->allocate();
+
_asm_glue.configure(&_input_workspace, &_kernel_storage, &_output_workspace, 1.0f, 0.f, false);
_input_workspace.allocator()->allocate();
_kernel_storage.allocator()->allocate();