aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-04-17 13:01:54 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-04-18 10:41:05 +0000
commit 7179837ac9ef82c6f784aaee999be347bc7764fd (patch)
tree dc6db86ed3d6756491d84e3450e60f8f92066293
parent e73686ac797be2d19cd9bed26d690e1431e3d848 (diff)
download ComputeLibrary-7179837ac9ef82c6f784aaee999be347bc7764fd.tar.gz
COMPMID-2112: Rework memory manager support in NEWinogradLayer
Change-Id: I3b86de829f70963f25564a6934926d0a13062763
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1006
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp 94
1 file changed, 44 insertions, 50 deletions
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index b4247be6cb..2332450c04 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -380,20 +380,17 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
// Kernel Storage
const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels,
in_channels)
- * data_type_size
- + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
+ * data_type_size;
// Input storage
const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols,
use_same_padding)
- * data_type_size
- + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
+ * data_type_size;
// Output storage
const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels,
use_same_padding)
- * data_type_size
- + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
+ * data_type_size;
;
const KernelShape kernel_shape({ out_channels, static_cast<int>(kernel_size.height), static_cast<int>(kernel_size.width), in_channels });
const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(kernel_shape);
@@ -446,62 +443,58 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
1, _output->info()->data_type());
_output_nhwc.allocator()->init(info);
- // Configure the InputTransform
- _memory_group.manage(&_input_transformed);
- _memory_group.manage(&_output_transformed);
- _memory_group.manage(&_input_workspace);
- _memory_group.manage(&_output_workspace);
+ const ITensor *input_to_use = _input;
+ ITensor *output_to_use = _output;
+ PermutationVector weights_permutation_vector(3U, 0U, 1U, 2U);
+ const unsigned int max_num_threads = NEScheduler::get().num_threads();
+ // Configure the kernel to transform the input tensor from NCHW -> NHWC
if(data_layout == DataLayout::NCHW)
{
- // configure the kernel to transform the input tensor from NCHW -> NHWC
+ _memory_group.manage(&_input_nhwc);
_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
- _input_nhwc.allocator()->allocate();
- transform_input_kernel->configure(&_input_nhwc, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
- &_input_transformed, input_matrix_stride, &_input_workspace);
-
- // Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
- _permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 2U, 0U, 1U));
-
- transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
-
- //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
- _memory_group.manage(&_output_nhwc);
- transform_output_kernel->configure(biases, &_output_transformed,
- output_matrix_stride, &_output_nhwc,
- in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels, &_output_workspace);
+ input_to_use = &_input_nhwc;
+ weights_permutation_vector = PermutationVector(3U, 2U, 0U, 1U);
}
- else
- {
- transform_input_kernel->configure(_input, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
- &_input_transformed, input_matrix_stride, &_input_workspace);
-
- // Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
- _permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 0U, 1U, 2U));
-
- transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
-
- transform_output_kernel->configure(biases, &_output_transformed,
- output_matrix_stride, _output,
- in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels, &_output_workspace);
- }
-
- //Configure input/output workspaces, get_working_space_size() must be called after configure()
- const unsigned int max_num_threads = NEScheduler::get().num_threads_hint();
- const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
- const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
- TensorInfo input_workspace_info(TensorShape(input_workspace_size), 1, _input->info()->data_type());
+ // Configure input transform kernel
+ _memory_group.manage(&_input_transformed);
+ _memory_group.manage(&_input_workspace);
+ transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
+ &_input_transformed, input_matrix_stride, &_input_workspace);
+ const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
+ TensorInfo input_workspace_info(TensorShape(input_workspace_size), 1, _input->info()->data_type());
_input_workspace.allocator()->init(input_workspace_info);
+ _input_workspace.allocator()->allocate();
+ if(data_layout == DataLayout::NCHW)
+ {
+ _input_nhwc.allocator()->allocate();
+ }
- TensorInfo output_workspace_info(TensorShape(output_workspace_size), 1, _output->info()->data_type());
- _output_workspace.allocator()->init(output_workspace_info);
+ // Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
+ _permute_weights.configure(weights, &_weights_hwio, weights_permutation_vector);
+ transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
+ // Configure GEMM function
+ _memory_group.manage(&_output_transformed);
_gemm_function.configure(&_input_transformed, &_kernel_storage, nullptr, &_output_transformed, 1.0f, 0.f);
_input_transformed.allocator()->allocate();
- _output_transformed.allocator()->allocate();
- _input_workspace.allocator()->allocate();
+
+ // Configure output transform function
+ // The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
+ if(data_layout == DataLayout::NCHW)
+ {
+ _memory_group.manage(&_output_nhwc);
+ output_to_use = &_output_nhwc;
+ }
+ transform_output_kernel->configure(biases, &_output_transformed,
+ output_matrix_stride, output_to_use,
+ in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels, &_output_workspace);
+ const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
+ TensorInfo output_workspace_info(TensorShape(output_workspace_size), 1, _output->info()->data_type());
+ _output_workspace.allocator()->init(output_workspace_info);
_output_workspace.allocator()->allocate();
+ _output_transformed.allocator()->allocate();
// Reorder the convoluted output to ACL's ordering NCHW
if(data_layout == DataLayout::NCHW)
@@ -541,6 +534,7 @@ void NEWinogradConvolutionLayer::run()
//Run 16 GEMMs in multiple threads, each kernel runs one or more GEMMs
_gemm_function.run();
+
// Transform output tensor to the spatial domain
NEScheduler::get().schedule(_transform_output_kernel.get(), Window::DimX);