From 20394d526820ca97df4c0db91ec2571b98280d6d Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Thu, 2 Aug 2018 11:29:09 +0100 Subject: COMPMID-1248 Enabled memory manager in NEWinogradConvolutionLayer Change-Id: I7bbab53f18a42f0879d80122a52bb6bdca4b8631 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142413 Tested-by: Jenkins Reviewed-by: Gian Marco Iodice --- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp | 12 ++++-------- src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'src/runtime/NEON') diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index 8ba620fe51..39fee1bfa5 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -159,7 +159,7 @@ private: * @param[in] memory_group Tensor memory group. * @param[in] alignment Workspace memory alignment. */ - void allocate_workspace(size_t workspace_size, MemoryGroup *memory_group, size_t alignment); + void allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment); /** Assembly Gemm kernel */ std::unique_ptr> _gemm_kernel_asm{ nullptr }; @@ -204,8 +204,7 @@ void Fallback::configure(const ITensor *a, const ITensor { // Allocate workspace const unsigned int alignment = 4096; - //FIXME: is memory_group ever null ? - allocate_workspace(workspace_size, &memory_group, alignment); + allocate_workspace(workspace_size, memory_group, alignment); } //if we disable this code below in brackets then ConvLayer deadlocks when threads > 1 and @@ -256,14 +255,11 @@ void Fallback::prepare() } template -void Fallback::allocate_workspace(size_t workspace_size, MemoryGroup *memory_group, size_t alignment) +void Fallback::allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment) { ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0"); _workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment); - if(memory_group != nullptr) - { - memory_group->manage(&_workspace); - } + memory_group.manage(&_workspace); _workspace.allocator()->allocate(); } diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index a71eade9a1..11bb2d881b 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -263,23 +263,17 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size); _input_workspace.allocator()->init(a_info, storage_alignment); - _input_workspace.allocator()->allocate(); - _kernel_storage.allocator()->init(b_info, storage_alignment); - _kernel_storage.allocator()->allocate(); - _output_workspace.allocator()->init(d_info, storage_alignment); - _output_workspace.allocator()->allocate(); // configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output() TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0), _output->info()->dimension(1), _output->info()->dimension(3)), 1, _output->info()->data_type()); _output_nhwc.allocator()->init(info); - _output_nhwc.allocator()->allocate(); // Configure the InputTransform - + _memory_group.manage(&_input_workspace); if(data_layout == DataLayout::NCHW) { // configure the kernel to transform the input tensor from NCHW -> NHWC @@ -314,6 +308,7 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * // Configure OutputTransform //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method + _memory_group.manage(&_output_workspace); if(data_layout == DataLayout::NCHW) { transform_output_kernel->configure(biases, &_output_workspace, @@ -328,10 +323,15 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * } _asm_glue.configure(&_input_workspace, &_kernel_storage, &_output_workspace, 1.0f, 0.f, false); + _input_workspace.allocator()->allocate(); + _kernel_storage.allocator()->allocate(); + _output_workspace.allocator()->allocate(); // Reorder the convoluted output to ACL's ordering NCHW _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U)); + _output_nhwc.allocator()->allocate(); + _transform_input_kernel = std::move(transform_input_kernel); _transform_weights_kernel = std::move(transform_weights_kernel); _transform_output_kernel = std::move(transform_output_kernel); -- cgit v1.2.1