From e7f7b5507c6d9e70b946880a774205d9b1a8e59c Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Thu, 28 Sep 2017 10:43:38 +0100
Subject: COMPMID-417 - Added support for memory manager in neon_cnn example

Change-Id: I3bd91a9f234b8b42a96454c0d2e45f6098c543a5
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89481
Reviewed-by: Georgios Pinitas
Tested-by: Kaizen
---
 examples/neon_cnn.cpp | 95 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 81 insertions(+), 14 deletions(-)

diff --git a/examples/neon_cnn.cpp b/examples/neon_cnn.cpp
index 238f0572da..198890c9ba 100644
--- a/examples/neon_cnn.cpp
+++ b/examples/neon_cnn.cpp
@@ -24,6 +24,10 @@
 #include "arm_compute/runtime/NEON/NEFunctions.h"
 
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Allocator.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -34,6 +38,18 @@ void main_cnn(int argc, const char **argv)
     ARM_COMPUTE_UNUSED(argc);
     ARM_COMPUTE_UNUSED(argv);
 
+    // Create NEON allocator
+    Allocator allocator;
+
+    // Create memory manager components
+    // We need 2 memory managers: one to handle the tensors internal to the functions (mm_layers) and one to handle the input and output tensors of the functions (mm_transitions)
+    auto lifetime_mgr0  = std::make_shared<BlobLifetimeManager>();                           // Create lifetime manager
+    auto lifetime_mgr1  = std::make_shared<BlobLifetimeManager>();                           // Create lifetime manager
+    auto pool_mgr0      = std::make_shared<PoolManager>();                                   // Create pool manager
+    auto pool_mgr1      = std::make_shared<PoolManager>();                                   // Create pool manager
+    auto mm_layers      = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Create the memory manager
+    auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Create the memory manager
+
     // The src tensor should contain the input image
     Tensor src;
 
@@ -55,15 +71,16 @@ void main_cnn(int argc, const char **argv)
     Tensor out_fc0;
     Tensor out_softmax;
 
-    NEConvolutionLayer    conv0;
-    NEConvolutionLayer    conv1;
+    // Create the layers and, where supported, let the memory manager handle their internal memory requirements
+    NEConvolutionLayer    conv0(mm_layers);
+    NEConvolutionLayer    conv1(mm_layers);
     NEPoolingLayer        pool0;
     NEPoolingLayer        pool1;
-    NEFullyConnectedLayer fc0;
+    NEFullyConnectedLayer fc0(mm_layers);
     NEActivationLayer     act0;
     NEActivationLayer     act1;
     NEActivationLayer     act2;
-    NESoftmaxLayer        softmax;
+    NESoftmaxLayer        softmax(mm_layers);
 
     /* [Initialize tensors] */
 
@@ -171,9 +188,37 @@ void main_cnn(int argc, const char **argv)
 
     /* -----------------------End: [Configure functions] */
 
+    /* [Add tensors to memory manager] */
+
+    // We need 2 memory groups for handling the input and output tensors of the functions
+    // We call allocate() explicitly right after manage() in order to avoid overlapping lifetimes
+    MemoryGroup memory_group0(mm_transitions);
+    MemoryGroup memory_group1(mm_transitions);
+
+    memory_group0.manage(&out_conv0);
+    out_conv0.allocator()->allocate();
+    memory_group1.manage(&out_act0);
+    out_act0.allocator()->allocate();
+    memory_group0.manage(&out_pool0);
+    out_pool0.allocator()->allocate();
+    memory_group1.manage(&out_conv1);
+    out_conv1.allocator()->allocate();
+    memory_group0.manage(&out_act1);
+    out_act1.allocator()->allocate();
+    memory_group1.manage(&out_pool1);
+    out_pool1.allocator()->allocate();
+    memory_group0.manage(&out_fc0);
+    out_fc0.allocator()->allocate();
+    memory_group1.manage(&out_act2);
+    out_act2.allocator()->allocate();
+    memory_group0.manage(&out_softmax);
+    out_softmax.allocator()->allocate();
+
+    /* -----------------------End: [Add tensors to memory manager] */
+
     /* [Allocate tensors] */
 
-    // Now that the padding requirements are known we can allocate the images:
+    // Now that the padding requirements are known we can allocate the remaining tensors
     src.allocator()->allocate();
     weights0.allocator()->allocate();
     weights1.allocator()->allocate();
@@ -181,18 +226,32 @@ void main_cnn(int argc, const char **argv)
     biases0.allocator()->allocate();
     biases1.allocator()->allocate();
     biases2.allocator()->allocate();
-    out_conv0.allocator()->allocate();
-    out_conv1.allocator()->allocate();
-    out_act0.allocator()->allocate();
-    out_act1.allocator()->allocate();
-    out_act2.allocator()->allocate();
-    out_pool0.allocator()->allocate();
-    out_pool1.allocator()->allocate();
-    out_fc0.allocator()->allocate();
-    out_softmax.allocator()->allocate();
 
     /* -----------------------End: [Allocate tensors] */
 
+    // Finalize the layers memory manager
+
+    // Set the allocator that the memory manager will use
+    mm_layers->set_allocator(&allocator);
+
+    // Set the number of pools that the manager will create; this specifies how many layers you want to run in parallel
+    mm_layers->set_num_pools(1);
+
+    // Finalize the manager (validity checks, memory allocation, etc.)
+    mm_layers->finalize();
+
+    // Finalize the transitions memory manager
+
+    // Set the allocator that the memory manager will use
+    mm_transitions->set_allocator(&allocator);
+
+    // Set the number of pools that the manager will create; this specifies how many models we can run in parallel.
+    // Set to 2 as we need one pool for the input and one for the output at any given time
+    mm_transitions->set_num_pools(2);
+
+    // Finalize the manager (validity checks, memory allocation, etc.)
+    mm_transitions->finalize();
+
     /* [Initialize weights and biases tensors] */
 
     // Once the tensors have been allocated, the src, weights and biases tensors can be initialized
@@ -202,6 +261,10 @@ void main_cnn(int argc, const char **argv)
 
     /* [Execute the functions] */
 
+    // Acquire memory for the memory groups
+    memory_group0.acquire();
+    memory_group1.acquire();
+
     conv0.run();
     act0.run();
     pool0.run();
@@ -212,6 +275,10 @@ void main_cnn(int argc, const char **argv)
     act2.run();
     softmax.run();
 
+    // Release memory
+    memory_group0.release();
+    memory_group1.release();
+
     /* -----------------------End: [Execute the functions] */
 }
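
The patch above walks the memory manager through its full lifecycle: create the lifetime and pool managers, hand mm_layers to the functions that accept one, manage() and allocate() each transition tensor in one of two alternating groups, finalize both managers, and bracket the run() calls with acquire() and release(). The sketch below condenses that lifecycle to two chained activation layers so the pattern is easier to follow. It is not part of the commit; the function name, tensor shapes, and choice of RELU layers are illustrative assumptions, while every memory-manager call mirrors one used in the patch.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/PoolManager.h"
#include "arm_compute/runtime/Tensor.h"

#include <memory>

using namespace arm_compute;

// Hypothetical helper, not in the example: two chained activation layers
// whose transition tensors are served by one on-demand memory manager
void transitions_sketch()
{
    Allocator allocator;

    // Same components as mm_transitions in the patch
    auto lifetime_mgr   = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr       = std::make_shared<PoolManager>();
    auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    // Illustrative 32x32 single-channel F32 tensors
    Tensor src, out0, out1;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
    out0.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
    out1.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

    NEActivationLayer act0;
    NEActivationLayer act1;
    act0.configure(&src, &out0, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    act1.configure(&out0, &out1, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // Alternate the groups so a function's input and output never share a
    // pool; allocate() follows manage() immediately to keep lifetimes from
    // overlapping, exactly as in the patch
    MemoryGroup memory_group0(mm_transitions);
    MemoryGroup memory_group1(mm_transitions);
    memory_group0.manage(&out0);
    out0.allocator()->allocate();
    memory_group1.manage(&out1);
    out1.allocator()->allocate();

    // Tensors outside the manager are allocated as usual
    src.allocator()->allocate();

    // Two pools, because both groups are acquired at the same time below
    mm_transitions->set_allocator(&allocator);
    mm_transitions->set_num_pools(2);
    mm_transitions->finalize();

    // Managed tensors only have valid backing memory between acquire() and release()
    memory_group0.acquire();
    memory_group1.acquire();
    act0.run();
    act1.run();
    memory_group0.release();
    memory_group1.release();
}

The alternation between the two groups is the key design point: tensors managed by the same group may end up sharing backing memory once the manager reuses its pools, so keeping each function's input and output in different groups guarantees they never alias while both are live.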