aboutsummaryrefslogtreecommitdiff
path: root/examples/neon_cnn.cpp
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2017-09-28 10:43:38 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit e7f7b5507c6d9e70b946880a774205d9b1a8e59c (patch)
tree 62fd1c6d0258d27bd2f7fc0d3281e77cccd2cfbe /examples/neon_cnn.cpp
parent 299025a35f4349993547df88c8724a6049b7e10e (diff)
download ComputeLibrary-e7f7b5507c6d9e70b946880a774205d9b1a8e59c.tar.gz
COMPMID-417 - Added support for memory manager in neon_cnn example
Change-Id: I3bd91a9f234b8b42a96454c0d2e45f6098c543a5 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89481 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'examples/neon_cnn.cpp')
-rw-r--r--examples/neon_cnn.cpp95
1 files changed, 81 insertions, 14 deletions
diff --git a/examples/neon_cnn.cpp b/examples/neon_cnn.cpp
index 238f0572da..198890c9ba 100644
--- a/examples/neon_cnn.cpp
+++ b/examples/neon_cnn.cpp
@@ -24,6 +24,10 @@
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Allocator.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
#include "utils/Utils.h"
using namespace arm_compute;
@@ -34,6 +38,18 @@ void main_cnn(int argc, const char **argv)
ARM_COMPUTE_UNUSED(argc);
ARM_COMPUTE_UNUSED(argv);
+ // Create NEON allocator
+ Allocator allocator;
+
+ // Create memory manager components
+ // We need 2 memory managers: 1 for handling the tensors within the functions (mm_layers) and 1 for handling the input and output tensors of the functions (mm_transitions)
+ auto lifetime_mgr0 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager
+ auto lifetime_mgr1 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager
+ auto pool_mgr0 = std::make_shared<PoolManager>(); // Create pool manager
+ auto pool_mgr1 = std::make_shared<PoolManager>(); // Create pool manager
+ auto mm_layers = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Create the memory manager
+ auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Create the memory manager
+
// The src tensor should contain the input image
Tensor src;
@@ -55,15 +71,16 @@ void main_cnn(int argc, const char **argv)
Tensor out_fc0;
Tensor out_softmax;
- NEConvolutionLayer conv0;
- NEConvolutionLayer conv1;
+ // Create layers and set memory manager where allowed to manage internal memory requirements
+ NEConvolutionLayer conv0(mm_layers);
+ NEConvolutionLayer conv1(mm_layers);
NEPoolingLayer pool0;
NEPoolingLayer pool1;
- NEFullyConnectedLayer fc0;
+ NEFullyConnectedLayer fc0(mm_layers);
NEActivationLayer act0;
NEActivationLayer act1;
NEActivationLayer act2;
- NESoftmaxLayer softmax;
+ NESoftmaxLayer softmax(mm_layers);
/* [Initialize tensors] */
@@ -171,9 +188,37 @@ void main_cnn(int argc, const char **argv)
/* -----------------------End: [Configure functions] */
+ /*[ Add tensors to memory manager ]*/
+
+ // We need 2 memory groups for handling the input and output
+ // We explicitly call allocate() right after manage() in order to avoid overlapping lifetimes
+ MemoryGroup memory_group0(mm_transitions);
+ MemoryGroup memory_group1(mm_transitions);
+
+ memory_group0.manage(&out_conv0);
+ out_conv0.allocator()->allocate();
+ memory_group1.manage(&out_act0);
+ out_act0.allocator()->allocate();
+ memory_group0.manage(&out_pool0);
+ out_pool0.allocator()->allocate();
+ memory_group1.manage(&out_conv1);
+ out_conv1.allocator()->allocate();
+ memory_group0.manage(&out_act1);
+ out_act1.allocator()->allocate();
+ memory_group1.manage(&out_pool1);
+ out_pool1.allocator()->allocate();
+ memory_group0.manage(&out_fc0);
+ out_fc0.allocator()->allocate();
+ memory_group1.manage(&out_act2);
+ out_act2.allocator()->allocate();
+ memory_group0.manage(&out_softmax);
+ out_softmax.allocator()->allocate();
+
+ /* -----------------------End: [ Add tensors to memory manager ] */
+
/* [Allocate tensors] */
- // Now that the padding requirements are known we can allocate the images:
+ // Now that the padding requirements are known we can allocate the remaining tensors that are not handled by the memory groups (input, weights and biases)
src.allocator()->allocate();
weights0.allocator()->allocate();
weights1.allocator()->allocate();
@@ -181,18 +226,32 @@ void main_cnn(int argc, const char **argv)
biases0.allocator()->allocate();
biases1.allocator()->allocate();
biases2.allocator()->allocate();
- out_conv0.allocator()->allocate();
- out_conv1.allocator()->allocate();
- out_act0.allocator()->allocate();
- out_act1.allocator()->allocate();
- out_act2.allocator()->allocate();
- out_pool0.allocator()->allocate();
- out_pool1.allocator()->allocate();
- out_fc0.allocator()->allocate();
- out_softmax.allocator()->allocate();
/* -----------------------End: [Allocate tensors] */
+ // Finalize layers memory manager
+
+ // Set allocator that the memory manager will use
+ mm_layers->set_allocator(&allocator);
+
+ // Number of pools that the manager will create. This specifies how many layers you want to run in parallel
+ mm_layers->set_num_pools(1);
+
+ // Finalize the manager. (Validity checks, memory allocations etc)
+ mm_layers->finalize();
+
+ // Finalize transitions memory manager
+
+ // Set allocator that the memory manager will use
+ mm_transitions->set_allocator(&allocator);
+
+ // Number of pools that the manager will create. This specifies how many models we can run in parallel.
+ // Setting to 2 as we need one for the input and one for the output at any given time
+ mm_transitions->set_num_pools(2);
+
+ // Finalize the manager. (Validity checks, memory allocations etc)
+ mm_transitions->finalize();
+
/* [Initialize weights and biases tensors] */
// Once the tensors have been allocated, the src, weights and biases tensors can be initialized
@@ -202,6 +261,10 @@ void main_cnn(int argc, const char **argv)
/* [Execute the functions] */
+ // Acquire memory for the memory groups
+ memory_group0.acquire();
+ memory_group1.acquire();
+
conv0.run();
act0.run();
pool0.run();
@@ -212,6 +275,10 @@ void main_cnn(int argc, const char **argv)
act2.run();
softmax.run();
+ // Release memory
+ memory_group0.release();
+ memory_group1.release();
+
/* -----------------------End: [Execute the functions] */
}