author    Georgios Pinitas <georgios.pinitas@arm.com> 2018-05-03 20:47:16 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:51:50 +0000
commit    3d1489de593574e65ef1e64a7ae64e4e56c2978b (patch)
tree      f87f3df521cb5ed8bd383dad89cbeb92c49670ac /src
parent    54d6fae4dbb4f556cc5ec484c51681ad84c015a7 (diff)
COMPMID-605: Transition buffer memory manager
Change-Id: Ide7c6124eb19f13f15f517e62d705646a0cd1ecd
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130184
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src')
-rw-r--r--  src/graph/GraphContext.cpp                            15
-rw-r--r--  src/graph/GraphManager.cpp                            41
-rw-r--r--  src/graph/backends/CL/CLDeviceBackend.cpp             12
-rw-r--r--  src/graph/backends/CL/CLSubTensorHandle.cpp           33
-rw-r--r--  src/graph/backends/CL/CLTensorHandle.cpp              35
-rw-r--r--  src/graph/backends/GLES/GCDeviceBackend.cpp           12
-rw-r--r--  src/graph/backends/GLES/GCTensorHandle.cpp            35
-rw-r--r--  src/graph/backends/NEON/NEDeviceBackend.cpp           12
-rw-r--r--  src/graph/backends/NEON/NESubTensorHandle.cpp         35
-rw-r--r--  src/graph/backends/NEON/NETensorHandle.cpp            35
-rw-r--r--  src/graph/detail/CrossLayerMemoryManagerHelpers.cpp  269
-rw-r--r--  src/graph/detail/ExecutionHelpers.cpp                 25
-rw-r--r--  src/runtime/BlobLifetimeManager.cpp                   22
-rw-r--r--  src/runtime/ISimpleLifetimeManager.cpp                57
-rw-r--r--  src/runtime/OffsetLifetimeManager.cpp                 17
15 files changed, 569 insertions, 86 deletions
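
In short, this patch splits each backend's memory management context into an intra-layer manager (per-layer scratch memory) and a cross-layer manager plus a memory group for the transition buffers that link layers. The sketch below reconstructs the extended MemoryManagerContext from how the three device backends populate it further down; the real definition lives in arm_compute/graph/GraphContext.h, which is not part of this diff, so field types and defaults are assumptions.

    #include <memory>

    // Reconstructed sketch, not the shipped header: fields inferred from the
    // setup_backend_context() hunks in this diff.
    struct MemoryManagerContext
    {
        Target                                       target      = Target::UNSPECIFIED; // Backend this context serves
        std::shared_ptr<arm_compute::IMemoryManager> intra_mm    = nullptr;             // Intra-layer (scratch) memory
        std::shared_ptr<arm_compute::IMemoryManager> cross_mm    = nullptr;             // Cross-layer (transition) memory
        std::shared_ptr<arm_compute::IMemoryGroup>   cross_group = nullptr;             // Acquires/releases transition buffers
    };
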
diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp
index 6fc45c0aa7..3f311145bc 100644
--- a/src/graph/GraphContext.cpp
+++ b/src/graph/GraphContext.cpp
@@ -60,13 +60,24 @@ MemoryManagerContext *GraphContext::memory_management_ctx(Target target)
return (_memory_managers.find(target) != std::end(_memory_managers)) ? &_memory_managers[target] : nullptr;
}
+std::map<Target, MemoryManagerContext> &GraphContext::memory_managers()
+{
+ return _memory_managers;
+}
+
void GraphContext::finalize()
{
for(auto &mm_obj : _memory_managers)
{
- if(mm_obj.second.mm != nullptr)
+ // Finalize intra-layer memory manager
+ if(mm_obj.second.intra_mm != nullptr)
+ {
+ mm_obj.second.intra_mm->finalize();
+ }
+ // Finalize cross-layer memory manager
+ if(mm_obj.second.cross_mm != nullptr)
{
- mm_obj.second.mm->finalize();
+ mm_obj.second.cross_mm->finalize();
}
}
}
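
The new memory_managers() accessor exposes the whole map so that execution code can iterate every backend's context; call_all_tasks() in ExecutionHelpers.cpp further down uses it to acquire and release each cross_group around a run. A minimal usage sketch, assuming a finalized GraphContext ctx:

    // Bracket a run with acquire/release of every backend's cross-layer group.
    for(auto &mm_ctx : ctx.memory_managers())
    {
        if(mm_ctx.second.cross_group != nullptr)
        {
            mm_ctx.second.cross_group->acquire(); // Map/commit the transition buffers
        }
    }
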
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index aac6488311..a67e5b25d6 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/PassManager.h"
#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
#include "arm_compute/graph/detail/ExecutionHelpers.h"
namespace arm_compute
@@ -72,41 +73,37 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
auto workload = detail::configure_all_nodes(graph, ctx);
ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!");
+ // Allocate const tensors and call accessors
+ detail::allocate_const_tensors(graph);
+ detail::call_all_const_node_accessors(graph);
+
// TODO (COMPMID-920) : Update prepare for NEON/GC
if(forced_target == Target::CL)
{
- // Allocate const tensors and call accessors
- detail::allocate_const_tensors(graph);
- detail::call_all_const_node_accessors(graph);
-
// Prepare graph
detail::prepare_all_tasks(workload);
+ }
- // Allocate all tensors
- detail::allocate_all_tensors(graph);
-
- // Finalize Graph context
- ctx.finalize();
-
- // Register graph
- _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
+ // Setup tensor memory (Allocate all tensors or setup transition manager)
+ if(ctx.config().use_transition_memory_manager)
+ {
+ detail::configure_transition_manager(graph, ctx, workload);
}
else
{
- // Allocate all tensors
detail::allocate_all_tensors(graph);
+ }
- // Call accessors on all Const nodes
- detail::call_all_const_node_accessors(graph);
-
- // Finalize Graph context
- ctx.finalize();
+ // Finalize Graph context
+ ctx.finalize();
- // Register graph
- _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
+ // Register graph
+ _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
+ // TODO (COMPMID-920) : Update prepare for NEON/GC
+ if(forced_target != Target::CL)
+ {
// Make first run
execute_graph(graph);
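
Condensed, the finalize flow after this change reads as follows; this is a paraphrase of the hunk above, not verbatim code:

    detail::allocate_const_tensors(graph);        // Now runs for every target, not only CL
    detail::call_all_const_node_accessors(graph);
    if(forced_target == Target::CL)
    {
        detail::prepare_all_tasks(workload);      // TODO (COMPMID-920): extend to NEON/GC
    }
    if(ctx.config().use_transition_memory_manager)
    {
        detail::configure_transition_manager(graph, ctx, workload); // Manage + allocate transition buffers
    }
    else
    {
        detail::allocate_all_tensors(graph);      // Legacy path: allocate everything up front
    }
    ctx.finalize();                               // Finalizes both intra_mm and cross_mm per target
    _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
    if(forced_target != Target::CL)
    {
        execute_graph(graph);                     // First run doubles as preparation for NEON/GC
    }
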
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index 37cbcd72d7..7f2be674f6 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -37,6 +37,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
@@ -107,8 +108,10 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
if(ctx.memory_management_ctx(Target::CL) == nullptr)
{
MemoryManagerContext mm_ctx;
- mm_ctx.target = Target::CL;
- mm_ctx.mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.target = Target::CL;
+ mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.cross_group = std::make_shared<CLMemoryGroup>(mm_ctx.cross_mm);
ctx.insert_memory_management_ctx(std::move(mm_ctx));
}
@@ -119,6 +122,11 @@ bool CLDeviceBackend::is_backend_supported()
return arm_compute::opencl_is_available();
}
+IAllocator *CLDeviceBackend::backend_allocator()
+{
+ return &_allocator;
+}
+
std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tensor)
{
// Get tensor descriptor
diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp
index a1bc8a1dd3..016dca753b 100644
--- a/src/graph/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph/backends/CL/CLSubTensorHandle.cpp
@@ -32,11 +32,12 @@ namespace graph
namespace backends
{
CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
- : _sub_tensor()
+ : _sub_tensor(), _parent_handle(nullptr)
{
ARM_COMPUTE_ERROR_ON(!parent_handle);
auto parent_tensor = arm_compute::utils::cast::polymorphic_downcast<ICLTensor *>(&parent_handle->tensor());
_sub_tensor = arm_compute::CLSubTensor(parent_tensor, shape, coords, extend_parent);
+ _parent_handle = parent_handle;
}
void CLSubTensorHandle::allocate()
@@ -44,14 +45,15 @@ void CLSubTensorHandle::allocate()
// noop
}
-const arm_compute::ITensor &CLSubTensorHandle::tensor() const
+void CLSubTensorHandle::free()
{
- return _sub_tensor;
+ // noop
}
-arm_compute::ITensor &CLSubTensorHandle::tensor()
+void CLSubTensorHandle::manage(IMemoryGroup *mg)
{
- return _sub_tensor;
+ ARM_COMPUTE_UNUSED(mg);
+ // noop
}
void CLSubTensorHandle::map(bool blocking)
@@ -69,10 +71,31 @@ void CLSubTensorHandle::release_if_unused()
// noop
}
+const arm_compute::ITensor &CLSubTensorHandle::tensor() const
+{
+ return _sub_tensor;
+}
+
+arm_compute::ITensor &CLSubTensorHandle::tensor()
+{
+ return _sub_tensor;
+}
+
+ITensorHandle *CLSubTensorHandle::parent_handle()
+{
+ ARM_COMPUTE_ERROR_ON(_parent_handle == nullptr);
+ return _parent_handle->parent_handle();
+}
+
bool CLSubTensorHandle::is_subtensor() const
{
return true;
}
+
+Target CLSubTensorHandle::target() const
+{
+ return Target::CL;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
\ No newline at end of file
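
Sub-tensors never own memory, so the new free()/manage() overrides are no-ops, and parent_handle() recurses up to the top-level handle; combined with CLTensorHandle::parent_handle() returning this (next file), lifetime tracking always keys on the real allocation. A sketch of the invariant, for a hypothetical handle pointer:

    // Whatever handle you start from, parent_handle() reaches a concrete
    // tensor handle that is its own parent.
    ITensorHandle *root = handle->parent_handle();
    ARM_COMPUTE_ERROR_ON(root->parent_handle() != root);
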
diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp
index 563c4d9ac6..219d9d0301 100644
--- a/src/graph/backends/CL/CLTensorHandle.cpp
+++ b/src/graph/backends/CL/CLTensorHandle.cpp
@@ -23,6 +23,9 @@
*/
#include "arm_compute/graph/backends/CL/CLTensorHandle.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+
namespace arm_compute
{
namespace graph
@@ -40,14 +43,18 @@ void CLTensorHandle::allocate()
_tensor.allocator()->allocate();
}
-const arm_compute::ITensor &CLTensorHandle::tensor() const
+void CLTensorHandle::free()
{
- return _tensor;
+ _tensor.allocator()->free();
}
-arm_compute::ITensor &CLTensorHandle::tensor()
+void CLTensorHandle::manage(IMemoryGroup *mg)
{
- return _tensor;
+ if(mg != nullptr)
+ {
+ auto *cl_mg = arm_compute::utils::cast::polymorphic_downcast<CLMemoryGroup *>(mg);
+ cl_mg->manage(&_tensor);
+ }
}
void CLTensorHandle::map(bool blocking)
@@ -69,10 +76,30 @@ void CLTensorHandle::release_if_unused()
}
}
+const arm_compute::ITensor &CLTensorHandle::tensor() const
+{
+ return _tensor;
+}
+
+arm_compute::ITensor &CLTensorHandle::tensor()
+{
+ return _tensor;
+}
+
+ITensorHandle *CLTensorHandle::parent_handle()
+{
+ return this;
+}
+
bool CLTensorHandle::is_subtensor() const
{
return false;
}
+
+Target CLTensorHandle::target() const
+{
+ return Target::CL;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
\ No newline at end of file
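
manage() hands the tensor to a cross-layer memory group instead of allocating immediately; a later allocate() closes the lifetime. This is exactly the pair that configure_handle_lifetime() in CrossLayerMemoryManagerHelpers.cpp (further down) drives. A hedged usage sketch, assuming a CL memory management context has been set up:

    IMemoryGroup *group = ctx.memory_management_ctx(Target::CL)->cross_group.get();
    handle->manage(group);   // Start of lifetime: tensor is tracked, no memory yet
    // ... remaining consumers of the tensor are recorded ...
    handle->allocate();      // End of lifetime: the lifetime manager may now reuse its memory
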
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 0185598965..770cca5d42 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -36,6 +36,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
@@ -68,8 +69,10 @@ void GCDeviceBackend::setup_backend_context(GraphContext &ctx)
if(ctx.memory_management_ctx(Target::GC) == nullptr)
{
MemoryManagerContext mm_ctx;
- mm_ctx.target = Target::GC;
- mm_ctx.mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.target = Target::GC;
+ mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.cross_group = std::make_shared<GCMemoryGroup>(mm_ctx.cross_mm);
ctx.insert_memory_management_ctx(std::move(mm_ctx));
}
@@ -80,6 +83,11 @@ bool GCDeviceBackend::is_backend_supported()
return arm_compute::opengles31_is_available();
}
+IAllocator *GCDeviceBackend::backend_allocator()
+{
+ return &_allocator;
+}
+
std::unique_ptr<ITensorHandle> GCDeviceBackend::create_tensor(const Tensor &tensor)
{
// Get tensor descriptor
diff --git a/src/graph/backends/GLES/GCTensorHandle.cpp b/src/graph/backends/GLES/GCTensorHandle.cpp
index ae7c778130..4e5c652120 100644
--- a/src/graph/backends/GLES/GCTensorHandle.cpp
+++ b/src/graph/backends/GLES/GCTensorHandle.cpp
@@ -23,6 +23,9 @@
*/
#include "arm_compute/graph/backends/GLES/GCTensorHandle.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
+
namespace arm_compute
{
namespace graph
@@ -40,14 +43,18 @@ void GCTensorHandle::allocate()
_tensor.allocator()->allocate();
}
-const arm_compute::ITensor &GCTensorHandle::tensor() const
+void GCTensorHandle::free()
{
- return _tensor;
+ _tensor.allocator()->free();
}
-arm_compute::ITensor &GCTensorHandle::tensor()
+void GCTensorHandle::manage(IMemoryGroup *mg)
{
- return _tensor;
+ if(mg != nullptr)
+ {
+ auto *gc_mg = arm_compute::utils::cast::polymorphic_downcast<GCMemoryGroup *>(mg);
+ gc_mg->manage(&_tensor);
+ }
}
void GCTensorHandle::map(bool blocking)
@@ -69,10 +76,30 @@ void GCTensorHandle::release_if_unused()
}
}
+const arm_compute::ITensor &GCTensorHandle::tensor() const
+{
+ return _tensor;
+}
+
+arm_compute::ITensor &GCTensorHandle::tensor()
+{
+ return _tensor;
+}
+
+ITensorHandle *GCTensorHandle::parent_handle()
+{
+ return this;
+}
+
bool GCTensorHandle::is_subtensor() const
{
return false;
}
+
+Target GCTensorHandle::target() const
+{
+ return Target::GC;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index def6c39003..7c2db40260 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -37,6 +37,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/OffsetLifetimeManager.h"
#include "arm_compute/runtime/PoolManager.h"
@@ -74,8 +75,10 @@ void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
if(ctx.memory_management_ctx(Target::NEON) == nullptr)
{
MemoryManagerContext mm_ctx;
- mm_ctx.target = Target::NEON;
- mm_ctx.mm = create_memory_manager(MemoryManagerAffinity::Buffer);
+ mm_ctx.target = Target::NEON;
+ mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Offset);
+ mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Offset);
+ mm_ctx.cross_group = std::make_shared<MemoryGroup>(mm_ctx.cross_mm);
ctx.insert_memory_management_ctx(std::move(mm_ctx));
}
@@ -86,6 +89,11 @@ bool NEDeviceBackend::is_backend_supported()
return true;
}
+IAllocator *NEDeviceBackend::backend_allocator()
+{
+ return &_allocator;
+}
+
std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tensor)
{
// Get tensor descriptor
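
Note the affinity difference: CL and GLES create both managers with MemoryManagerAffinity::Buffer, while NEON uses MemoryManagerAffinity::Offset, which places all tensors at offsets inside a single allocation; plain host pointers make offset aliasing cheap, whereas separate device buffers suit the CL/GLES backends better. The mapping below is how these affinities resolve in the runtime:

    // MemoryManagerAffinity::Buffer -> BlobLifetimeManager   (one blob per in-flight tensor)
    // MemoryManagerAffinity::Offset -> OffsetLifetimeManager (one large blob, tensors at offsets)
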
diff --git a/src/graph/backends/NEON/NESubTensorHandle.cpp b/src/graph/backends/NEON/NESubTensorHandle.cpp
index c48ba6b9d6..c0acedd9f2 100644
--- a/src/graph/backends/NEON/NESubTensorHandle.cpp
+++ b/src/graph/backends/NEON/NESubTensorHandle.cpp
@@ -30,10 +30,11 @@ namespace graph
namespace backends
{
NESubTensorHandle::NESubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
- : _sub_tensor()
+ : _sub_tensor(), _parent_handle(nullptr)
{
ARM_COMPUTE_ERROR_ON(!parent_handle);
- _sub_tensor = arm_compute::SubTensor(&parent_handle->tensor(), shape, coords, extend_parent);
+ _sub_tensor = arm_compute::SubTensor(&parent_handle->tensor(), shape, coords, extend_parent);
+ _parent_handle = parent_handle;
}
void NESubTensorHandle::allocate()
@@ -41,14 +42,15 @@ void NESubTensorHandle::allocate()
// noop
}
-const arm_compute::ITensor &NESubTensorHandle::tensor() const
+void NESubTensorHandle::free()
{
- return _sub_tensor;
+ // noop
}
-arm_compute::ITensor &NESubTensorHandle::tensor()
+void NESubTensorHandle::manage(IMemoryGroup *mg)
{
- return _sub_tensor;
+ ARM_COMPUTE_UNUSED(mg);
+ // noop
}
void NESubTensorHandle::map(bool blocking)
@@ -66,10 +68,31 @@ void NESubTensorHandle::release_if_unused()
// noop
}
+const arm_compute::ITensor &NESubTensorHandle::tensor() const
+{
+ return _sub_tensor;
+}
+
+arm_compute::ITensor &NESubTensorHandle::tensor()
+{
+ return _sub_tensor;
+}
+
+ITensorHandle *NESubTensorHandle::parent_handle()
+{
+ ARM_COMPUTE_ERROR_ON(_parent_handle == nullptr);
+ return _parent_handle->parent_handle();
+}
+
bool NESubTensorHandle::is_subtensor() const
{
return true;
}
+
+Target NESubTensorHandle::target() const
+{
+ return Target::NEON;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp
index 8508ac9511..5892116caf 100644
--- a/src/graph/backends/NEON/NETensorHandle.cpp
+++ b/src/graph/backends/NEON/NETensorHandle.cpp
@@ -23,6 +23,9 @@
*/
#include "arm_compute/graph/backends/NEON/NETensorHandle.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
namespace arm_compute
{
namespace graph
@@ -40,14 +43,18 @@ void NETensorHandle::allocate()
_tensor.allocator()->allocate();
}
-const arm_compute::ITensor &NETensorHandle::tensor() const
+void NETensorHandle::free()
{
- return _tensor;
+ _tensor.allocator()->free();
}
-arm_compute::ITensor &NETensorHandle::tensor()
+void NETensorHandle::manage(IMemoryGroup *mg)
{
- return _tensor;
+ if(mg != nullptr)
+ {
+ auto *ne_mg = arm_compute::utils::cast::polymorphic_downcast<MemoryGroup *>(mg);
+ ne_mg->manage(&_tensor);
+ }
}
void NETensorHandle::map(bool blocking)
@@ -68,10 +75,30 @@ void NETensorHandle::release_if_unused()
}
}
+const arm_compute::ITensor &NETensorHandle::tensor() const
+{
+ return _tensor;
+}
+
+arm_compute::ITensor &NETensorHandle::tensor()
+{
+ return _tensor;
+}
+
+ITensorHandle *NETensorHandle::parent_handle()
+{
+ return this;
+}
+
bool NETensorHandle::is_subtensor() const
{
return false;
}
+
+Target NETensorHandle::target() const
+{
+ return Target::NEON;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
new file mode 100644
index 0000000000..7fc5ca0576
--- /dev/null
+++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/GraphManager.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/graph/Types.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+
+#include <algorithm>
+#include <map>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace detail
+{
+namespace
+{
+using HandleCountPair = std::pair<ITensorHandle *, unsigned int>;
+using HandleCounter = std::map<HandleCountPair::first_type, HandleCountPair::second_type>;
+using TargetHandleCounter = std::map<Target, HandleCounter>;
+
+/** Holds managed IO tensor handles of a task */
+struct TaskHandles
+{
+ std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> input_handles = {}; /**< Input handles to a task */
+ std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> output_handles = {}; /**< Output handles of a task */
+};
+
+/** Returns the memory group of a handle, based on its backend target
+ *
+ * @param[in] ctx    Graph context
+ * @param[in] handle Tensor handle
+ *
+ * @return Memory group
+ */
+IMemoryGroup *get_memory_group_from_handle(GraphContext &ctx, ITensorHandle *handle)
+{
+ ARM_COMPUTE_ERROR_ON(handle == nullptr);
+ return ctx.memory_management_ctx(handle->target())->cross_group.get();
+}
+
+/** Gets the handles of tensors that must not be managed as transitions
+ * (tensors attached to Input, Output and Const nodes)
+ *
+ * @param[in] g Graph
+ *
+ * @return Handles of the graph's input, output and const tensors
+ */
+std::set<ITensorHandle *> get_const_handles(const Graph &g)
+{
+ std::set<NodeType> const_node_types = { NodeType::Input, NodeType::Output, NodeType::Const };
+
+ std::set<ITensorHandle *> const_tensors;
+
+ auto &nodes = g.nodes();
+ for(auto &node : nodes)
+ {
+ // If it's a const node:
+ if(node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types))
+ {
+ // TODO (geopin01) : Create IO iterator wrappers
+ // Add all its inputs / outputs to the list of constant handles
+ for(unsigned int i = 0; i < node->num_inputs(); ++i)
+ {
+ if(node->input(i) != nullptr)
+ {
+ const_tensors.insert(node->input(i)->handle()->parent_handle());
+ }
+ }
+ for(unsigned int i = 0; i < node->num_outputs(); ++i)
+ {
+ if(node->output(i) != nullptr)
+ {
+ const_tensors.insert(node->output(i)->handle()->parent_handle());
+ }
+ }
+ }
+ }
+
+ return const_tensors;
+}
+
+/** Builds a list of all the transition handles (Handles that are used to link two nodes)
+ *
+ * @param[in] ctx Graph context
+ * @param[in] task Workload task
+ * @param[in] const_tensors Constant tensors
+ *
+ * @return List of transition handles
+ */
+TaskHandles get_transition_handles(GraphContext &ctx,
+ ExecutionTask &task,
+ const std::set<ITensorHandle *> &const_tensors)
+{
+ ARM_COMPUTE_ERROR_ON(task.node == nullptr || task.task == nullptr);
+ INode &node = *task.node;
+
+ TaskHandles transition_handles;
+
+ // Add input handles
+ for(unsigned int i = 0; i < node.input_edges().size(); ++i)
+ {
+ Edge *input_edge = node.input_edge(i);
+ // If this input is the output of another node
+ if(input_edge != nullptr && input_edge->tensor() != nullptr && const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors))
+ {
+ // Then add it to the list of transition buffers
+ ITensorHandle *tensor_handle = input_edge->tensor()->handle()->parent_handle();
+ IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle);
+ transition_handles.input_handles.push_back(std::make_pair(tensor_handle, mm_group));
+ }
+ }
+
+ // Add output handles
+ for(unsigned int i = 0; i < node.num_outputs(); ++i)
+ {
+ Tensor *output_tensor = node.output(i);
+ // If this output is used as an input for another node
+ if(output_tensor != nullptr && const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors))
+ {
+ ITensorHandle *tensor_handle = output_tensor->handle()->parent_handle();
+ IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle);
+ transition_handles.output_handles.push_back(std::make_pair(tensor_handle, mm_group));
+ }
+ }
+
+ return transition_handles;
+}
+
+/** Counts the input-handle references of a task, per target
+ *
+ * @param[in]     task_handles   Task handles containing the managed input handles
+ * @param[in,out] handle_counter Data structure that keeps the handles' reference count per target
+ */
+void count_input_handles_per_target(const TaskHandles &task_handles, TargetHandleCounter &handle_counter)
+{
+ for(const auto &handle : task_handles.input_handles)
+ {
+ ITensorHandle *key = handle.first;
+ HandleCounter &target_counter = handle_counter[key->target()];
+ if(target_counter.find(key) == std::end(target_counter))
+ {
+ target_counter.emplace(std::make_pair(key, 1));
+ }
+ else
+ {
+ ++target_counter[key];
+ }
+ }
+}
+
+/** Calculates the lifetime of each tensor handle
+ *
+ * @param[in,out] tasks_handles Tensor handles for each task
+ * @param[in] hc Data structure that keeps the handles reference count
+ */
+void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const HandleCounter &hc)
+{
+ // Tracks tensors currently in flight together with their remaining consumer count
+ HandleCounter tensors_in_flight;
+
+ // Acquires the given handles and sets them as in flight if they aren't already
+ auto acquire = [&](std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> &handles)
+ {
+ for(auto &handle : handles)
+ {
+ ITensorHandle *parent_handle = handle.first;
+ ARM_COMPUTE_ERROR_ON(parent_handle == nullptr);
+ // If the tensor is not already in flight:
+ if(tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight))
+ {
+ ARM_COMPUTE_ERROR_ON(hc.find(parent_handle) == std::end(hc));
+ // Then add it to the list of in flight tensors
+ tensors_in_flight.insert(std::make_pair(parent_handle, hc.at(parent_handle)));
+ // Start of allocation's lifetime
+ parent_handle->manage(handle.second);
+ }
+ }
+ };
+
+ for(auto &task_handle : tasks_handles)
+ {
+ // Marking all the input and output tensors of the task as in flight
+ acquire(task_handle.input_handles);
+ acquire(task_handle.output_handles);
+
+ // Releasing the input tensors
+ for(auto &input_handle : task_handle.input_handles)
+ {
+ ITensorHandle *ihandle = input_handle.first;
+ ARM_COMPUTE_ERROR_ON(ihandle == nullptr);
+ ARM_COMPUTE_ERROR_ON(tensors_in_flight.find(ihandle) == std::end(tensors_in_flight));
+ --tensors_in_flight[ihandle];
+ if(tensors_in_flight[ihandle] <= 0)
+ {
+ // Remove tensor from the in-flight list
+ tensors_in_flight.erase(ihandle);
+ // End of allocation's lifetime
+ ihandle->allocate();
+ }
+ }
+ }
+}
+} // namespace
+
+void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload &workload)
+{
+ // Get const tensors (un-managed)
+ std::set<ITensorHandle *> const_tensors = get_const_handles(g);
+
+ std::vector<TaskHandles> tasks_handles;
+ TargetHandleCounter target_handle_count;
+
+ // Count handles
+ for(auto &task : workload.tasks)
+ {
+ // Populates IO handles
+ tasks_handles.push_back(get_transition_handles(ctx, task, const_tensors));
+
+ // Count handles
+ count_input_handles_per_target(tasks_handles.back(), target_handle_count);
+ }
+
+ // Setup memory managers
+ for(auto &hc : target_handle_count)
+ {
+ MemoryManagerContext *mm_ctx = ctx.memory_management_ctx(hc.first);
+ if(mm_ctx != nullptr)
+ {
+ if(mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr)
+ {
+ // Manage and allocate tensors
+ configure_handle_lifetime(tasks_handles, hc.second);
+ }
+ }
+ }
+}
+} // namespace detail
+} // namespace graph
+} // namespace arm_compute
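
Taken together, configure_transition_manager() makes two passes: first it collects each task's non-const input/output handles and counts how many tasks consume each handle (per target); then configure_handle_lifetime() replays the tasks in execution order, calling manage() the first time a handle appears and allocate() once its consumer count drains to zero. A toy trace, with hypothetical transition handles A and B:

    // Three-task graph: task0 produces A, task1 consumes A and produces B,
    // task2 consumes B. Input-consumer counts: A -> 1, B -> 1.
    //
    //   task0: acquire outputs        -> A.manage(group)   A in flight
    //   task1: acquire inputs/outputs -> B.manage(group)   A, B in flight
    //          release inputs         -> A count 1 -> 0    A.allocate()  (A's lifetime ends)
    //   task2: release inputs         -> B count 1 -> 0    B.allocate()  (B's lifetime ends)
    //
    // A is still live while B starts (task1 reads A while writing B), so the
    // lifetime manager keeps them apart; a tensor first used after task1
    // could reuse A's memory.
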
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index c1304436f6..c370fdf916 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -143,7 +143,9 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
{
ExecutionWorkload workload;
workload.graph = &g;
- auto &nodes = g.nodes();
+ workload.ctx = &ctx;
+
+ auto &nodes = g.nodes();
// Create tasks
for(auto &node : nodes)
@@ -235,10 +237,31 @@ void prepare_all_tasks(ExecutionWorkload &workload)
void call_all_tasks(ExecutionWorkload &workload)
{
+ ARM_COMPUTE_ERROR_ON(workload.ctx == nullptr);
+
+ // Acquire memory for the transition buffers
+ for(auto &mm_ctx : workload.ctx->memory_managers())
+ {
+ if(mm_ctx.second.cross_group != nullptr)
+ {
+ mm_ctx.second.cross_group->acquire();
+ }
+ }
+
+ // Execute tasks
for(auto &task : workload.tasks)
{
task();
}
+
+ // Release memory for the transition buffers
+ for(auto &mm_ctx : workload.ctx->memory_managers())
+ {
+ if(mm_ctx.second.cross_group != nullptr)
+ {
+ mm_ctx.second.cross_group->release();
+ }
+ }
}
void call_all_output_node_accessors(ExecutionWorkload &workload)
diff --git a/src/runtime/BlobLifetimeManager.cpp b/src/runtime/BlobLifetimeManager.cpp
index 3ca5071d91..2a4ab6ec0d 100644
--- a/src/runtime/BlobLifetimeManager.cpp
+++ b/src/runtime/BlobLifetimeManager.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,15 +57,15 @@ void BlobLifetimeManager::update_blobs_and_mappings()
ARM_COMPUTE_ERROR_ON(!are_all_finalized());
ARM_COMPUTE_ERROR_ON(_active_group == nullptr);
- // Sort active group requirements in descending order.
- std::sort(std::begin(_active_elements), std::end(_active_elements), [](const Element & a, const Element & b)
+ // Sort free blobs' memory requirements in descending order.
+ _free_blobs.sort([](const Blob & ba, const Blob & bb)
{
- return a.size > b.size;
+ return ba.max_size > bb.max_size;
});
std::vector<size_t> group_sizes;
- std::transform(std::begin(_active_elements), std::end(_active_elements), std::back_inserter(group_sizes), [](const Element & e)
+ std::transform(std::begin(_free_blobs), std::end(_free_blobs), std::back_inserter(group_sizes), [](const Blob & b)
{
- return e.size;
+ return b.max_size;
});
// Update blob sizes
@@ -80,8 +80,14 @@ void BlobLifetimeManager::update_blobs_and_mappings()
// Calculate group mappings
auto &group_mappings = _active_group->mappings();
int blob_idx = 0;
- for(auto &e : _active_elements)
+ for(auto &free_blob : _free_blobs)
{
- group_mappings[e.handle] = blob_idx++;
+ for(auto &bound_element_id : free_blob.bound_elements)
+ {
+ ARM_COMPUTE_ERROR_ON(_active_elements.find(bound_element_id) == std::end(_active_elements));
+ Element &bound_element = _active_elements[bound_element_id];
+ group_mappings[bound_element.handle] = blob_idx;
+ }
+ ++blob_idx;
}
}
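
With blobs in the picture, the mapping step changes meaning: instead of one blob per element, every element bound to a free blob shares that blob's index, and a blob's size is the maximum over its bound elements. A toy example of the resulting mappings, with hypothetical elements A, B, C:

    // Two free blobs after a group finalizes:
    //   blob0: max_size = 1024, bound_elements = {A, C}   // A and C never overlapped
    //   blob1: max_size =  512, bound_elements = {B}
    // After sorting by max_size (descending), update_blobs_and_mappings() yields:
    //   group_mappings[A.handle] = 0;   group_mappings[C.handle] = 0;   // share blob 0
    //   group_mappings[B.handle] = 1;
    // Elements with disjoint lifetimes therefore alias one allocation.
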
diff --git a/src/runtime/ISimpleLifetimeManager.cpp b/src/runtime/ISimpleLifetimeManager.cpp
index 2c64475b39..faaff8a63e 100644
--- a/src/runtime/ISimpleLifetimeManager.cpp
+++ b/src/runtime/ISimpleLifetimeManager.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@
using namespace arm_compute;
ISimpleLifetimeManager::ISimpleLifetimeManager()
- : _active_group(nullptr), _active_elements(), _finalized_groups()
+ : _active_group(nullptr), _active_elements(), _free_blobs(), _occupied_blobs(), _finalized_groups()
{
}
@@ -53,14 +53,21 @@ void ISimpleLifetimeManager::register_group(IMemoryGroup *group)
void ISimpleLifetimeManager::start_lifetime(void *obj)
{
ARM_COMPUTE_ERROR_ON(obj == nullptr);
- ARM_COMPUTE_ERROR_ON_MSG(std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e)
+ ARM_COMPUTE_ERROR_ON_MSG(_active_elements.find(obj) != std::end(_active_elements), "Memory object is already registered!");
+
+ // Check if there is a free blob
+ if(_free_blobs.empty())
+ {
+ _occupied_blobs.emplace_front(Blob{ obj, 0, { obj } });
+ }
+ else
{
- return obj == e.id;
- }) != std::end(_active_elements),
- "Memory object is already registered!");
+ _occupied_blobs.splice(std::begin(_occupied_blobs), _free_blobs, std::begin(_free_blobs));
+ _occupied_blobs.front().id = obj;
+ }
// Insert object in groups and mark its finalized state to false
- _active_elements.emplace_back(obj);
+ _active_elements.insert(std::make_pair(obj, obj));
}
void ISimpleLifetimeManager::end_lifetime(void *obj, void **handle, size_t size)
@@ -68,36 +75,50 @@ void ISimpleLifetimeManager::end_lifetime(void *obj, void **handle, size_t size)
ARM_COMPUTE_ERROR_ON(obj == nullptr);
// Find object
- auto it = std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e)
+ auto active_object_it = _active_elements.find(obj);
+ ARM_COMPUTE_ERROR_ON(active_object_it == std::end(_active_elements));
+
+ // Update object fields and mark object as complete
+ Element &el = active_object_it->second;
+ el.handle = handle;
+ el.size = size;
+ el.status = true;
+
+ // Find object in the occupied lists
+ auto occupied_blob_it = std::find_if(std::begin(_occupied_blobs), std::end(_occupied_blobs), [&obj](const Blob & b)
{
- return obj == e.id;
+ return obj == b.id;
});
- ARM_COMPUTE_ERROR_ON(it == std::end(_active_elements));
+ ARM_COMPUTE_ERROR_ON(occupied_blob_it == std::end(_occupied_blobs));
- // Update object fields and mark object as complete
- it->handle = handle;
- it->size = size;
- it->status = true;
+ // Update occupied blob and return as free
+ occupied_blob_it->bound_elements.insert(obj);
+ occupied_blob_it->max_size = std::max(occupied_blob_it->max_size, size);
+ occupied_blob_it->id = nullptr;
+ _free_blobs.splice(std::begin(_free_blobs), _occupied_blobs, occupied_blob_it);
// Check if all objects are finalized and reset active group
if(are_all_finalized())
{
- // Update finalized groups
- _finalized_groups[_active_group].insert(std::end(_finalized_groups[_active_group]), std::begin(_active_elements), std::end(_active_elements));
+ ARM_COMPUTE_ERROR_ON(!_occupied_blobs.empty());
// Update blobs and group mappings
update_blobs_and_mappings();
+ // Update finalized groups
+ _finalized_groups[_active_group] = std::move(_active_elements);
+
// Reset state
_active_elements.clear();
_active_group = nullptr;
+ _free_blobs.clear();
}
}
bool ISimpleLifetimeManager::are_all_finalized() const
{
- return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const Element e)
+ return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const std::pair<void *, Element> &e)
{
- return !e.status;
+ return !e.second.status;
});
}
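
The free/occupied blob lists implement the reuse: start_lifetime() grabs a free blob if one exists (otherwise creates one), and end_lifetime() binds the finished element to its blob, grows the blob's max_size, and returns the blob to the free list. A timeline with hypothetical objects A, B, C:

    // start(A): _free_blobs empty   -> new occupied blob { id = A }
    // start(B): _free_blobs empty   -> new occupied blob { id = B }
    // end(A):   blob binds A, max_size = size_A, blob moves to _free_blobs
    // start(C): free blob available -> C reuses A's blob (spliced back to occupied)
    // end(C):   blob bound to {A, C}, max_size = max(size_A, size_C)
    // Invariant: all elements bound to one blob had pairwise disjoint lifetimes.
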
diff --git a/src/runtime/OffsetLifetimeManager.cpp b/src/runtime/OffsetLifetimeManager.cpp
index 4540aeab28..d0b3bde724 100644
--- a/src/runtime/OffsetLifetimeManager.cpp
+++ b/src/runtime/OffsetLifetimeManager.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,19 +58,24 @@ void OffsetLifetimeManager::update_blobs_and_mappings()
ARM_COMPUTE_ERROR_ON(_active_group == nullptr);
// Update blob size
- size_t max_group_size = std::accumulate(std::begin(_active_elements), std::end(_active_elements), static_cast<size_t>(0), [](size_t s, const Element & e)
+ size_t max_group_size = std::accumulate(std::begin(_free_blobs), std::end(_free_blobs), static_cast<size_t>(0), [](size_t s, const Blob & b)
{
- return s + e.size;
+ return s + b.max_size;
});
_blob = std::max(_blob, max_group_size);
// Calculate group mappings
auto &group_mappings = _active_group->mappings();
size_t offset = 0;
- for(auto &e : _active_elements)
+ for(auto &free_blob : _free_blobs)
{
- group_mappings[e.handle] = offset;
- offset += e.size;
+ for(auto &bound_element_id : free_blob.bound_elements)
+ {
+ ARM_COMPUTE_ERROR_ON(_active_elements.find(bound_element_id) == std::end(_active_elements));
+ Element &bound_element = _active_elements[bound_element_id];
+ group_mappings[bound_element.handle] = offset;
+ }
+ offset += free_blob.max_size;
ARM_COMPUTE_ERROR_ON(offset > _blob);
}
}
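
The offset variant packs the free blobs back-to-back inside a single allocation: each bound element inherits its blob's offset, and the pool size becomes the running maximum over finalized groups. Toy numbers, continuing the hypothetical A, B, C example:

    // Free blobs: { max_size = 1024, bound = {A, C} }, { max_size = 512, bound = {B} }
    //   group_mappings[A.handle] = 0;     group_mappings[C.handle] = 0;
    //   group_mappings[B.handle] = 1024;  // offset past the first blob
    //   _blob = max(_blob, 1536)          // one backing allocation of 1536 bytes
    // versus size_A + size_B + size_C when nothing is shared.
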