From 3d1489de593574e65ef1e64a7ae64e4e56c2978b Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 3 May 2018 20:47:16 +0100 Subject: COMPMID-605: Transition buffer memory manager Change-Id: Ide7c6124eb19f13f15f517e62d705646a0cd1ecd Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130184 Reviewed-by: Georgios Pinitas Tested-by: Jenkins Reviewed-by: Anthony Barbier --- arm_compute/graph/GraphContext.h | 11 +- arm_compute/graph/IDeviceBackend.h | 5 + arm_compute/graph/ITensorHandle.h | 29 ++- arm_compute/graph/Types.h | 8 +- arm_compute/graph/Workload.h | 17 +- arm_compute/graph/backends/CL/CLDeviceBackend.h | 1 + arm_compute/graph/backends/CL/CLSubTensorHandle.h | 21 +- arm_compute/graph/backends/CL/CLTensorHandle.h | 14 +- arm_compute/graph/backends/GLES/GCDeviceBackend.h | 1 + arm_compute/graph/backends/GLES/GCTensorHandle.h | 14 +- arm_compute/graph/backends/NEON/NEDeviceBackend.h | 1 + .../graph/backends/NEON/NESubTensorHandle.h | 21 +- arm_compute/graph/backends/NEON/NETensorHandle.h | 14 +- arm_compute/graph/backends/Utils.h | 2 +- .../graph/detail/CrossLayerMemoryManagerHelpers.h | 52 ++++ arm_compute/runtime/ISimpleLifetimeManager.h | 20 +- examples/graph_alexnet.cpp | 3 +- examples/graph_googlenet.cpp | 3 +- examples/graph_inception_v3.cpp | 3 +- examples/graph_inception_v4.cpp | 3 +- examples/graph_lenet.cpp | 3 +- examples/graph_mobilenet.cpp | 3 +- examples/graph_mobilenet_qasymm8.cpp | 3 +- examples/graph_resnet50.cpp | 3 +- examples/graph_squeezenet.cpp | 3 +- examples/graph_squeezenet_v1_1.cpp | 3 +- examples/graph_vgg16.cpp | 3 +- examples/graph_vgg19.cpp | 3 +- src/graph/GraphContext.cpp | 15 +- src/graph/GraphManager.cpp | 41 ++-- src/graph/backends/CL/CLDeviceBackend.cpp | 12 +- src/graph/backends/CL/CLSubTensorHandle.cpp | 33 ++- src/graph/backends/CL/CLTensorHandle.cpp | 35 ++- src/graph/backends/GLES/GCDeviceBackend.cpp | 12 +- src/graph/backends/GLES/GCTensorHandle.cpp | 35 ++- src/graph/backends/NEON/NEDeviceBackend.cpp | 12 +- src/graph/backends/NEON/NESubTensorHandle.cpp | 35 ++- src/graph/backends/NEON/NETensorHandle.cpp | 35 ++- .../detail/CrossLayerMemoryManagerHelpers.cpp | 269 +++++++++++++++++++++ src/graph/detail/ExecutionHelpers.cpp | 25 +- src/runtime/BlobLifetimeManager.cpp | 22 +- src/runtime/ISimpleLifetimeManager.cpp | 57 +++-- src/runtime/OffsetLifetimeManager.cpp | 17 +- 43 files changed, 762 insertions(+), 160 deletions(-) create mode 100644 arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h create mode 100644 src/graph/detail/CrossLayerMemoryManagerHelpers.cpp diff --git a/arm_compute/graph/GraphContext.h b/arm_compute/graph/GraphContext.h index 2f9ab665ce..1831cc2c8b 100644 --- a/arm_compute/graph/GraphContext.h +++ b/arm_compute/graph/GraphContext.h @@ -38,8 +38,10 @@ namespace graph /** Contains structs required for memory management */ struct MemoryManagerContext { - Target target = { Target::UNSPECIFIED }; /**< Target */ - std::shared_ptr<arm_compute::IMemoryManager> mm = { nullptr }; /**< Memory manager */ + Target target = { Target::UNSPECIFIED }; /**< Target */ + std::shared_ptr<arm_compute::IMemoryManager> intra_mm = { nullptr }; /**< Intra-function memory manager */ + std::shared_ptr<arm_compute::IMemoryManager> cross_mm = { nullptr }; /**< Cross-function memory manager */ + std::shared_ptr<arm_compute::IMemoryGroup> cross_group = { nullptr }; /**< Cross-function memory group */ }; /** Graph context **/ @@ -82,6 +84,11 @@ public: * @return Management context for the target if exists else nullptr */ MemoryManagerContext *memory_management_ctx(Target target); /** Gets the memory managers map * * @return Memory manager contexts */ + 
std::map<Target, MemoryManagerContext> &memory_managers(); /** Finalizes memory managers in graph context */ void finalize(); diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h index fa6fbae1ea..f28cb1ab42 100644 --- a/arm_compute/graph/IDeviceBackend.h +++ b/arm_compute/graph/IDeviceBackend.h @@ -61,6 +61,11 @@ public: * @return True if the backend is supported else false */ virtual bool is_backend_supported() = 0; + /** Gets a backend memory allocator * * @return Backend memory allocator */ + virtual IAllocator *backend_allocator() = 0; /** Create a backend Tensor * * @param[in] tensor The tensor we want to create a backend tensor for diff --git a/arm_compute/graph/ITensorHandle.h b/arm_compute/graph/ITensorHandle.h index cc7132e316..261ebf5474 100644 --- a/arm_compute/graph/ITensorHandle.h +++ b/arm_compute/graph/ITensorHandle.h @@ -25,9 +25,13 @@ #define __ARM_COMPUTE_GRAPH_ITENSORHANDLE_H__ #include "arm_compute/core/ITensor.h" +#include "arm_compute/graph/Types.h" namespace arm_compute { +// Forward declarations +class IMemoryGroup; + namespace graph { /** Tensor handle interface object */ @@ -38,10 +42,13 @@ public: virtual ~ITensorHandle() = default; /** Allocates backend memory for the handle */ virtual void allocate() = 0; - /** Backend tensor object accessor */ - virtual arm_compute::ITensor &tensor() = 0; - /** Backend tensor object const accessor */ - virtual const arm_compute::ITensor &tensor() const = 0; + /** Frees backend memory of the handle */ + virtual void free() = 0; + /** Set backend tensor to be managed by a memory group * * @param[in] mg Memory group */ + virtual void manage(IMemoryGroup *mg) = 0; /** Maps backend tensor object * * @param[in] blocking Flags if the mapping operations should be blocking * @@ -58,11 +65,25 @@ public: * on the other hand if a sub-tensor is marked as unused then the parent tensor won't be released */ virtual void release_if_unused() = 0; + /** Backend tensor object accessor */ + virtual arm_compute::ITensor &tensor() = 0; + /** Backend tensor object const accessor */ + virtual const arm_compute::ITensor &tensor() const = 0; + /** Return the parent tensor handle if it is a subtensor, else this * * @return Parent tensor handle */ + virtual ITensorHandle *parent_handle() = 0; /** Checks if a backing tensor is a sub-tensor object or not * * @return True if the backend tensor is a sub-tensor else false */ virtual bool is_subtensor() const = 0; + /** Returns target type * * @return Target type */ + virtual Target target() const = 0; }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 02e5d92983..b195ed7eda 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -76,10 +76,10 @@ class TensorDescriptor; /** Graph configuration structure */ struct GraphConfig { - bool use_function_memory_manager{ false }; /**< Use a memory manager to manage per-function auxiliary memory */ - bool use_transition_memory_manager{ false }; /**< Use a memory manager to manage transition buffer memory */ - bool use_tuner{ false }; /**< Use a tuner in tunable backends */ - int num_threads{ -1 }; /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. 
*/ + bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-function auxiliary memory */ + bool use_transition_memory_manager{ true }; /**< Use a memory manager to manage transition buffer memory */ + bool use_tuner{ false }; /**< Use a tuner in tunable backends */ + int num_threads{ -1 }; /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */ }; /**< Device target types */ diff --git a/arm_compute/graph/Workload.h b/arm_compute/graph/Workload.h index 35066c474d..e9368eefd0 100644 --- a/arm_compute/graph/Workload.h +++ b/arm_compute/graph/Workload.h @@ -24,7 +24,9 @@ #ifndef __ARM_COMPUTE_GRAPH_WORKLOAD_H__ #define __ARM_COMPUTE_GRAPH_WORKLOAD_H__ +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryGroup.h" #include #include @@ -68,10 +70,8 @@ public: struct ExecutionTask { // TODO (geopin01) : Support vector of functions? - std::unique_ptr<arm_compute::IFunction> task = {}; /**< Task to execute */ - INode *node = {}; /**< Node bound to this workload */ - std::vector<ITensorHandle *> commit_handles = {}; /**< Handles needs to sync for this task to execute */ - std::vector<ITensorHandle *> release_handles = {}; /**< Handles that can be released after this node execution */ + std::unique_ptr<arm_compute::IFunction> task = {}; /**< Task to execute */ + INode *node = {}; /**< Node bound to this workload */ /** Function operator */ void operator()(); @@ -83,10 +83,11 @@ struct ExecutionTask /** Execution workload */ struct ExecutionWorkload { - std::vector<ITensorHandle *> inputs = {}; /**< Input handles */ - std::vector<ITensorHandle *> outputs = {}; /**< Output handles */ - std::vector<ExecutionTask> tasks = {}; /**< Execution workload */ - Graph *graph = nullptr; /**< Graph bound to the workload */ + std::vector<ITensorHandle *> inputs = {}; /**< Input handles */ + std::vector<ITensorHandle *> outputs = {}; /**< Output handles */ + std::vector<ExecutionTask> tasks = {}; /**< Execution workload */ + Graph *graph = { nullptr }; /**< Graph bound to the workload */ + GraphContext *ctx = { nullptr }; /**< Graph execution context */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index 5adbe0e1a8..ab39d0fb1b 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -55,6 +55,7 @@ public: void initialize_backend() override; void setup_backend_context(GraphContext &ctx) override; bool is_backend_supported() override; + IAllocator *backend_allocator() override; std::unique_ptr<ITensorHandle> create_tensor(const Tensor &tensor) override; std::unique_ptr<ITensorHandle> create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent) override; std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override; diff --git a/arm_compute/graph/backends/CL/CLSubTensorHandle.h b/arm_compute/graph/backends/CL/CLSubTensorHandle.h index 4be5842c70..0c515a1c53 100644 --- a/arm_compute/graph/backends/CL/CLSubTensorHandle.h +++ b/arm_compute/graph/backends/CL/CLSubTensorHandle.h @@ -52,18 +52,27 @@ public: CLSubTensorHandle(CLSubTensorHandle &&) = default; /** Allow instances of this class to be moved */ CLSubTensorHandle &operator=(CLSubTensorHandle &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSubTensorHandle(const CLSubTensorHandle &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSubTensorHandle 
&operator=(const CLSubTensorHandle &) = delete; // Inherited overridden methods - void allocate() override; + void allocate() override; + void free() override; + void manage(IMemoryGroup *mg) override; + void map(bool blocking) override; + void unmap() override; + void release_if_unused() override; arm_compute::ITensor &tensor() override; const arm_compute::ITensor &tensor() const override; - void map(bool blocking) override; - void unmap() override; - void release_if_unused() override; - bool is_subtensor() const override; + ITensorHandle *parent_handle() override; + bool is_subtensor() const override; + Target target() const override; private: - arm_compute::CLSubTensor _sub_tensor; /**< Backend Sub-Tensor */ + arm_compute::CLSubTensor _sub_tensor; /**< Backend Sub-Tensor */ + ITensorHandle *_parent_handle; /**< Parent handle */ }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/backends/CL/CLTensorHandle.h b/arm_compute/graph/backends/CL/CLTensorHandle.h index 8f5a70cbbb..23997325d8 100644 --- a/arm_compute/graph/backends/CL/CLTensorHandle.h +++ b/arm_compute/graph/backends/CL/CLTensorHandle.h @@ -51,13 +51,17 @@ public: CLTensorHandle &operator=(CLTensorHandle &&) = default; // Inherited overridden methods - void allocate() override; + void allocate() override; + void free() override; + void manage(IMemoryGroup *mg) override; + void map(bool blocking) override; + void unmap() override; + void release_if_unused() override; arm_compute::ITensor &tensor() override; const arm_compute::ITensor &tensor() const override; - void map(bool blocking) override; - void unmap() override; - void release_if_unused() override; - bool is_subtensor() const override; + ITensorHandle *parent_handle() override; + bool is_subtensor() const override; + Target target() const override; private: arm_compute::CLTensor _tensor; /**< Backend Tensor */ diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h index be81a8f1f2..dc0e2b07dc 100644 --- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h +++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h @@ -45,6 +45,7 @@ public: void initialize_backend() override; void setup_backend_context(GraphContext &ctx) override; bool is_backend_supported() override; + IAllocator *backend_allocator() override; std::unique_ptr create_tensor(const Tensor &tensor) override; std::unique_ptr create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent) override; std::unique_ptr configure_node(INode &node, GraphContext &ctx) override; diff --git a/arm_compute/graph/backends/GLES/GCTensorHandle.h b/arm_compute/graph/backends/GLES/GCTensorHandle.h index 774268fd3f..29b0319d15 100644 --- a/arm_compute/graph/backends/GLES/GCTensorHandle.h +++ b/arm_compute/graph/backends/GLES/GCTensorHandle.h @@ -51,13 +51,17 @@ public: GCTensorHandle &operator=(GCTensorHandle &&) = default; // Inherited overridden methods - void allocate() override; + void allocate() override; + void free() override; + void manage(IMemoryGroup *mg) override; + void map(bool blocking) override; + void unmap() override; + void release_if_unused() override; arm_compute::ITensor &tensor() override; const arm_compute::ITensor &tensor() const override; - void map(bool blocking) override; - void unmap() override; - void release_if_unused() override; - bool is_subtensor() const override; + ITensorHandle *parent_handle() override; + bool is_subtensor() const override; + Target target() const override; 
private: arm_compute::GCTensor _tensor; /**< Backend Tensor */ diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h index b23c83adea..c1e2e0c078 100644 --- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h +++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h @@ -44,6 +44,7 @@ public: void initialize_backend() override; void setup_backend_context(GraphContext &ctx) override; bool is_backend_supported() override; + IAllocator *backend_allocator() override; std::unique_ptr create_tensor(const Tensor &tensor) override; std::unique_ptr create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent) override; std::unique_ptr configure_node(INode &node, GraphContext &ctx) override; diff --git a/arm_compute/graph/backends/NEON/NESubTensorHandle.h b/arm_compute/graph/backends/NEON/NESubTensorHandle.h index 11dcec60f3..101d3e6644 100644 --- a/arm_compute/graph/backends/NEON/NESubTensorHandle.h +++ b/arm_compute/graph/backends/NEON/NESubTensorHandle.h @@ -52,18 +52,27 @@ public: NESubTensorHandle(NESubTensorHandle &&) = default; /** Allow instances of this class to be moved */ NESubTensorHandle &operator=(NESubTensorHandle &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESubTensorHandle(const NESubTensorHandle &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESubTensorHandle &operator=(const NESubTensorHandle &) = delete; // Inherited overridden methods - void allocate() override; + void allocate() override; + void free() override; + void manage(IMemoryGroup *mg) override; + void map(bool blocking) override; + void unmap() override; + void release_if_unused() override; arm_compute::ITensor &tensor() override; const arm_compute::ITensor &tensor() const override; - void map(bool blocking) override; - void unmap() override; - void release_if_unused() override; - bool is_subtensor() const override; + ITensorHandle *parent_handle() override; + bool is_subtensor() const override; + Target target() const override; private: - arm_compute::SubTensor _sub_tensor; /**< Backend Sub-Tensor */ + arm_compute::SubTensor _sub_tensor; /**< Backend Sub-Tensor */ + ITensorHandle *_parent_handle; /**< Parent handle */ }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/backends/NEON/NETensorHandle.h b/arm_compute/graph/backends/NEON/NETensorHandle.h index 06ccdd83cc..150e0c97c8 100644 --- a/arm_compute/graph/backends/NEON/NETensorHandle.h +++ b/arm_compute/graph/backends/NEON/NETensorHandle.h @@ -51,13 +51,17 @@ public: NETensorHandle &operator=(NETensorHandle &&) = default; // Inherited overridden methods - void allocate() override; + void allocate() override; + void free() override; + void manage(IMemoryGroup *mg) override; + void map(bool blocking) override; + void unmap() override; + void release_if_unused() override; arm_compute::ITensor &tensor() override; const arm_compute::ITensor &tensor() const override; - void map(bool blocking) override; - void unmap() override; - void release_if_unused() override; - bool is_subtensor() const override; + ITensorHandle *parent_handle() override; + bool is_subtensor() const override; + Target target() const override; private: arm_compute::Tensor _tensor; /**< Backend Tensor */ diff --git a/arm_compute/graph/backends/Utils.h b/arm_compute/graph/backends/Utils.h index b902d17c0e..c7a50d93c6 100644 --- 
a/arm_compute/graph/backends/Utils.h +++ b/arm_compute/graph/backends/Utils.h @@ -88,7 +88,7 @@ inline bool is_in_place_operation(void *input, void *output) inline std::shared_ptr get_memory_manager(GraphContext &ctx, Target target) { bool enabled = ctx.config().use_function_memory_manager && (ctx.memory_management_ctx(target) != nullptr); - return enabled ? ctx.memory_management_ctx(target)->mm : nullptr; + return enabled ? ctx.memory_management_ctx(target)->intra_mm : nullptr; } } // namespace backends } // namespace graph diff --git a/arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h b/arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h new file mode 100644 index 0000000000..b7424c8e88 --- /dev/null +++ b/arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GRAPH_DETAIL_CROSS_LAYER_MEMORY_MANAGER_HELPERS_H__ +#define __ARM_COMPUTE_GRAPH_DETAIL_CROSS_LAYER_MEMORY_MANAGER_HELPERS_H__ + +#include + +namespace arm_compute +{ +namespace graph +{ +// Forward declarations +class Graph; +class GraphContext; +class ExecutionWorkload; +class ITransMemoryManager; +class ITensorHandle; + +namespace detail +{ +/** Configures transition manager and execution workload + * + * @param[in] g Graph to configure + * @param[in] ctx Graph context + * @param[in] workload Workload to configure + */ +void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload &workload); +} // namespace detail +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_DETAIL_CROSS_LAYER_MEMORY_MANAGER_HELPERS_H__ */ diff --git a/arm_compute/runtime/ISimpleLifetimeManager.h b/arm_compute/runtime/ISimpleLifetimeManager.h index 792ab0b558..7942e40f7f 100644 --- a/arm_compute/runtime/ISimpleLifetimeManager.h +++ b/arm_compute/runtime/ISimpleLifetimeManager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,7 +30,9 @@ #include "arm_compute/runtime/Types.h" #include +#include #include +#include #include namespace arm_compute @@ -77,9 +79,19 @@ protected: bool status; /**< Lifetime status */ }; - IMemoryGroup *_active_group; /**< Active group */ - std::vector<Element> _active_elements; /**< A map that contains the active elements */ - std::map<IMemoryGroup *, std::vector<Element>> _finalized_groups; /**< A map that contains the finalized groups */ + /** Blob struct */ + struct Blob + { + void *id; + size_t max_size; + std::set<void *> bound_elements; + }; + + IMemoryGroup *_active_group; /**< Active group */ + std::map<void *, Element> _active_elements; /**< A map that contains the active elements */ + std::list<Blob> _free_blobs; /**< Free blobs */ + std::list<Blob> _occupied_blobs; /**< Occupied blobs */ + std::map<IMemoryGroup *, std::map<void *, Element>> _finalized_groups; /**< A map that contains the finalized groups */ }; } // namespace arm_compute #endif /* __ARM_COMPUTE_ISIMPLELIFETIMEMANAGER_H__ */ diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 291a586315..45c2b56cc2 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -169,8 +169,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp index 25e9e7f0cf..deafe5a822 100644 --- a/examples/graph_googlenet.cpp +++ b/examples/graph_googlenet.cpp @@ -135,8 +135,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index 01a4d0c49c..7fa0fc74fe 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -190,8 +190,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index 8f340354cd..4217c78554 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -159,8 +159,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); #else /* __aarch64__ */ using namespace arm_compute; diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 895d9aad4e..ea0916b317 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -109,8 +109,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 870e67daa5..813c0bfe1d 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -167,8 +167,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_mobilenet_qasymm8.cpp 
b/examples/graph_mobilenet_qasymm8.cpp index ddf6175c4a..7edd1822ae 100644 --- a/examples/graph_mobilenet_qasymm8.cpp +++ b/examples/graph_mobilenet_qasymm8.cpp @@ -169,8 +169,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index d0fa106bce..18a028d48c 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -126,8 +126,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index ff2487cd65..8ed43f707d 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -171,8 +171,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index e1a1f661fb..529f4fe80a 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -176,8 +176,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index 9c2763f649..44b4c4c3f2 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -230,8 +230,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index 0684309111..229112bb6f 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -243,8 +243,7 @@ public: // Finalize graph GraphConfig config; - config.use_function_memory_manager = true; - config.use_tuner = (target == 2); + config.use_tuner = (target == 2); graph.finalize(target_hint, config); } void do_run() override diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp index 6fc45c0aa7..3f311145bc 100644 --- a/src/graph/GraphContext.cpp +++ b/src/graph/GraphContext.cpp @@ -60,13 +60,24 @@ MemoryManagerContext *GraphContext::memory_management_ctx(Target target) return (_memory_managers.find(target) != std::end(_memory_managers)) ? 
&_memory_managers[target] : nullptr; } +std::map &GraphContext::memory_managers() +{ + return _memory_managers; +} + void GraphContext::finalize() { for(auto &mm_obj : _memory_managers) { - if(mm_obj.second.mm != nullptr) + // Finalize intra layer memory manager + if(mm_obj.second.intra_mm != nullptr) + { + mm_obj.second.intra_mm->finalize(); + } + // Finalize cross layer memory manager + if(mm_obj.second.cross_mm != nullptr) { - mm_obj.second.mm->finalize(); + mm_obj.second.cross_mm->finalize(); } } } diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp index aac6488311..a67e5b25d6 100644 --- a/src/graph/GraphManager.cpp +++ b/src/graph/GraphManager.cpp @@ -28,6 +28,7 @@ #include "arm_compute/graph/Logger.h" #include "arm_compute/graph/PassManager.h" #include "arm_compute/graph/Utils.h" +#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h" #include "arm_compute/graph/detail/ExecutionHelpers.h" namespace arm_compute @@ -72,41 +73,37 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager & auto workload = detail::configure_all_nodes(graph, ctx); ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!"); + // Allocate const tensors and call accessors + detail::allocate_const_tensors(graph); + detail::call_all_const_node_accessors(graph); + // TODO (COMPMID-920) : Update prepare for NEON/GC if(forced_target == Target::CL) { - // Allocate const tensors and call accessors - detail::allocate_const_tensors(graph); - detail::call_all_const_node_accessors(graph); - // Prepare graph detail::prepare_all_tasks(workload); + } - // Allocate all tensors - detail::allocate_all_tensors(graph); - - // Finalize Graph context - ctx.finalize(); - - // Register graph - _workloads.insert(std::make_pair(graph.id(), std::move(workload))); - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl); + // Setup tensor memory (Allocate all tensors or setup transition manager) + if(ctx.config().use_transition_memory_manager) + { + detail::configure_transition_manager(graph, ctx, workload); } else { - // Allocate all tensors detail::allocate_all_tensors(graph); + } - // Call accessors on all Const nodes - detail::call_all_const_node_accessors(graph); - - // Finalize Graph context - ctx.finalize(); + // Finalize Graph context + ctx.finalize(); - // Register graph - _workloads.insert(std::make_pair(graph.id(), std::move(workload))); - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl); + // Register graph + _workloads.insert(std::make_pair(graph.id(), std::move(workload))); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl); + // TODO (COMPMID-920) : Update prepare for NEON/GC + if(forced_target != Target::CL) + { // Make first run execute_graph(graph); diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index 37cbcd72d7..7f2be674f6 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -37,6 +37,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/CL/CLBufferAllocator.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/MemoryManagerOnDemand.h" #include "arm_compute/runtime/PoolManager.h" @@ -107,8 +108,10 @@ void 
CLDeviceBackend::setup_backend_context(GraphContext &ctx) if(ctx.memory_management_ctx(Target::CL) == nullptr) { MemoryManagerContext mm_ctx; - mm_ctx.target = Target::CL; - mm_ctx.mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.target = Target::CL; + mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.cross_group = std::make_shared(mm_ctx.cross_mm); ctx.insert_memory_management_ctx(std::move(mm_ctx)); } @@ -119,6 +122,11 @@ bool CLDeviceBackend::is_backend_supported() return arm_compute::opencl_is_available(); } +IAllocator *CLDeviceBackend::backend_allocator() +{ + return &_allocator; +} + std::unique_ptr CLDeviceBackend::create_tensor(const Tensor &tensor) { // Get tensor descriptor diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp index a1bc8a1dd3..016dca753b 100644 --- a/src/graph/backends/CL/CLSubTensorHandle.cpp +++ b/src/graph/backends/CL/CLSubTensorHandle.cpp @@ -32,11 +32,12 @@ namespace graph namespace backends { CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent) - : _sub_tensor() + : _sub_tensor(), _parent_handle(nullptr) { ARM_COMPUTE_ERROR_ON(!parent_handle); auto parent_tensor = arm_compute::utils::cast::polymorphic_downcast(&parent_handle->tensor()); _sub_tensor = arm_compute::CLSubTensor(parent_tensor, shape, coords, extend_parent); + _parent_handle = parent_handle; } void CLSubTensorHandle::allocate() @@ -44,14 +45,15 @@ void CLSubTensorHandle::allocate() // noop } -const arm_compute::ITensor &CLSubTensorHandle::tensor() const +void CLSubTensorHandle::free() { - return _sub_tensor; + // noop } -arm_compute::ITensor &CLSubTensorHandle::tensor() +void CLSubTensorHandle::manage(IMemoryGroup *mg) { - return _sub_tensor; + ARM_COMPUTE_UNUSED(mg); + // noop } void CLSubTensorHandle::map(bool blocking) @@ -69,10 +71,31 @@ void CLSubTensorHandle::release_if_unused() // noop } +const arm_compute::ITensor &CLSubTensorHandle::tensor() const +{ + return _sub_tensor; +} + +arm_compute::ITensor &CLSubTensorHandle::tensor() +{ + return _sub_tensor; +} + +ITensorHandle *CLSubTensorHandle::parent_handle() +{ + ARM_COMPUTE_ERROR_ON(_parent_handle == nullptr); + return _parent_handle->parent_handle(); +} + bool CLSubTensorHandle::is_subtensor() const { return true; } + +Target CLSubTensorHandle::target() const +{ + return Target::CL; +} } // namespace backends } // namespace graph } // namespace arm_compute \ No newline at end of file diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp index 563c4d9ac6..219d9d0301 100644 --- a/src/graph/backends/CL/CLTensorHandle.cpp +++ b/src/graph/backends/CL/CLTensorHandle.cpp @@ -23,6 +23,9 @@ */ #include "arm_compute/graph/backends/CL/CLTensorHandle.h" +#include "arm_compute/core/utils/misc/Cast.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" + namespace arm_compute { namespace graph @@ -40,14 +43,18 @@ void CLTensorHandle::allocate() _tensor.allocator()->allocate(); } -const arm_compute::ITensor &CLTensorHandle::tensor() const +void CLTensorHandle::free() { - return _tensor; + _tensor.allocator()->free(); } -arm_compute::ITensor &CLTensorHandle::tensor() +void CLTensorHandle::manage(IMemoryGroup *mg) { - return _tensor; + if(mg != nullptr) + { + auto *cl_mg = arm_compute::utils::cast::polymorphic_downcast(mg); + cl_mg->manage(&_tensor); + } 
} void CLTensorHandle::map(bool blocking) @@ -69,10 +76,30 @@ void CLTensorHandle::release_if_unused() } } +const arm_compute::ITensor &CLTensorHandle::tensor() const +{ + return _tensor; +} + +arm_compute::ITensor &CLTensorHandle::tensor() +{ + return _tensor; +} + +ITensorHandle *CLTensorHandle::parent_handle() +{ + return this; +} + bool CLTensorHandle::is_subtensor() const { return false; } + +Target CLTensorHandle::target() const +{ + return Target::CL; +} } // namespace backends } // namespace graph } // namespace arm_compute \ No newline at end of file diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp index 0185598965..770cca5d42 100644 --- a/src/graph/backends/GLES/GCDeviceBackend.cpp +++ b/src/graph/backends/GLES/GCDeviceBackend.cpp @@ -36,6 +36,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" #include "arm_compute/runtime/MemoryManagerOnDemand.h" #include "arm_compute/runtime/PoolManager.h" @@ -68,8 +69,10 @@ void GCDeviceBackend::setup_backend_context(GraphContext &ctx) if(ctx.memory_management_ctx(Target::GC) == nullptr) { MemoryManagerContext mm_ctx; - mm_ctx.target = Target::GC; - mm_ctx.mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.target = Target::GC; + mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.cross_group = std::make_shared(mm_ctx.cross_mm); ctx.insert_memory_management_ctx(std::move(mm_ctx)); } @@ -80,6 +83,11 @@ bool GCDeviceBackend::is_backend_supported() return arm_compute::opengles31_is_available(); } +IAllocator *GCDeviceBackend::backend_allocator() +{ + return &_allocator; +} + std::unique_ptr GCDeviceBackend::create_tensor(const Tensor &tensor) { // Get tensor descriptor diff --git a/src/graph/backends/GLES/GCTensorHandle.cpp b/src/graph/backends/GLES/GCTensorHandle.cpp index ae7c778130..4e5c652120 100644 --- a/src/graph/backends/GLES/GCTensorHandle.cpp +++ b/src/graph/backends/GLES/GCTensorHandle.cpp @@ -23,6 +23,9 @@ */ #include "arm_compute/graph/backends/GLES/GCTensorHandle.h" +#include "arm_compute/core/utils/misc/Cast.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" + namespace arm_compute { namespace graph @@ -40,14 +43,18 @@ void GCTensorHandle::allocate() _tensor.allocator()->allocate(); } -const arm_compute::ITensor &GCTensorHandle::tensor() const +void GCTensorHandle::free() { - return _tensor; + _tensor.allocator()->free(); } -arm_compute::ITensor &GCTensorHandle::tensor() +void GCTensorHandle::manage(IMemoryGroup *mg) { - return _tensor; + if(mg != nullptr) + { + auto *gc_mg = arm_compute::utils::cast::polymorphic_downcast(mg); + gc_mg->manage(&_tensor); + } } void GCTensorHandle::map(bool blocking) @@ -69,10 +76,30 @@ void GCTensorHandle::release_if_unused() } } +const arm_compute::ITensor &GCTensorHandle::tensor() const +{ + return _tensor; +} + +arm_compute::ITensor &GCTensorHandle::tensor() +{ + return _tensor; +} + +ITensorHandle *GCTensorHandle::parent_handle() +{ + return this; +} + bool GCTensorHandle::is_subtensor() const { return false; } + +Target GCTensorHandle::target() const +{ + return Target::GC; +} } // namespace backends } // namespace graph } // namespace arm_compute \ No newline at end of file diff --git 
a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp index def6c39003..7c2db40260 100644 --- a/src/graph/backends/NEON/NEDeviceBackend.cpp +++ b/src/graph/backends/NEON/NEDeviceBackend.cpp @@ -37,6 +37,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/Allocator.h" #include "arm_compute/runtime/BlobLifetimeManager.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/MemoryManagerOnDemand.h" #include "arm_compute/runtime/OffsetLifetimeManager.h" #include "arm_compute/runtime/PoolManager.h" @@ -74,8 +75,10 @@ void NEDeviceBackend::setup_backend_context(GraphContext &ctx) if(ctx.memory_management_ctx(Target::NEON) == nullptr) { MemoryManagerContext mm_ctx; - mm_ctx.target = Target::NEON; - mm_ctx.mm = create_memory_manager(MemoryManagerAffinity::Buffer); + mm_ctx.target = Target::NEON; + mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Offset); + mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Offset); + mm_ctx.cross_group = std::make_shared(mm_ctx.cross_mm); ctx.insert_memory_management_ctx(std::move(mm_ctx)); } @@ -86,6 +89,11 @@ bool NEDeviceBackend::is_backend_supported() return true; } +IAllocator *NEDeviceBackend::backend_allocator() +{ + return &_allocator; +} + std::unique_ptr NEDeviceBackend::create_tensor(const Tensor &tensor) { // Get tensor descriptor diff --git a/src/graph/backends/NEON/NESubTensorHandle.cpp b/src/graph/backends/NEON/NESubTensorHandle.cpp index c48ba6b9d6..c0acedd9f2 100644 --- a/src/graph/backends/NEON/NESubTensorHandle.cpp +++ b/src/graph/backends/NEON/NESubTensorHandle.cpp @@ -30,10 +30,11 @@ namespace graph namespace backends { NESubTensorHandle::NESubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent) - : _sub_tensor() + : _sub_tensor(), _parent_handle(nullptr) { ARM_COMPUTE_ERROR_ON(!parent_handle); - _sub_tensor = arm_compute::SubTensor(&parent_handle->tensor(), shape, coords, extend_parent); + _sub_tensor = arm_compute::SubTensor(&parent_handle->tensor(), shape, coords, extend_parent); + _parent_handle = parent_handle; } void NESubTensorHandle::allocate() @@ -41,14 +42,15 @@ void NESubTensorHandle::allocate() // noop } -const arm_compute::ITensor &NESubTensorHandle::tensor() const +void NESubTensorHandle::free() { - return _sub_tensor; + // noop } -arm_compute::ITensor &NESubTensorHandle::tensor() +void NESubTensorHandle::manage(IMemoryGroup *mg) { - return _sub_tensor; + ARM_COMPUTE_UNUSED(mg); + // noop } void NESubTensorHandle::map(bool blocking) @@ -66,10 +68,31 @@ void NESubTensorHandle::release_if_unused() // noop } +const arm_compute::ITensor &NESubTensorHandle::tensor() const +{ + return _sub_tensor; +} + +arm_compute::ITensor &NESubTensorHandle::tensor() +{ + return _sub_tensor; +} + +ITensorHandle *NESubTensorHandle::parent_handle() +{ + ARM_COMPUTE_ERROR_ON(_parent_handle == nullptr); + return _parent_handle->parent_handle(); +} + bool NESubTensorHandle::is_subtensor() const { return true; } + +Target NESubTensorHandle::target() const +{ + return Target::NEON; +} } // namespace backends } // namespace graph } // namespace arm_compute \ No newline at end of file diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp index 8508ac9511..5892116caf 100644 --- a/src/graph/backends/NEON/NETensorHandle.cpp +++ b/src/graph/backends/NEON/NETensorHandle.cpp @@ -23,6 +23,9 @@ */ #include 
"arm_compute/graph/backends/NEON/NETensorHandle.h" +#include "arm_compute/core/utils/misc/Cast.h" +#include "arm_compute/runtime/MemoryGroup.h" + namespace arm_compute { namespace graph @@ -40,14 +43,18 @@ void NETensorHandle::allocate() _tensor.allocator()->allocate(); } -const arm_compute::ITensor &NETensorHandle::tensor() const +void NETensorHandle::free() { - return _tensor; + _tensor.allocator()->free(); } -arm_compute::ITensor &NETensorHandle::tensor() +void NETensorHandle::manage(IMemoryGroup *mg) { - return _tensor; + if(mg != nullptr) + { + auto *ne_mg = arm_compute::utils::cast::polymorphic_downcast(mg); + ne_mg->manage(&_tensor); + } } void NETensorHandle::map(bool blocking) @@ -68,10 +75,30 @@ void NETensorHandle::release_if_unused() } } +const arm_compute::ITensor &NETensorHandle::tensor() const +{ + return _tensor; +} + +arm_compute::ITensor &NETensorHandle::tensor() +{ + return _tensor; +} + +ITensorHandle *NETensorHandle::parent_handle() +{ + return this; +} + bool NETensorHandle::is_subtensor() const { return false; } + +Target NETensorHandle::target() const +{ + return Target::NEON; +} } // namespace backends } // namespace graph } // namespace arm_compute \ No newline at end of file diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp new file mode 100644 index 0000000000..7fc5ca0576 --- /dev/null +++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h" + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/GraphManager.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" +#include "arm_compute/graph/backends/BackendRegistry.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/utils/misc/Cast.h" + +#include +#include + +namespace arm_compute +{ +namespace graph +{ +namespace detail +{ +namespace +{ +using HandleCountPair = std::pair<ITensorHandle *, unsigned int>; +using HandleCounter = std::map<ITensorHandle *, unsigned int>; +using TargetHandleCounter = std::map<Target, HandleCounter>; + +/** Holds managed IO tensor handles of a task */ +struct TaskHandles +{ + std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> input_handles = {}; /**< Input handles to a task */ + std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> output_handles = {}; /**< Output handles of a task */ +}; + +/** Returns memory group depending on handle backend type * * @param[in] ctx Graph context * @param[in] handle Tensor handle * * @return Memory group */ +IMemoryGroup *get_memory_group_from_handle(GraphContext &ctx, ITensorHandle *handle) +{ + ARM_COMPUTE_ERROR_ON(handle == nullptr); + return ctx.memory_management_ctx(handle->target())->cross_group.get(); +} + +/** Get handles of const tensors of graph * * @param[in] g Graph * * @return Handles of const tensors of graph */ +std::set<ITensorHandle *> get_const_handles(const Graph &g) +{ + std::set<NodeType> const_node_types = { NodeType::Input, NodeType::Output, NodeType::Const }; + + std::set<ITensorHandle *> const_tensors; + + auto &nodes = g.nodes(); + for(auto &node : nodes) + { + // If it's a const node: + if(node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types)) + { + // TODO (geopin01) : Create IO iterator wrappers + // Add all its inputs / outputs to the list of constant handles + for(unsigned int i = 0; i < node->num_inputs(); ++i) + { + if(node->input(i) != nullptr) + { + const_tensors.insert(node->input(i)->handle()->parent_handle()); + } + } + for(unsigned int i = 0; i < node->num_outputs(); ++i) + { + if(node->output(i) != nullptr) + { + const_tensors.insert(node->output(i)->handle()->parent_handle()); + } + } + } + } + + return const_tensors; +} + +/** Builds a list of all the transition handles (Handles that are used to link two nodes) * * @param[in] ctx Graph context * @param[in] task Workload task * @param[in] const_tensors Constant tensors * * @return List of transition handles */ +TaskHandles get_transition_handles(GraphContext &ctx, + ExecutionTask &task, + const std::set<ITensorHandle *> &const_tensors) +{ + ARM_COMPUTE_ERROR_ON(task.node == nullptr || task.task == nullptr); + INode &node = *task.node; + + TaskHandles transition_handles; + + // Add input handles + for(unsigned int i = 0; i < node.input_edges().size(); ++i) + { + Edge *input_edge = node.input_edge(i); + // If this input is the output of another node + if(input_edge != nullptr && input_edge->tensor() != nullptr && const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors)) + { + // Then add it to the list of transition buffers + ITensorHandle *tensor_handle = input_edge->tensor()->handle()->parent_handle(); + IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle); + transition_handles.input_handles.push_back(std::make_pair(tensor_handle, mm_group)); + } + } + + // Add output handles + for(unsigned int i = 0; i < node.num_outputs(); ++i) + { + Tensor *output_tensor = node.output(i); + // If this output is used as an 
input for another node + if(output_tensor != nullptr && const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors)) + { + ITensorHandle *tensor_handle = output_tensor->handle()->parent_handle(); + IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle); + transition_handles.output_handles.push_back(std::make_pair(tensor_handle, mm_group)); + } + } + + return transition_handles; +} + +/** Counts the refcount of each input handle of each target * * @param[in] task_handles Task handles containing the managed input handles * @param[in,out] handle_counter Data structure that keeps the handles reference count */ +void count_input_handles_per_target(const TaskHandles &task_handles, TargetHandleCounter &handle_counter) +{ + for(const auto &handle : task_handles.input_handles) + { + ITensorHandle *key = handle.first; + HandleCounter &target_counter = handle_counter[key->target()]; + if(target_counter.find(key) == std::end(target_counter)) + { + target_counter.emplace(std::make_pair(key, 1)); + } + else + { + ++target_counter[key]; + } + } +} + +/** Calculates the lifetime of each tensor handle * * @param[in, out] tasks_handles Tensor handles for each task * @param[in] hc Data structure that keeps the handles reference count */ +void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const HandleCounter &hc) +{ + // Identify max number of tensors in flight + HandleCounter tensors_in_flight; + + // Acquires the given handles and sets them as in flight if they aren't already + auto acquire = [&](std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> &handles) + { + for(auto &handle : handles) + { + ITensorHandle *parent_handle = handle.first; + ARM_COMPUTE_ERROR_ON(parent_handle == nullptr); + // If the tensor is not already in flight: + if(tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight)) + { + ARM_COMPUTE_ERROR_ON(hc.find(parent_handle) == std::end(hc)); + // Then add it to the list of in flight tensors + tensors_in_flight.insert(std::make_pair(parent_handle, hc.at(parent_handle))); + // Start of allocation's lifetime + parent_handle->manage(handle.second); + } + } + }; + + for(auto &task_handle : tasks_handles) + { + // Marking all the input and output tensors of the task as in flight + acquire(task_handle.input_handles); + acquire(task_handle.output_handles); + + // Releasing the input tensors + for(auto &input_handle : task_handle.input_handles) + { + ITensorHandle *ihandle = input_handle.first; + ARM_COMPUTE_ERROR_ON(ihandle == nullptr); + ARM_COMPUTE_ERROR_ON(tensors_in_flight.find(ihandle) == std::end(tensors_in_flight)); + --tensors_in_flight[ihandle]; + if(tensors_in_flight[ihandle] <= 0) + { + // Remove tensor from tensors in flight + tensors_in_flight.erase(ihandle); + // End of allocation's lifetime + ihandle->allocate(); + } + } + } +} +} // namespace + +void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload &workload) +{ + // Get const tensors (un-managed) + std::set<ITensorHandle *> const_tensors = get_const_handles(g); + + std::vector<TaskHandles> tasks_handles; + TargetHandleCounter target_handle_count; + + // Count handles + for(auto &task : workload.tasks) + { + // Populates IO handles + tasks_handles.push_back(get_transition_handles(ctx, task, const_tensors)); + + // Count handles + count_input_handles_per_target(tasks_handles.back(), target_handle_count); + } + + // Setup memory managers + for(auto &hc : target_handle_count) + { + MemoryManagerContext *mm_ctx = ctx.memory_management_ctx(hc.first); + if(mm_ctx != nullptr) + { + if(mm_ctx->cross_mm 
!= nullptr && mm_ctx->cross_group != nullptr) + { + // Manage and allocate tensors + configure_handle_lifetime(tasks_handles, hc.second); + } + } + } +} +} // namespace detail +} // namespace graph +} // namespace arm_compute diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp index c1304436f6..c370fdf916 100644 --- a/src/graph/detail/ExecutionHelpers.cpp +++ b/src/graph/detail/ExecutionHelpers.cpp @@ -143,7 +143,9 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx) { ExecutionWorkload workload; workload.graph = &g; - auto &nodes = g.nodes(); + workload.ctx = &ctx; + + auto &nodes = g.nodes(); // Create tasks for(auto &node : nodes) @@ -235,10 +237,31 @@ void prepare_all_tasks(ExecutionWorkload &workload) void call_all_tasks(ExecutionWorkload &workload) { + ARM_COMPUTE_ERROR_ON(workload.ctx == nullptr); + + // Acquire memory for the transition buffers + for(auto &mm_ctx : workload.ctx->memory_managers()) + { + if(mm_ctx.second.cross_group != nullptr) + { + mm_ctx.second.cross_group->acquire(); + } + } + + // Execute tasks for(auto &task : workload.tasks) { task(); } + + // Release memory for the transition buffers + for(auto &mm_ctx : workload.ctx->memory_managers()) + { + if(mm_ctx.second.cross_group != nullptr) + { + mm_ctx.second.cross_group->release(); + } + } } void call_all_output_node_accessors(ExecutionWorkload &workload) diff --git a/src/runtime/BlobLifetimeManager.cpp b/src/runtime/BlobLifetimeManager.cpp index 3ca5071d91..2a4ab6ec0d 100644 --- a/src/runtime/BlobLifetimeManager.cpp +++ b/src/runtime/BlobLifetimeManager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,15 +57,15 @@ void BlobLifetimeManager::update_blobs_and_mappings() ARM_COMPUTE_ERROR_ON(!are_all_finalized()); ARM_COMPUTE_ERROR_ON(_active_group == nullptr); - // Sort active group requirements in descending order. - std::sort(std::begin(_active_elements), std::end(_active_elements), [](const Element & a, const Element & b) + // Sort free blobs requirements in descending order. + _free_blobs.sort([](const Blob & ba, const Blob & bb) { - return a.size > b.size; + return ba.max_size > bb.max_size; }); std::vector group_sizes; - std::transform(std::begin(_active_elements), std::end(_active_elements), std::back_inserter(group_sizes), [](const Element & e) + std::transform(std::begin(_free_blobs), std::end(_free_blobs), std::back_inserter(group_sizes), [](const Blob & b) { - return e.size; + return b.max_size; }); // Update blob sizes @@ -80,8 +80,14 @@ void BlobLifetimeManager::update_blobs_and_mappings() // Calculate group mappings auto &group_mappings = _active_group->mappings(); int blob_idx = 0; - for(auto &e : _active_elements) + for(auto &free_blob : _free_blobs) { - group_mappings[e.handle] = blob_idx++; + for(auto &bound_element_id : free_blob.bound_elements) + { + ARM_COMPUTE_ERROR_ON(_active_elements.find(bound_element_id) == std::end(_active_elements)); + Element &bound_element = _active_elements[bound_element_id]; + group_mappings[bound_element.handle] = blob_idx; + } + ++blob_idx; } } diff --git a/src/runtime/ISimpleLifetimeManager.cpp b/src/runtime/ISimpleLifetimeManager.cpp index 2c64475b39..faaff8a63e 100644 --- a/src/runtime/ISimpleLifetimeManager.cpp +++ b/src/runtime/ISimpleLifetimeManager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ using namespace arm_compute; ISimpleLifetimeManager::ISimpleLifetimeManager() - : _active_group(nullptr), _active_elements(), _finalized_groups() + : _active_group(nullptr), _active_elements(), _free_blobs(), _occupied_blobs(), _finalized_groups() { } @@ -53,14 +53,21 @@ void ISimpleLifetimeManager::register_group(IMemoryGroup *group) void ISimpleLifetimeManager::start_lifetime(void *obj) { ARM_COMPUTE_ERROR_ON(obj == nullptr); - ARM_COMPUTE_ERROR_ON_MSG(std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e) + ARM_COMPUTE_ERROR_ON_MSG(_active_elements.find(obj) != std::end(_active_elements), "Memory object is already registered!"); + + // Check if there is a free blob + if(_free_blobs.empty()) + { + _occupied_blobs.emplace_front(Blob{ obj, 0, { obj } }); + } + else { - return obj == e.id; - }) != std::end(_active_elements), - "Memory object is already registered!"); + _occupied_blobs.splice(std::begin(_occupied_blobs), _free_blobs, std::begin(_free_blobs)); + _occupied_blobs.front().id = obj; + } // Insert object in groups and mark its finalized state to false - _active_elements.emplace_back(obj); + _active_elements.insert(std::make_pair(obj, obj)); } void ISimpleLifetimeManager::end_lifetime(void *obj, void **handle, size_t size) @@ -68,36 +75,50 @@ void ISimpleLifetimeManager::end_lifetime(void *obj, void **handle, size_t size) ARM_COMPUTE_ERROR_ON(obj == nullptr); // Find object - auto it = std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e) + auto active_object_it = _active_elements.find(obj); + ARM_COMPUTE_ERROR_ON(active_object_it == std::end(_active_elements)); + + // Update object fields and mark object as complete + Element &el = active_object_it->second; + el.handle = handle; + el.size = size; + el.status = true; + + // Find object in the occupied lists + auto occupied_blob_it = std::find_if(std::begin(_occupied_blobs), std::end(_occupied_blobs), [&obj](const Blob & b) { - return obj == e.id; + return obj == b.id; }); - ARM_COMPUTE_ERROR_ON(it == std::end(_active_elements)); + ARM_COMPUTE_ERROR_ON(occupied_blob_it == std::end(_occupied_blobs)); - // Update object fields and mark object as complete - it->handle = handle; - it->size = size; - it->status = true; + // Update occupied blob and return as free + occupied_blob_it->bound_elements.insert(obj); + occupied_blob_it->max_size = std::max(occupied_blob_it->max_size, size); + occupied_blob_it->id = nullptr; + _free_blobs.splice(std::begin(_free_blobs), _occupied_blobs, occupied_blob_it); // Check if all object are finalized and reset active group if(are_all_finalized()) { - // Update finalized groups - _finalized_groups[_active_group].insert(std::end(_finalized_groups[_active_group]), std::begin(_active_elements), std::end(_active_elements)); + ARM_COMPUTE_ERROR_ON(!_occupied_blobs.empty()); // Update blobs and group mappings update_blobs_and_mappings(); + // Update finalized groups + _finalized_groups[_active_group] = std::move(_active_elements); + // Reset state _active_elements.clear(); _active_group = nullptr; + _free_blobs.clear(); } } bool ISimpleLifetimeManager::are_all_finalized() const { - return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const Element e) + return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const std::pair &e) { - return !e.status; + return !e.second.status; }); } diff --git 
a/src/runtime/OffsetLifetimeManager.cpp b/src/runtime/OffsetLifetimeManager.cpp index 4540aeab28..d0b3bde724 100644 --- a/src/runtime/OffsetLifetimeManager.cpp +++ b/src/runtime/OffsetLifetimeManager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,19 +58,24 @@ void OffsetLifetimeManager::update_blobs_and_mappings() ARM_COMPUTE_ERROR_ON(_active_group == nullptr); // Update blob size - size_t max_group_size = std::accumulate(std::begin(_active_elements), std::end(_active_elements), static_cast<size_t>(0), [](size_t s, const Element & e) + size_t max_group_size = std::accumulate(std::begin(_free_blobs), std::end(_free_blobs), static_cast<size_t>(0), [](size_t s, const Blob & b) { - return s + e.size; + return s + b.max_size; }); _blob = std::max(_blob, max_group_size); // Calculate group mappings auto &group_mappings = _active_group->mappings(); size_t offset = 0; - for(auto &e : _active_elements) + for(auto &free_blob : _free_blobs) { - group_mappings[e.handle] = offset; - offset += e.size; + for(auto &bound_element_id : free_blob.bound_elements) + { + ARM_COMPUTE_ERROR_ON(_active_elements.find(bound_element_id) == std::end(_active_elements)); + Element &bound_element = _active_elements[bound_element_id]; + group_mappings[bound_element.handle] = offset; + } + offset += free_blob.max_size; ARM_COMPUTE_ERROR_ON(offset > _blob); } } -- cgit v1.2.1
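
For illustration, the net effect of the GraphConfig changes on user code: every example above now only sets the tuner flag, because both memory managers default to true after this patch. A minimal hedged sketch of the finalize step under the new defaults follows; the helper function and its template parameter are hypothetical, while GraphConfig, Target and the finalize(target_hint, config) call are taken from the example diffs above.

#include "arm_compute/graph/Types.h"

// Hedged sketch (not part of the patch): finalizing a graph under the new
// memory-manager defaults introduced by this change. GraphT stands in for
// the graph object used in the examples above and is assumed to expose
// finalize(Target, const GraphConfig &).
template <typename GraphT>
void finalize_with_memory_managers(GraphT &graph, arm_compute::graph::Target target_hint, bool use_cl_tuner)
{
    arm_compute::graph::GraphConfig config;
    // These are the defaults after this patch; set explicitly only for illustration.
    config.use_function_memory_manager   = true;        // intra-layer manager (MemoryManagerContext::intra_mm)
    config.use_transition_memory_manager = true;        // cross-layer transition buffers (cross_mm / cross_group)
    config.use_tuner                     = use_cl_tuner; // CL tuner, as in the examples (target == 2)

    // finalize() configures the nodes, wires up the transition memory manager
    // (detail::configure_transition_manager) and finalizes the graph context.
    graph.finalize(target_hint, config);
}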