diff options
Diffstat (limited to 'src/graph/detail')
-rw-r--r-- | src/graph/detail/CrossLayerMemoryManagerHelpers.cpp | 59 | ||||
-rw-r--r-- | src/graph/detail/ExecutionHelpers.cpp | 91 |
2 files changed, 78 insertions, 72 deletions
diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp index b45f453f23..1e813dc678 100644 --- a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp +++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp @@ -23,6 +23,8 @@ */ #include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/GraphManager.h" @@ -30,9 +32,7 @@ #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" #include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" -#include "arm_compute/core/ITensor.h" #include "support/Cast.h" #include <algorithm> @@ -78,28 +78,28 @@ IMemoryGroup *get_memory_group_from_handle(GraphContext &ctx, ITensorHandle *han */ std::set<ITensorHandle *> get_const_handles(const Graph &g) { - std::set<NodeType> const_node_types = { NodeType::Input, NodeType::Output, NodeType::Const }; + std::set<NodeType> const_node_types = {NodeType::Input, NodeType::Output, NodeType::Const}; std::set<ITensorHandle *> const_tensors; auto &nodes = g.nodes(); - for(auto &node : nodes) + for (auto &node : nodes) { // If its a const node: - if(node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types)) + if (node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types)) { // TODO (geopin01) : Create IO iterator wrappers // Add all its inputs / outputs to the list of constant handles - for(unsigned int i = 0; i < node->num_inputs(); ++i) + for (unsigned int i = 0; i < node->num_inputs(); ++i) { - if(node->input(i) != nullptr) + if (node->input(i) != nullptr) { const_tensors.insert(node->input(i)->handle()->parent_handle()); } } - for(unsigned int i = 0; i < node->num_outputs(); ++i) + for (unsigned int i = 0; i < node->num_outputs(); ++i) { - if(node->output(i) != nullptr) + if (node->output(i) != nullptr) { const_tensors.insert(node->output(i)->handle()->parent_handle()); } @@ -118,9 +118,8 @@ std::set<ITensorHandle *> get_const_handles(const Graph &g) * * @return List of transition handles */ -TaskHandles get_transition_handles(GraphContext &ctx, - ExecutionTask &task, - const std::set<ITensorHandle *> &const_tensors) +TaskHandles +get_transition_handles(GraphContext &ctx, ExecutionTask &task, const std::set<ITensorHandle *> &const_tensors) { ARM_COMPUTE_ERROR_ON(task.node == nullptr || (task.task == nullptr && !is_utility_node(task.node))); INode &node = *task.node; @@ -128,28 +127,30 @@ TaskHandles get_transition_handles(GraphContext &ctx, TaskHandles transition_handles; // Add input handles - for(unsigned int i = 0; i < node.input_edges().size(); ++i) + for (unsigned int i = 0; i < node.input_edges().size(); ++i) { Edge *input_edge = node.input_edge(i); // If this input is the output of another node - if(input_edge != nullptr && input_edge->tensor() != nullptr && const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors)) + if (input_edge != nullptr && input_edge->tensor() != nullptr && + const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors)) { // Then add it to the list of transition buffers ITensorHandle *tensor_handle = input_edge->tensor()->handle()->parent_handle(); - IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle); + IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle); transition_handles.input_handles.emplace_back(std::make_pair(tensor_handle, mm_group)); } } // Add output handles - for(unsigned int i = 0; i < node.num_outputs(); ++i) + for (unsigned int i = 0; i < node.num_outputs(); ++i) { Tensor *output_tensor = node.output(i); // If this output is used as an input for another node - if(output_tensor != nullptr && const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors)) + if (output_tensor != nullptr && + const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors)) { ITensorHandle *tensor_handle = output_tensor->handle()->parent_handle(); - IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle); + IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle); transition_handles.output_handles.emplace_back(std::make_pair(tensor_handle, mm_group)); } } @@ -164,11 +165,11 @@ TaskHandles get_transition_handles(GraphContext &ctx, */ void count_input_handles_per_target(const TaskHandles &task_handles, TargetHandleCounter &handle_counter) { - for(const auto &handle : task_handles.input_handles) + for (const auto &handle : task_handles.input_handles) { ITensorHandle *key = handle.first; HandleCounter &target_counter = handle_counter[key->target()]; - if(target_counter.find(key) == std::end(target_counter)) + if (target_counter.find(key) == std::end(target_counter)) { target_counter.emplace(std::make_pair(key, 1)); } @@ -192,12 +193,12 @@ void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const Ha // Acquires the given handles and sets them as in flight if they aren't already auto acquire = [&](std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> &handles) { - for(auto &handle : handles) + for (auto &handle : handles) { ITensorHandle *parent_handle = handle.first; ARM_COMPUTE_ERROR_ON(parent_handle == nullptr); // If the tensor is not already in flight: - if(tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight)) + if (tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight)) { ARM_COMPUTE_ERROR_ON(hc.find(parent_handle) == std::end(hc)); // Then add it to the list of in flight tensors @@ -208,20 +209,20 @@ void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const Ha } }; - for(auto &task_handle : tasks_handles) + for (auto &task_handle : tasks_handles) { // Marking all the input and output tensors of the task as in flight acquire(task_handle.input_handles); acquire(task_handle.output_handles); // Releasing the input tensors - for(auto &input_handle : task_handle.input_handles) + for (auto &input_handle : task_handle.input_handles) { ITensorHandle *ihandle = input_handle.first; ARM_COMPUTE_ERROR_ON(ihandle == nullptr); ARM_COMPUTE_ERROR_ON(tensors_in_flight.find(ihandle) == std::end(tensors_in_flight)); --tensors_in_flight[ihandle]; - if(tensors_in_flight[ihandle] <= 0) + if (tensors_in_flight[ihandle] <= 0) { // Remove tensor for tensors in flight tensors_in_flight.erase(ihandle); @@ -242,7 +243,7 @@ void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload TargetHandleCounter target_handle_count; // Count handles - for(auto &task : workload.tasks) + for (auto &task : workload.tasks) { // Populates IO handles tasks_handles.push_back(get_transition_handles(ctx, task, const_tensors)); @@ -252,12 +253,12 @@ void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload } // Setup memory managers - for(auto &hc : target_handle_count) + for (auto &hc : target_handle_count) { MemoryManagerContext *mm_ctx = ctx.memory_management_ctx(hc.first); - if(mm_ctx != nullptr) + if (mm_ctx != nullptr) { - if(mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr) + if (mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr) { // Manage and allocate tensors configure_handle_lifetime(tasks_handles, hc.second); diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp index 5be3706cfe..870d24a6c7 100644 --- a/src/graph/detail/ExecutionHelpers.cpp +++ b/src/graph/detail/ExecutionHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,12 @@ */ #include "arm_compute/graph/detail/ExecutionHelpers.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/GraphManager.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" namespace arm_compute { @@ -41,9 +41,9 @@ void validate_all_nodes(Graph &g) auto &nodes = g.nodes(); // Create tasks - for(auto &node : nodes) + for (auto &node : nodes) { - if(node != nullptr) + if (node != nullptr) { Target assigned_target = node->assigned_target(); backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target); @@ -57,9 +57,9 @@ void configure_all_tensors(Graph &g) { auto &tensors = g.tensors(); - for(auto &tensor : tensors) + for (auto &tensor : tensors) { - if(tensor && tensor->handle() == nullptr) + if (tensor && tensor->handle() == nullptr) { Target target = tensor->desc().target; backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(target); @@ -72,10 +72,10 @@ void configure_all_tensors(Graph &g) void allocate_all_input_tensors(INode &node) { - for(unsigned int i = 0; i < node.num_inputs(); ++i) + for (unsigned int i = 0; i < node.num_inputs(); ++i) { Tensor *tensor = node.input(i); - if(tensor != nullptr && !tensor->bound_edges().empty()) + if (tensor != nullptr && !tensor->bound_edges().empty()) { ARM_COMPUTE_ERROR_ON_MSG(!tensor->handle(), "Tensor handle is not configured!"); tensor->handle()->allocate(); @@ -85,10 +85,10 @@ void allocate_all_input_tensors(INode &node) void allocate_all_output_tensors(INode &node) { - for(unsigned int i = 0; i < node.num_outputs(); ++i) + for (unsigned int i = 0; i < node.num_outputs(); ++i) { Tensor *tensor = node.output(i); - if(tensor != nullptr && !tensor->bound_edges().empty()) + if (tensor != nullptr && !tensor->bound_edges().empty()) { ARM_COMPUTE_ERROR_ON_MSG(!tensor->handle(), "Tensor handle is not configured!"); tensor->handle()->allocate(); @@ -98,11 +98,11 @@ void allocate_all_output_tensors(INode &node) void allocate_const_tensors(Graph &g) { - for(auto &node : g.nodes()) + for (auto &node : g.nodes()) { - if(node != nullptr) + if (node != nullptr) { - switch(node->type()) + switch (node->type()) { case NodeType::Const: case NodeType::Input: @@ -121,9 +121,10 @@ void allocate_all_tensors(Graph &g) { auto &tensors = g.tensors(); - for(auto &tensor : tensors) + for (auto &tensor : tensors) { - if(tensor && !tensor->bound_edges().empty() && tensor->handle() != nullptr && tensor->handle()->tensor().info()->is_resizable() && tensor->handle()->tensor().is_used()) + if (tensor && !tensor->bound_edges().empty() && tensor->handle() != nullptr && + tensor->handle()->tensor().info()->is_resizable() && tensor->handle()->tensor().is_used()) { tensor->handle()->allocate(); } @@ -140,15 +141,15 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::ve workload.tasks.reserve(node_order.size()); // Create tasks - for(auto &node_id : node_order) + for (auto &node_id : node_order) { auto node = g.node(node_id); - if(node != nullptr) + if (node != nullptr) { Target assigned_target = node->assigned_target(); - backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target); + backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target); std::unique_ptr<IFunction> func = backend.configure_node(*node, ctx); - if(func != nullptr || is_utility_node(node)) + if (func != nullptr || is_utility_node(node)) { workload.tasks.emplace_back(ExecutionTask(std::move(func), node)); } @@ -156,14 +157,14 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::ve } // Add inputs and outputs - for(auto &node : g.nodes()) + for (auto &node : g.nodes()) { - if(node != nullptr && node->type() == NodeType::Input) + if (node != nullptr && node->type() == NodeType::Input) { workload.inputs.push_back(node->output(0)); } - if(node != nullptr && node->type() == NodeType::Output) + if (node != nullptr && node->type() == NodeType::Output) { workload.outputs.push_back(node->input(0)); continue; @@ -175,9 +176,9 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::ve void release_unused_tensors(Graph &g) { - for(auto &tensor : g.tensors()) + for (auto &tensor : g.tensors()) { - if(tensor != nullptr && tensor->handle() != nullptr) + if (tensor != nullptr && tensor->handle() != nullptr) { tensor->handle()->release_if_unused(); } @@ -194,11 +195,11 @@ void call_all_const_node_accessors(Graph &g) { auto &nodes = g.nodes(); - for(auto &node : nodes) + for (auto &node : nodes) { - if(node != nullptr && node->type() == NodeType::Const && node->num_outputs()) + if (node != nullptr && node->type() == NodeType::Const && node->num_outputs()) { - if(!node->output(0)->bound_edges().empty()) + if (!node->output(0)->bound_edges().empty()) { call_tensor_accessor(node->output(0)); } @@ -209,18 +210,19 @@ void call_all_const_node_accessors(Graph &g) bool call_all_input_node_accessors(ExecutionWorkload &workload) { bool is_valid = true; - std::for_each(std::begin(workload.inputs), std::end(workload.inputs), [&](Tensor * input_tensor) - { - bool valid_input = (input_tensor != nullptr) && input_tensor->call_accessor(); - is_valid = is_valid && valid_input; - }); + std::for_each(std::begin(workload.inputs), std::end(workload.inputs), + [&](Tensor *input_tensor) + { + bool valid_input = (input_tensor != nullptr) && input_tensor->call_accessor(); + is_valid = is_valid && valid_input; + }); return is_valid; } void prepare_all_tasks(ExecutionWorkload &workload) { ARM_COMPUTE_ERROR_ON(workload.graph == nullptr); - for(auto &task : workload.tasks) + for (auto &task : workload.tasks) { task.prepare(); release_unused_tensors(*workload.graph); @@ -232,24 +234,24 @@ void call_all_tasks(ExecutionWorkload &workload) ARM_COMPUTE_ERROR_ON(workload.ctx == nullptr); // Acquire memory for the transition buffers - for(auto &mm_ctx : workload.ctx->memory_managers()) + for (auto &mm_ctx : workload.ctx->memory_managers()) { - if(mm_ctx.second.cross_group != nullptr) + if (mm_ctx.second.cross_group != nullptr) { mm_ctx.second.cross_group->acquire(); } } // Execute tasks - for(auto &task : workload.tasks) + for (auto &task : workload.tasks) { task(); } // Release memory for the transition buffers - for(auto &mm_ctx : workload.ctx->memory_managers()) + for (auto &mm_ctx : workload.ctx->memory_managers()) { - if(mm_ctx.second.cross_group != nullptr) + if (mm_ctx.second.cross_group != nullptr) { mm_ctx.second.cross_group->release(); } @@ -259,11 +261,14 @@ void call_all_tasks(ExecutionWorkload &workload) bool call_all_output_node_accessors(ExecutionWorkload &workload) { bool is_valid = true; - std::for_each(std::begin(workload.outputs), std::end(workload.outputs), [&](Tensor * output_tensor) - { - bool valid_output = (output_tensor != nullptr) && output_tensor->call_accessor(); - is_valid = is_valid && valid_output; - }); + std::for_each(std::begin(workload.outputs), std::end(workload.outputs), + [&](Tensor *output_tensor) + { + bool valid_output = (output_tensor != nullptr) && output_tensor->call_accessor(); + is_valid = is_valid && valid_output; + }); + + sync_backends(); return is_valid; } |