diff options
author | Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-27 17:46:17 +0100 |
---|---|---|
committer | felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-28 12:08:05 +0000 |
commit | afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch) | |
tree | 03bc7d5a762099989b16a656fa8d397b490ed70e /src/graph/mutators | |
parent | bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff) | |
download | ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz |
Apply clang-format on repository
Code is formatted as per a revised clang format configuration
file (not part of this delivery). Version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/graph/mutators')
-rw-r--r-- | src/graph/mutators/DepthConcatSubTensorMutator.cpp | 39 | ||||
-rw-r--r-- | src/graph/mutators/GroupedConvolutionMutator.cpp | 77 | ||||
-rw-r--r-- | src/graph/mutators/InPlaceOperationMutator.cpp | 105 | ||||
-rw-r--r-- | src/graph/mutators/MutatorUtils.cpp | 8 | ||||
-rw-r--r-- | src/graph/mutators/NodeExecutionMethodMutator.cpp | 42 | ||||
-rw-r--r-- | src/graph/mutators/NodeFusionMutator.cpp | 169 | ||||
-rw-r--r-- | src/graph/mutators/SplitLayerSubTensorMutator.cpp | 33 | ||||
-rw-r--r-- | src/graph/mutators/SyntheticDataTypeMutator.cpp | 72 |
8 files changed, 291 insertions, 254 deletions
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp index 963b948432..1b7ee3c4a4 100644 --- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp +++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp @@ -23,12 +23,12 @@ */ #include "arm_compute/graph/mutators/DepthConcatSubTensorMutator.h" -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/backends/BackendRegistry.h" +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/Logger.h" #include "arm_compute/graph/nodes/ConcatenateLayerNode.h" +#include "arm_compute/graph/Utils.h" #include "support/Cast.h" #include "support/Iterable.h" @@ -50,7 +50,7 @@ IGraphMutator::MutationType DepthConcatSubTensorMutator::type() const void DepthConcatSubTensorMutator::mutate(Graph &g) { // Early exit if no Concatenation layers exist in graph - if(g.nodes(NodeType::ConcatenateLayer).empty()) + if (g.nodes(NodeType::ConcatenateLayer).empty()) { return; } @@ -59,43 +59,48 @@ void DepthConcatSubTensorMutator::mutate(Graph &g) std::vector<NodeID> topological_sorted_node_ids = dfs(g); // Should be in reverse order of execution - for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) + for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) { INode *node = g.node(node_id); - if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) + if (node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) { // Get output tensor auto output_tensor = node->output(0); // Check concatenation axis (Sub-tensor optimization is supported for concatenation axis >=2) auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node); - if(output_tensor == 
nullptr || get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2) + if (output_tensor == nullptr || + get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2) { continue; } // Check that all tensor have the same target, valid inputs and same quantization info - bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(), - [&](const EdgeID & eid) - { - return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target) - && (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info); - }); + bool is_valid = + std::all_of(node->input_edges().cbegin(), node->input_edges().cend(), + [&](const EdgeID &eid) + { + return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && + (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target) && + (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info); + }); // Create subtensors - if(is_valid && is_target_supported(output_tensor->desc().target)) + if (is_valid && is_target_supported(output_tensor->desc().target)) { ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : " << node->id() << " and name : " << node->name() << std::endl); // Create sub-tensor handles unsigned depth = 0; - for(unsigned int i = 0; i < node->input_edges().size(); ++i) + for (unsigned int i = 0; i < node->input_edges().size(); ++i) { auto input_tensor = node->input(i); const auto input_shape = input_tensor->desc().shape; - backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(input_tensor->desc().target); - std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false); + backends::IDeviceBackend &backend = + backends::BackendRegistry::get().get_backend(input_tensor->desc().target); + 
std::unique_ptr<ITensorHandle> handle = + backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false); input_tensor->set_handle(std::move(handle)); depth += input_shape.z(); diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp index b7c551ce8b..31efba6bb1 100644 --- a/src/graph/mutators/GroupedConvolutionMutator.cpp +++ b/src/graph/mutators/GroupedConvolutionMutator.cpp @@ -23,15 +23,14 @@ */ #include "arm_compute/graph/mutators/GroupedConvolutionMutator.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphBuilder.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" #include "support/Cast.h" - #include "support/StringSupport.h" #include <set> @@ -42,43 +41,51 @@ namespace graph { namespace { -NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPair input, NodeID weights, NodeID bias, - PadStrideInfo conv_info, ConvolutionMethod method, ActivationLayerInfo fused_act, FastMathHint fast_math_hint, unsigned int num_groups) +NodeID create_grouped_convolution(Graph &g, + const NodeParams ¶ms, + NodeIdxPair input, + NodeID weights, + NodeID bias, + PadStrideInfo conv_info, + ConvolutionMethod method, + ActivationLayerInfo fused_act, + FastMathHint fast_math_hint, + unsigned int num_groups) { bool has_bias = (bias != EmptyNodeID); // Split input const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); - const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL); - NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); + const unsigned int input_idx = 
get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL); + NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); // Split weights const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]); - const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES); - NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx); + const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES); + NodeID weights_split = GraphBuilder::add_split_node(g, params, {weights, 0}, num_groups, batch_idx); // Split bias NodeID bias_split = EmptyNodeID; - if(has_bias) + if (has_bias) { // Split bias - bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0); + bias_split = GraphBuilder::add_split_node(g, params, {bias, 0}, num_groups, 0); } std::vector<NodeIdxPair> convolution_outputs; - for(unsigned int i = 0; i < num_groups; ++i) + for (unsigned int i = 0; i < num_groups; ++i) { NodeParams group_params = params; NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint); g.add_connection(input_split, i, conv_nid, 0); g.add_connection(weights_split, i, conv_nid, 1); - if(has_bias) + if (has_bias) { g.add_connection(bias_split, i, conv_nid, 2); } // Add group name - if(!group_params.name.empty()) + if (!group_params.name.empty()) { group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i)); } @@ -92,7 +99,7 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPai auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node); conv_node->set_fused_activation(fused_act); - convolution_outputs.push_back({ conv_nid, 0 }); + convolution_outputs.push_back({conv_nid, 0}); } // Depth concatenate output @@ -113,7 +120,7 @@ IGraphMutator::MutationType 
GroupedConvolutionMutator::type() const void GroupedConvolutionMutator::mutate(Graph &g) { // Early exit if no Convolution layers exist in graph - if(g.nodes(NodeType::ConvolutionLayer).empty()) + if (g.nodes(NodeType::ConvolutionLayer).empty()) { return; } @@ -122,17 +129,18 @@ void GroupedConvolutionMutator::mutate(Graph &g) size_t total_nodes = g.nodes().size(); // Iterate over convolution nodes - for(unsigned int i = 0; i < total_nodes; ++i) + for (unsigned int i = 0; i < total_nodes; ++i) { INode *node = g.node(i); - if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1) + if (node != nullptr && node->type() == NodeType::ConvolutionLayer && + arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1) { // Validate node backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target()); Status status = backend.validate_node(*node); // If grouped convolution is not supported - if(!bool(status)) + if (!bool(status)) { // Down-cast node auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node); @@ -151,7 +159,8 @@ void GroupedConvolutionMutator::mutate(Graph &g) ARM_COMPUTE_ERROR_ON(conv_node->input_edge(0) == nullptr || conv_node->input_edge(1) == nullptr); const NodeID input_id = conv_node->input_edge(0)->producer()->id(); const NodeID weights_id = conv_node->input_edge(1)->producer()->id(); - const NodeID bias_id = (conv_node->input_edge(2) != nullptr) ? conv_node->input_edge(2)->producer()->id() : EmptyNodeID; + const NodeID bias_id = + (conv_node->input_edge(2) != nullptr) ? 
conv_node->input_edge(2)->producer()->id() : EmptyNodeID; // Get driving nodes std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node); @@ -164,14 +173,15 @@ void GroupedConvolutionMutator::mutate(Graph &g) NodeID latest_nid = g.nodes().size(); // Create grouped convolution node - NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id, - conv_info, conv_method, fused_act_info, fast_math_hint, num_groups); + NodeID grouped_conv_id = + create_grouped_convolution(g, params, {input_id, 0}, weights_id, bias_id, conv_info, conv_method, + fused_act_info, fast_math_hint, num_groups); // Remove convolution node g.remove_node(node->id()); // Update batch normalization node outputs - for(auto &driving_node : driving_nodes) + for (auto &driving_node : driving_nodes) { g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index); } @@ -180,17 +190,16 @@ void GroupedConvolutionMutator::mutate(Graph &g) g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor)); // Configure new tensors and nodes - std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t) - { - configure_tensor(t.get()); - }); - std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n) - { - if(n != nullptr) - { - n->set_assigned_target(assigned_target); - } - }); + std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), + [](std::unique_ptr<Tensor> &t) { configure_tensor(t.get()); }); + std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), + [&assigned_target](std::unique_ptr<INode> &n) + { + if (n != nullptr) + { + n->set_assigned_target(assigned_target); + } + }); } } } diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp index d3ea940895..a51dcc4f42 100644 --- a/src/graph/mutators/InPlaceOperationMutator.cpp +++ 
b/src/graph/mutators/InPlaceOperationMutator.cpp @@ -29,6 +29,7 @@ #include "arm_compute/graph/Logger.h" #include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h" #include "arm_compute/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.h" + #include "support/Cast.h" using namespace arm_compute::utils::cast; @@ -48,7 +49,7 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge) const auto input_tensor = input_edge->tensor(); const auto input_edge_id = input_edge->id(); - if(parent_node == nullptr) + if (parent_node == nullptr) { return false; } @@ -57,24 +58,23 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge) // If the output is connected to only one edge, then computations can // be done in-place. - if(output_edges.size() == 1) + if (output_edges.size() == 1) { return true; } - return std::all_of(output_edges.begin(), - output_edges.end(), - [&](const EdgeID & edge_id) - { - // Skip check on current input edge - if(edge_id == input_edge_id) - { - return true; - } - - auto edge = g.edge(edge_id); - return edge->tensor() != input_tensor; - }); + return std::all_of(output_edges.begin(), output_edges.end(), + [&](const EdgeID &edge_id) + { + // Skip check on current input edge + if (edge_id == input_edge_id) + { + return true; + } + + auto edge = g.edge(edge_id); + return edge->tensor() != input_tensor; + }); } // If do in-place calculation, then need to use the new output and inherit original output's accessor @@ -109,12 +109,14 @@ void try_in_place_depthwiseconv(std::unique_ptr<INode> &node) // Extract PadStrideInfo and depth multiplier PadStrideInfo conv_info{}; unsigned int depth_multiplier{}; - if(node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer) + if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer) { - conv_info = polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info(); - depth_multiplier = 
polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier(); + conv_info = + polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info(); + depth_multiplier = + polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier(); } - else if(node->type() == NodeType::DepthwiseConvolutionLayer) + else if (node->type() == NodeType::DepthwiseConvolutionLayer) { conv_info = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->convolution_info(); depth_multiplier = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->depth_multiplier(); @@ -126,7 +128,8 @@ void try_in_place_depthwiseconv(std::unique_ptr<INode> &node) const auto out_shape = current_output_tensor->desc().shape; const auto qinfo_out = current_output_tensor->desc().quant_info; - bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) && (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr); + bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) && + (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr); // Specify conditions with which input can be in-placed input_can_in_place &= weight_layout == input_tensor->desc().layout && weight_layout == DataLayout::NHWC; @@ -141,13 +144,14 @@ void try_in_place_depthwiseconv(std::unique_ptr<INode> &node) input_can_in_place &= !conv_info.has_padding(); // NOTE: Dilation should also be (1, 1). 
However currently dilation is not supported in the depthwise conv node - if(input_can_in_place) + if (input_can_in_place) { set_new_output_and_inherit_accessor(node, current_output_tensor, input_tensor); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor " + "or the quantization info are different.\n"); } } @@ -170,7 +174,7 @@ void try_in_place_elementwise(std::unique_ptr<INode> &node) const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); // Inputs are not broadcast compatible - if(out_shape.total_size() == 0) + if (out_shape.total_size() == 0) { return; } @@ -181,22 +185,27 @@ void try_in_place_elementwise(std::unique_ptr<INode> &node) const auto qinfo_out = current_output_tensor->desc().quant_info; // Can do in place, if the input has same shape as output, has same quntisation info as output, has same data type as output and input doesn't have accessor. 
- bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) - && (input0_tensor->desc().data_type == current_output_tensor->desc().data_type) && (input0_tensor->accessor() == nullptr); - bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) - && (input1_tensor->desc().data_type == current_output_tensor->desc().data_type) && (input1_tensor->accessor() == nullptr); - - if(input0_can_in_place) + bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && + (qinfo0 == qinfo_out) && + (input0_tensor->desc().data_type == current_output_tensor->desc().data_type) && + (input0_tensor->accessor() == nullptr); + bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && + (qinfo1 == qinfo_out) && + (input1_tensor->desc().data_type == current_output_tensor->desc().data_type) && + (input1_tensor->accessor() == nullptr); + + if (input0_can_in_place) { set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor); } - else if(input1_can_in_place) + else if (input1_can_in_place) { set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor " + "or the quantization info are different.\n"); } } } // namespace @@ -213,33 +222,31 @@ IGraphMutator::MutationType InPlaceOperationMutator::type() const void InPlaceOperationMutator::mutate(Graph &g) { - std::set<NodeType> in_place_nodes = - { - NodeType::ActivationLayer, - NodeType::BatchNormalizationLayer, - NodeType::EltwiseLayer, - NodeType::UnaryEltwiseLayer, - NodeType::DepthwiseConvolutionLayer, - 
NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer, - NodeType::PrintLayer - }; + std::set<NodeType> in_place_nodes = {NodeType::ActivationLayer, + NodeType::BatchNormalizationLayer, + NodeType::EltwiseLayer, + NodeType::UnaryEltwiseLayer, + NodeType::DepthwiseConvolutionLayer, + NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer, + NodeType::PrintLayer}; // Not interested in the order of nodes - for(auto &node : g.nodes()) + for (auto &node : g.nodes()) { - if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes)) + if (node && in_place_nodes.find(node->type()) != std::end(in_place_nodes)) { // Get input edge Edge *input_edge = node->input_edge(0); // Check if parent has a single output if yes then force in place calculation else not - if((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge)) + if ((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge)) { - if(node->type() == NodeType::EltwiseLayer) + if (node->type() == NodeType::EltwiseLayer) { try_in_place_elementwise(node); } - else if(node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer || node->type() == NodeType::DepthwiseConvolutionLayer) + else if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer || + node->type() == NodeType::DepthwiseConvolutionLayer) { try_in_place_depthwiseconv(node); } @@ -252,9 +259,11 @@ void InPlaceOperationMutator::mutate(Graph &g) ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different - if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + if (new_output_tensor->accessor() != nullptr || + current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as 
there is an accessor bound to the input tensor or the quantization info are different.\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to " + "the input tensor or the quantization info are different.\n"); } else { diff --git a/src/graph/mutators/MutatorUtils.cpp b/src/graph/mutators/MutatorUtils.cpp index c8f38f34e7..f47240eadd 100644 --- a/src/graph/mutators/MutatorUtils.cpp +++ b/src/graph/mutators/MutatorUtils.cpp @@ -29,14 +29,14 @@ namespace graph { bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list) { - if(layout == DataLayout::NCHW || layout == DataLayout::NHWC) + if (layout == DataLayout::NCHW || layout == DataLayout::NHWC) { const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT); const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH); - for(unsigned int i = 0; i < padding_list.size(); ++i) + for (unsigned int i = 0; i < padding_list.size(); ++i) { - if(i != height_index && i != width_index && padding_list[i] != PaddingInfo(0, 0)) + if (i != height_index && i != width_index && padding_list[i] != PaddingInfo(0, 0)) { // if the index is not either height or width, don't fuse return false; @@ -49,4 +49,4 @@ bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList & return false; } } // namespace graph -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp index 09a3cf50c0..588befecae 100644 --- a/src/graph/mutators/NodeExecutionMethodMutator.cpp +++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp @@ -23,11 +23,11 @@ */ #include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" #include "support/Cast.h" @@ -49,17 +49,17 @@ template <typename Setter> void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter) { const std::vector<NodeID> &node_ids = g.nodes(node_type); - for(auto &node_id : node_ids) + for (auto &node_id : node_ids) { INode *node = g.node(node_id); - if(node != nullptr) + if (node != nullptr) { // Validate node backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target()); Status status = backend.validate_node(*node); // Set default execution method in case of failure - if(!bool(status)) + if (!bool(status)) { setter(node); } @@ -81,22 +81,26 @@ IGraphMutator::MutationType NodeExecutionMethodMutator::type() const void NodeExecutionMethodMutator::mutate(Graph &g) { // Convolution Layer - set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n) - { - ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : " - << n->id() << " and Name: " << n->name() << std::endl); - auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n); - casted_node->set_convolution_method(ConvolutionMethod::Default); - }); + set_default_on_invalid_method(g, NodeType::ConvolutionLayer, + [](INode *n) + { + 
ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : " + << n->id() << " and Name: " << n->name() << std::endl); + auto *casted_node = + arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n); + casted_node->set_convolution_method(ConvolutionMethod::Default); + }); // Depthwise Convolution Layer - set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n) - { - ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : " - << n->id() << " and Name: " << n->name() << std::endl); - auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n); - casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default); - }); + set_default_on_invalid_method( + g, NodeType::DepthwiseConvolutionLayer, + [](INode *n) + { + ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : " + << n->id() << " and Name: " << n->name() << std::endl); + auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n); + casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default); + }); } } // namespace graph } // namespace arm_compute diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index 38284b93cf..998a4a05c7 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -24,15 +24,14 @@ #include "arm_compute/graph/mutators/NodeFusionMutator.h" #include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/GraphBuilder.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h" #include "arm_compute/graph/nodes/Nodes.h" +#include 
"arm_compute/graph/Utils.h" #include "src/graph/mutators/MutatorUtils.h" - #include "support/Cast.h" #include <list> @@ -46,7 +45,7 @@ namespace detail { void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode *old_node, bool add_output_tensor) { - if(new_node == nullptr || old_node == nullptr) + if (new_node == nullptr || old_node == nullptr) { return; } @@ -55,7 +54,7 @@ void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode std::vector<NodeIdxPair> last_driving_nodes = get_driving_nodes(*old_node); // Extract last fusable node accessor if any - if(old_node->output(0) == nullptr) + if (old_node->output(0) == nullptr) { return; } @@ -65,10 +64,10 @@ void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode g.remove_node(old_node->id()); // Update fused node outputs - for(auto &driving_node : last_driving_nodes) + for (auto &driving_node : last_driving_nodes) { g.add_connection(new_node->id(), 0, driving_node.node_id, driving_node.index); - if(add_output_tensor) + if (add_output_tensor) { configure_tensor(new_node->output(0)); } @@ -83,19 +82,21 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge ARM_COMPUTE_ERROR_ON(output_edge == nullptr); auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer()); - auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); + auto *bn_node = + arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); // Not fusing if number of groups is greater than 1 - if(conv_node->num_groups() > 1) + if (conv_node->num_groups() > 1) { return; } - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << output_edge->producer_id() - << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing 
convolution node with ID : " + << output_edge->producer_id() << " with BatchNormalization Layer node with ID : " + << output_edge->consumer_id() << std::endl); // Prevent fusion if fused node has an output accessor - if(conv_node->output(0)->accessor() == nullptr) + if (conv_node->output(0)->accessor() == nullptr) { const Target assigned_target = conv_node->assigned_target(); @@ -115,9 +116,10 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge const auto epsilon = bn_node->epsilon(); // Create the fused node - const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info); + const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>( + epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info); - if(conv_node->input_edge(2) != nullptr) + if (conv_node->input_edge(2) != nullptr) { auto conv_bias_id = conv_node->input_edge(2)->producer_id(); g.add_connection(conv_bias_id, 0, fused_id, 2); @@ -129,13 +131,13 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge g.add_connection(bn_mean_id, 0, fused_id, 3); g.add_connection(bn_var_id, 0, fused_id, 4); - if(bn_node->input_edge(3) != nullptr) + if (bn_node->input_edge(3) != nullptr) { const auto bn_beta_id = bn_node->input_edge(3)->producer_id(); g.add_connection(bn_beta_id, 0, fused_id, 5); } - if(bn_node->input_edge(4) != nullptr) + if (bn_node->input_edge(4) != nullptr) { const auto bn_gamma_id = bn_node->input_edge(4)->producer_id(); g.add_connection(bn_gamma_id, 0, fused_id, 6); @@ -147,14 +149,15 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true); fused_node->set_assigned_target(assigned_target); - fused_node->set_common_node_parameters(NodeParams{ conv_node->name() + "+" + bn_node_name, assigned_target }); + 
fused_node->set_common_node_parameters(NodeParams{conv_node->name() + "+" + bn_node_name, assigned_target}); // Remove convolution node g.remove_node(conv_node->id()); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE( + "Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n"); } } @@ -162,14 +165,17 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o { ARM_COMPUTE_ERROR_ON(output_edge == nullptr); - auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer()); - auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); + auto *depth_conv_node = + arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer()); + auto *bn_node = + arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " << output_edge->producer_id() - << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " + << output_edge->producer_id() << " with BatchNormalization Layer node with ID : " + << output_edge->consumer_id() << std::endl); // Prevent fusion if fused node has an output accessor - if(depth_conv_node->output(0)->accessor() == nullptr) + if (depth_conv_node->output(0)->accessor() == nullptr) { const Target assigned_target = depth_conv_node->assigned_target(); @@ -189,9 +195,10 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o const auto epsilon = bn_node->epsilon(); // Create the fused node - const NodeID fused_id = 
g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(epsilon, conv_info, depth_multiplier, depth_conv_method, act_info); + const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>( + epsilon, conv_info, depth_multiplier, depth_conv_method, act_info); - if(depth_conv_node->input_edge(2) != nullptr) + if (depth_conv_node->input_edge(2) != nullptr) { const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id(); g.add_connection(conv_bias_id, 0, fused_id, 2); @@ -211,19 +218,23 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true); fused_node->set_assigned_target(assigned_target); - fused_node->set_common_node_parameters(NodeParams{ depth_conv_node->name() + "+" + bn_node_name, assigned_target }); + fused_node->set_common_node_parameters( + NodeParams{depth_conv_node->name() + "+" + bn_node_name, assigned_target}); // Remove convolution node g.remove_node(depth_conv_node->id()); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the " + "presence of an output accessor\n"); } } template <typename N> -void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set<Activation> &supported_fused_activations) +void fuse_node_with_activation(Graph &g, + const Edge *output_edge, + const std::set<Activation> &supported_fused_activations) { ARM_COMPUTE_ERROR_ON(output_edge == nullptr); @@ -233,22 +244,23 @@ void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr); // Check if activation is supported for fusion - if(supported_fused_activations.count(act_node->activation_info().activation()) == 0) + if 
(supported_fused_activations.count(act_node->activation_info().activation()) == 0) { return; } // EltwiseLayerNode can only be fused when dataype is float - if(n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type)) + if (n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type)) { return; } ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id() - << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl); + << " with Activation Layer node with ID : " + << output_edge->consumer_id() << std::endl); // Prevent fusion if fused node has an output accessor - if(n_node->output(0)->accessor() == nullptr) + if (n_node->output(0)->accessor() == nullptr) { // Set activation info to fused node n_node->set_fused_activation(act_node->activation_info()); @@ -257,7 +269,8 @@ void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE( + "Prevented fusion of node with activation due to the presence of an output accessor\n"); } } @@ -268,8 +281,8 @@ void fuse_pad_with_convolution(Graph &g, const Edge *output_edge) auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->consumer()); const Edge *input_edge = pad_node->input_edge(0); - if(input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr - && pad_node->pad_value().get<float>() == 0.0) + if (input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr && + pad_node->pad_value().get<float>() == 0.0) { const DataLayout layout = input_edge->tensor()->desc().layout; const PaddingList padding_list = pad_node->padding(); @@ -280,18 +293,14 @@ void fuse_pad_with_convolution(Graph &g, const Edge 
*output_edge) const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] : PaddingInfo(0, 0); const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] : PaddingInfo(0, 0); - if(is_padding_in_height_or_width(layout, padding_list)) + if (is_padding_in_height_or_width(layout, padding_list)) { // Add paddings to the convolution node const PadStrideInfo conv_info = conv_node->convolution_info(); - const PadStrideInfo new_conv_info( - conv_info.stride().first, - conv_info.stride().second, - conv_info.pad_left() + pad_w.first, - conv_info.pad_right() + pad_w.second, - conv_info.pad_top() + pad_h.first, - conv_info.pad_bottom() + pad_h.second, - conv_info.round()); + const PadStrideInfo new_conv_info(conv_info.stride().first, conv_info.stride().second, + conv_info.pad_left() + pad_w.first, conv_info.pad_right() + pad_w.second, + conv_info.pad_top() + pad_h.first, conv_info.pad_bottom() + pad_h.second, + conv_info.round()); conv_node->set_convolution_info(new_conv_info); // Update drivers of the convolution node @@ -299,7 +308,7 @@ void fuse_pad_with_convolution(Graph &g, const Edge *output_edge) g.remove_node(pad_node->id()); // Update fused node inputs - for(auto &driver_node : pad_driver_nodes) + for (auto &driver_node : pad_driver_nodes) { g.add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0); } @@ -308,22 +317,23 @@ void fuse_pad_with_convolution(Graph &g, const Edge *output_edge) } template <typename N1, typename N2, typename F, typename... Args> -void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments) +void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments) { // Note that fused nodes may be added to the end of the node list. // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing. 
// This is intentional as it probes the newly added fused nodes for further fusing opportunities. - for(unsigned int i = 0; i < g.nodes().size(); ++i) + for (unsigned int i = 0; i < g.nodes().size(); ++i) { auto node = g.node(i); // Check if the node is of type N1 and not a branching node - if(node && node->type() == N1::node_type && node->output_edges().size() == 1) + if (node && node->type() == N1::node_type && node->output_edges().size() == 1) { const auto output_edge_id = *node->output_edges().begin(); const auto output_edge = g.edge(output_edge_id); // Check if following node is a type N2 node - if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer())) + if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && + (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer())) { fuse_fcn(g, output_edge, optional_arguments...); } @@ -332,22 +342,22 @@ void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse } template <typename N1, typename F, typename... Args> -void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments) +void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments) { // Note that fused nodes may be added to the end of the node list. // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing. // This is intentional as it probes the newly added fused nodes for further fusing opportunities. 
- for(unsigned int i = 0; i < g.nodes().size(); ++i) + for (unsigned int i = 0; i < g.nodes().size(); ++i) { auto node = g.node(i); // Check if the node is of type N1 and not a branching node - if(node && node->type() == N1::node_type && node->output_edges().size() == 1) + if (node && node->type() == N1::node_type && node->output_edges().size() == 1) { const auto output_edge_id = *node->output_edges().begin(); const auto output_edge = g.edge(output_edge_id); // Check if it's the correct target - if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer())) + if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer())) { fuse_fcn(g, output_edge, i, optional_arguments...); } @@ -369,30 +379,24 @@ IGraphMutator::MutationType NodeFusionMutator::type() const void NodeFusionMutator::mutate(Graph &g) { // Supported activations when fusing - const std::set<Activation> supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU, - Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU, - Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU, - Activation::RELU, Activation::SOFT_RELU, Activation::SQRT, - Activation::SQUARE, Activation::TANH - }; + const std::set<Activation> supported_fused_activations = { + Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU, + Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU, + Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU, + Activation::RELU, Activation::SOFT_RELU, Activation::SQRT, + Activation::SQUARE, Activation::TANH}; // Preconditions - auto empty_prec = [](INode &) - { - return true; - }; - auto cl_target_prec = [](INode & n) - { - return n.assigned_target() == Target::CL; - }; - auto qs8_prec = [&g](INode & n) + auto empty_prec = [](INode &) { return true; }; + auto cl_target_prec = [](INode &n) { return n.assigned_target() == Target::CL; 
}; + auto qs8_prec = [&g](INode &n) { ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr); const auto output_edge_id = *n.output_edges().begin(); const auto output_edge = g.edge(output_edge_id); // To perform fusion the two nodes must have same output quantization information - const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info; + const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info; const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8; return (output_qasymm8 && same_qinfo) || !output_qasymm8; @@ -400,16 +404,25 @@ void NodeFusionMutator::mutate(Graph &g) // Fusion mutations - detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec, detail::fuse_pad_with_convolution<ConvolutionLayerNode>); - detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>(g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>); - detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations); - detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations); - detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations); - detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations); - detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations); + detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec, + detail::fuse_pad_with_convolution<ConvolutionLayerNode>); + 
detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>( + g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>); + detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>( + g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations); + detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>( + g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations); + detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>( + g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations); + detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>( + g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations); + detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>( + g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations); // The fusion of BatchNormalizationLayer must occur after the fusion of ActivationLayer. 
Because FusedConvolutionBatchNormalizationNode assumes the BatchNormalization is already fused with activation, if any - detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization); - detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); + detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>( + g, empty_prec, detail::fuse_convolution_with_batch_normalization); + detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>( + g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); } } // namespace graph } // namespace arm_compute diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp index 2c28a1a2d1..533f8944cf 100644 --- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp +++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp @@ -23,12 +23,12 @@ */ #include "arm_compute/graph/mutators/SplitLayerSubTensorMutator.h" -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/backends/BackendRegistry.h" +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/Logger.h" #include "arm_compute/graph/nodes/SplitLayerNode.h" +#include "arm_compute/graph/Utils.h" #include "support/Cast.h" #include "support/Iterable.h" @@ -50,7 +50,7 @@ IGraphMutator::MutationType SplitLayerSubTensorMutator::type() const void SplitLayerSubTensorMutator::mutate(Graph &g) { // Early exit if no Split layers exist in graph - if(g.nodes(NodeType::SplitLayer).empty()) + if (g.nodes(NodeType::SplitLayer).empty()) { return; } @@ -59,23 +59,23 @@ void SplitLayerSubTensorMutator::mutate(Graph &g) std::vector<NodeID> topological_sorted_node_ids = dfs(g); // Should 
be in reverse order of execution - for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) + for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) { INode *node = g.node(node_id); - if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr) + if (node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr) { // Get output tensor Tensor *input_tensor = node->input(0); // Check that all tensor have the same target and are valid bool is_valid = std::all_of(node->outputs().cbegin(), node->outputs().cend(), - [&](const TensorID & tid) - { - return (g.tensor(tid) != nullptr) && (g.tensor(tid)->desc().target == input_tensor->desc().target); - }); + [&](const TensorID &tid) { + return (g.tensor(tid) != nullptr) && + (g.tensor(tid)->desc().target == input_tensor->desc().target); + }); // Create subtensors - if(is_valid && is_target_supported(input_tensor->desc().target)) + if (is_valid && is_target_supported(input_tensor->desc().target)) { ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : " << node->id() << " and name : " << node->name() << std::endl); @@ -87,15 +87,18 @@ void SplitLayerSubTensorMutator::mutate(Graph &g) const bool extend_parent = (axis < 2); // Create sub-tensor handles - for(unsigned int i = 0; i < node->outputs().size(); ++i) + for (unsigned int i = 0; i < node->outputs().size(); ++i) { Tensor *output_tensor = node->output(i); const TensorShape output_shape = output_tensor->desc().shape; Coordinates coords; - std::tie(std::ignore, coords) = split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i); + std::tie(std::ignore, coords) = + split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i); - backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target); - std::unique_ptr<ITensorHandle> handle = 
backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent); + backends::IDeviceBackend &backend = + backends::BackendRegistry::get().get_backend(output_tensor->desc().target); + std::unique_ptr<ITensorHandle> handle = + backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent); output_tensor->set_handle(std::move(handle)); } } diff --git a/src/graph/mutators/SyntheticDataTypeMutator.cpp b/src/graph/mutators/SyntheticDataTypeMutator.cpp index 74d040b81d..3dc2480e85 100644 --- a/src/graph/mutators/SyntheticDataTypeMutator.cpp +++ b/src/graph/mutators/SyntheticDataTypeMutator.cpp @@ -26,8 +26,8 @@ #include "arm_compute/graph/GraphBuilder.h" #include "arm_compute/graph/ITensorAccessor.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" #include "support/Cast.h" @@ -62,14 +62,12 @@ public: */ bool is_mutation_supported(Graph &g) { - const std::set<NodeType> unsupported_node_types = { NodeType::DetectionOutputLayer, - NodeType::NormalizationLayer, - NodeType::PriorBoxLayer - }; + const std::set<NodeType> unsupported_node_types = {NodeType::DetectionOutputLayer, NodeType::NormalizationLayer, + NodeType::PriorBoxLayer}; - for(const auto &utype : unsupported_node_types) + for (const auto &utype : unsupported_node_types) { - if(!g.nodes(utype).empty()) + if (!g.nodes(utype).empty()) { return false; } @@ -83,12 +81,12 @@ bool is_mutation_supported(Graph &g) */ void remove_optimized_nodes(Graph &g) { - const std::set<NodeType> optimized_node_types = { NodeType::BatchNormalizationLayer }; + const std::set<NodeType> optimized_node_types = {NodeType::BatchNormalizationLayer}; - for(const auto &opt_type : optimized_node_types) + for (const auto &opt_type : optimized_node_types) { const std::vector<NodeID> opt_nodes_ids = g.nodes(opt_type); - for(const auto &node_id : opt_nodes_ids) + for (const auto &node_id : 
opt_nodes_ids) { INode *node = g.node(node_id); @@ -108,7 +106,7 @@ void remove_optimized_nodes(Graph &g) g.remove_node(node->id()); // Update connections - for(auto &driving_node : driving_nodes) + for (auto &driving_node : driving_nodes) { g.add_connection(producer->id(), producer_edge_id, driving_node.node_id, driving_node.index); } @@ -123,11 +121,11 @@ void remove_optimized_nodes(Graph &g) void convert_tensors(Graph &g, DataType data_type) { auto &tensors = g.tensors(); - for(auto &tensor : tensors) + for (auto &tensor : tensors) { - if(tensor != nullptr) + if (tensor != nullptr) { - switch(data_type) + switch (data_type) { case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: @@ -156,7 +154,7 @@ template <typename NT> void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const &f) { const std::vector<NodeID> nodes_ids = g.nodes(NT::node_type); - for(const auto &nodes_id : nodes_ids) + for (const auto &nodes_id : nodes_ids) { INode *node = arm_compute::utils::cast::polymorphic_downcast<NT *>(g.node(nodes_id)); ARM_COMPUTE_ERROR_ON(node == nullptr); @@ -174,41 +172,41 @@ void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const */ void convert_special_tensors(Graph &g) { - auto softmax_func = [](INode * node, Tensor * tensor) + auto softmax_func = [](INode *node, Tensor *tensor) { ARM_COMPUTE_UNUSED(node); - if(tensor->desc().data_type == DataType::QASYMM8) + if (tensor->desc().data_type == DataType::QASYMM8) { tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0); } - else if(tensor->desc().data_type == DataType::QASYMM8_SIGNED) + else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED) { tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128); } return true; }; - auto act_func = [](INode * node, Tensor * tensor) + auto act_func = [](INode *node, Tensor *tensor) { auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(node); - if(tensor->desc().data_type == 
DataType::QASYMM8) + if (tensor->desc().data_type == DataType::QASYMM8) { - if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) + if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) { tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 128); } - else if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) + else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) { tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0); } } - else if(tensor->desc().data_type == DataType::QASYMM8_SIGNED) + else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED) { - if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) + if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) { tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 0); } - else if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) + else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) { tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128); } @@ -228,22 +226,19 @@ void convert_special_tensors(Graph &g) */ void handle_nodes_with_bias(Graph &g) { - const std::set<NodeType> special_node_types = { NodeType::ConvolutionLayer, - NodeType::DeconvolutionLayer, - NodeType::DepthwiseConvolutionLayer, - NodeType::FullyConnectedLayer - }; + const std::set<NodeType> special_node_types = {NodeType::ConvolutionLayer, NodeType::DeconvolutionLayer, + NodeType::DepthwiseConvolutionLayer, NodeType::FullyConnectedLayer}; - for(const auto &spc_type : special_node_types) + for (const auto &spc_type : special_node_types) { const std::vector<NodeID> scp_nodes_ids = g.nodes(spc_type); - for(const auto &node_id : scp_nodes_ids) + for (const auto &node_id : 
scp_nodes_ids) { INode *node = g.node(node_id); - if(node != nullptr) + if (node != nullptr) { Tensor *tensor = node->input(2); - if(tensor != nullptr) + if (tensor != nullptr) { tensor->desc().data_type = DataType::S32; } @@ -253,8 +248,8 @@ void handle_nodes_with_bias(Graph &g) params.name = params.name.empty() ? "" : params.name + "Bias"; TensorDescriptor b_desc = node->input(1)->desc(); - auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)]; - b_desc.shape = TensorShape(depth); + auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)]; + b_desc.shape = TensorShape(depth); auto accessor = std::make_unique<EmptyAccessor>(); auto b_nid = GraphBuilder::add_const_node(g, params, b_desc, std::move(accessor)); @@ -266,8 +261,7 @@ void handle_nodes_with_bias(Graph &g) } } // namespace -SyntheticDataTypeMutator::SyntheticDataTypeMutator(DataType mutate_type) - : _mutate_type{ mutate_type } +SyntheticDataTypeMutator::SyntheticDataTypeMutator(DataType mutate_type) : _mutate_type{mutate_type} { } @@ -283,7 +277,7 @@ IGraphMutator::MutationType SyntheticDataTypeMutator::type() const void SyntheticDataTypeMutator::mutate(Graph &g) { - if(is_mutation_supported(g)) + if (is_mutation_supported(g)) { // Remove nodes that get optimized out (e.g. BatchNorm) remove_optimized_nodes(g); |