Diffstat (limited to 'src/graph/mutators')
 src/graph/mutators/DepthConcatSubTensorMutator.cpp |  45
 src/graph/mutators/GroupedConvolutionMutator.cpp   |  81
 src/graph/mutators/InPlaceOperationMutator.cpp     | 235
 src/graph/mutators/MutatorUtils.cpp                |  52
 src/graph/mutators/MutatorUtils.h                  |  42
 src/graph/mutators/NodeExecutionMethodMutator.cpp  |  46
 src/graph/mutators/NodeFusionMutator.cpp           | 304
 src/graph/mutators/SplitLayerSubTensorMutator.cpp  |  41
 src/graph/mutators/SyntheticDataTypeMutator.cpp    | 113
9 files changed, 698 insertions(+), 261 deletions(-)
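Every file in this diffstat implements the library's graph-mutator interface: a pass reports a human-readable name(), declares via type() whether it rewrites the backend-specific or the IR-level view of the graph, and performs its rewrite in mutate(Graph &). For orientation, below is a minimal sketch of a pass following that pattern; the NoOpMutator class is hypothetical, and the interface spelling is inferred from the signatures visible in the hunks that follow rather than quoted from IGraphMutator.h.

#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/IGraphMutator.h"
#include "arm_compute/graph/Logger.h"

namespace arm_compute
{
namespace graph
{
// Hypothetical pass, for illustration only: visits every node and does nothing.
class NoOpMutator final : public IGraphMutator
{
public:
    const char *name() override
    {
        return "NoOpMutator";
    }
    IGraphMutator::MutationType type() const override
    {
        return IGraphMutator::MutationType::IR;
    }
    void mutate(Graph &g) override
    {
        for (auto &node : g.nodes())
        {
            // Removed nodes leave null slots in the node list, hence the
            // null check mirrored throughout the mutators in this patch.
            if (node == nullptr)
            {
                continue;
            }
            ARM_COMPUTE_LOG_GRAPH_VERBOSE("Visiting node with ID : " << node->id() << std::endl);
        }
    }
};
} // namespace graph
} // namespace arm_compute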
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp index 30d6700446..1b7ee3c4a4 100644 --- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp +++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,15 +23,15 @@ */ #include "arm_compute/graph/mutators/DepthConcatSubTensorMutator.h" -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/backends/BackendRegistry.h" +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/Logger.h" #include "arm_compute/graph/nodes/ConcatenateLayerNode.h" +#include "arm_compute/graph/Utils.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/Iterable.h" +#include "support/Cast.h" +#include "support/Iterable.h" namespace arm_compute { @@ -50,7 +50,7 @@ IGraphMutator::MutationType DepthConcatSubTensorMutator::type() const void DepthConcatSubTensorMutator::mutate(Graph &g) { // Early exit if no Concatenation layers exist in graph - if(g.nodes(NodeType::ConcatenateLayer).empty()) + if (g.nodes(NodeType::ConcatenateLayer).empty()) { return; } @@ -59,43 +59,48 @@ void DepthConcatSubTensorMutator::mutate(Graph &g) std::vector<NodeID> topological_sorted_node_ids = dfs(g); // Should be in reverse order of execution - for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) + for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) { INode *node = g.node(node_id); - if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) + if (node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) { // Get output tensor auto output_tensor = node->output(0); // Check concatenation axis (Sub-tensor optimization is supported for concatenation axis >=2) auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node); - if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2) + if (output_tensor == nullptr || + get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2) { continue; } // Check that all tensor have the same target, valid inputs and same quantization info - bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(), - [&](const EdgeID & eid) - { - return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target) - && (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info); - }); + bool is_valid = + std::all_of(node->input_edges().cbegin(), node->input_edges().cend(), + [&](const EdgeID &eid) + { + return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && + (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target) && + (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info); + }); // Create subtensors - if(is_valid && is_target_supported(output_tensor->desc().target)) + if (is_valid && is_target_supported(output_tensor->desc().target)) { ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : " << node->id() << " and 
name : " << node->name() << std::endl); // Create sub-tensor handles unsigned depth = 0; - for(unsigned int i = 0; i < node->input_edges().size(); ++i) + for (unsigned int i = 0; i < node->input_edges().size(); ++i) { auto input_tensor = node->input(i); const auto input_shape = input_tensor->desc().shape; - backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(input_tensor->desc().target); - std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false); + backends::IDeviceBackend &backend = + backends::BackendRegistry::get().get_backend(input_tensor->desc().target); + std::unique_ptr<ITensorHandle> handle = + backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false); input_tensor->set_handle(std::move(handle)); depth += input_shape.z(); diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp index f8494a872f..31efba6bb1 100644 --- a/src/graph/mutators/GroupedConvolutionMutator.cpp +++ b/src/graph/mutators/GroupedConvolutionMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,15 +23,14 @@ */ #include "arm_compute/graph/mutators/GroupedConvolutionMutator.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/GraphBuilder.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" -#include "arm_compute/core/utils/misc/Cast.h" - +#include "support/Cast.h" #include "support/StringSupport.h" #include <set> @@ -42,43 +41,51 @@ namespace graph { namespace { -NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPair input, NodeID weights, NodeID bias, - PadStrideInfo conv_info, ConvolutionMethod method, ActivationLayerInfo fused_act, FastMathHint fast_math_hint, unsigned int num_groups) +NodeID create_grouped_convolution(Graph &g, + const NodeParams ¶ms, + NodeIdxPair input, + NodeID weights, + NodeID bias, + PadStrideInfo conv_info, + ConvolutionMethod method, + ActivationLayerInfo fused_act, + FastMathHint fast_math_hint, + unsigned int num_groups) { bool has_bias = (bias != EmptyNodeID); // Split input const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); - const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL); - NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); + const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL); + NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); // Split weights const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]); - const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES); - NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx); + const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES); + NodeID weights_split = GraphBuilder::add_split_node(g, params, {weights, 0}, num_groups, batch_idx); // Split bias NodeID 
bias_split = EmptyNodeID; - if(has_bias) + if (has_bias) { // Split bias - bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0); + bias_split = GraphBuilder::add_split_node(g, params, {bias, 0}, num_groups, 0); } std::vector<NodeIdxPair> convolution_outputs; - for(unsigned int i = 0; i < num_groups; ++i) + for (unsigned int i = 0; i < num_groups; ++i) { NodeParams group_params = params; NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint); g.add_connection(input_split, i, conv_nid, 0); g.add_connection(weights_split, i, conv_nid, 1); - if(has_bias) + if (has_bias) { g.add_connection(bias_split, i, conv_nid, 2); } // Add group name - if(!group_params.name.empty()) + if (!group_params.name.empty()) { group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i)); } @@ -92,7 +99,7 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPai auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node); conv_node->set_fused_activation(fused_act); - convolution_outputs.push_back({ conv_nid, 0 }); + convolution_outputs.push_back({conv_nid, 0}); } // Depth concatenate output @@ -113,7 +120,7 @@ IGraphMutator::MutationType GroupedConvolutionMutator::type() const void GroupedConvolutionMutator::mutate(Graph &g) { // Early exit if no Convolution layers exist in graph - if(g.nodes(NodeType::ConvolutionLayer).empty()) + if (g.nodes(NodeType::ConvolutionLayer).empty()) { return; } @@ -122,17 +129,18 @@ void GroupedConvolutionMutator::mutate(Graph &g) size_t total_nodes = g.nodes().size(); // Iterate over convolution nodes - for(unsigned int i = 0; i < total_nodes; ++i) + for (unsigned int i = 0; i < total_nodes; ++i) { INode *node = g.node(i); - if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1) + if (node != nullptr && node->type() == NodeType::ConvolutionLayer && + arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1) { // Validate node backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target()); Status status = backend.validate_node(*node); // If grouped convolution is not supported - if(!bool(status)) + if (!bool(status)) { // Down-cast node auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node); @@ -151,7 +159,8 @@ void GroupedConvolutionMutator::mutate(Graph &g) ARM_COMPUTE_ERROR_ON(conv_node->input_edge(0) == nullptr || conv_node->input_edge(1) == nullptr); const NodeID input_id = conv_node->input_edge(0)->producer()->id(); const NodeID weights_id = conv_node->input_edge(1)->producer()->id(); - const NodeID bias_id = (conv_node->input_edge(2) != nullptr) ? conv_node->input_edge(2)->producer()->id() : EmptyNodeID; + const NodeID bias_id = + (conv_node->input_edge(2) != nullptr) ? 
conv_node->input_edge(2)->producer()->id() : EmptyNodeID; // Get driving nodes std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node); @@ -164,14 +173,15 @@ void GroupedConvolutionMutator::mutate(Graph &g) NodeID latest_nid = g.nodes().size(); // Create grouped convolution node - NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id, - conv_info, conv_method, fused_act_info, fast_math_hint, num_groups); + NodeID grouped_conv_id = + create_grouped_convolution(g, params, {input_id, 0}, weights_id, bias_id, conv_info, conv_method, + fused_act_info, fast_math_hint, num_groups); // Remove convolution node g.remove_node(node->id()); // Update batch normalization node outputs - for(auto &driving_node : driving_nodes) + for (auto &driving_node : driving_nodes) { g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index); } @@ -180,17 +190,16 @@ void GroupedConvolutionMutator::mutate(Graph &g) g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor)); // Configure new tensors and nodes - std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t) - { - configure_tensor(t.get()); - }); - std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n) - { - if(n != nullptr) - { - n->set_assigned_target(assigned_target); - } - }); + std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), + [](std::unique_ptr<Tensor> &t) { configure_tensor(t.get()); }); + std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), + [&assigned_target](std::unique_ptr<INode> &n) + { + if (n != nullptr) + { + n->set_assigned_target(assigned_target); + } + }); } } } diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp index 3b06537cd9..a51dcc4f42 100644 --- a/src/graph/mutators/InPlaceOperationMutator.cpp +++ b/src/graph/mutators/InPlaceOperationMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,13 +23,193 @@ */ #include "arm_compute/graph/mutators/InPlaceOperationMutator.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" +#include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h" +#include "arm_compute/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.h" + +#include "support/Cast.h" + +using namespace arm_compute::utils::cast; namespace arm_compute { namespace graph { +namespace +{ +// Check if the output edges of the parent node are separate tensors. If not, +// it means the same output is connected to multiple nodes and computations on +// these nodes cannot be done in-place. +bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge) +{ + const auto parent_node = input_edge->producer(); + const auto input_tensor = input_edge->tensor(); + const auto input_edge_id = input_edge->id(); + + if (parent_node == nullptr) + { + return false; + } + + const auto output_edges = parent_node->output_edges(); + + // If the output is connected to only one edge, then computations can + // be done in-place. 
+ if (output_edges.size() == 1) + { + return true; + } + + return std::all_of(output_edges.begin(), output_edges.end(), + [&](const EdgeID &edge_id) + { + // Skip check on current input edge + if (edge_id == input_edge_id) + { + return true; + } + + auto edge = g.edge(edge_id); + return edge->tensor() != input_tensor; + }); +} + +// If do in-place calculation, then need to use the new output and inherit original output's accessor +void set_new_output_and_inherit_accessor(std::unique_ptr<INode> &node, Tensor *orig_output, Tensor *new_output) +{ + ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : " + << node->id() << " and name : " << node->name() << std::endl); + // Update accessor + new_output->set_accessor(orig_output->extract_accessor()); + // Update output + node->set_output_tensor(new_output->id(), 0); +} + +// Try to mutate the node to perform the depthwise in-place calculation +void try_in_place_depthwiseconv(std::unique_ptr<INode> &node) +{ + // Get input edge + Edge *input_edge = node->input_edge(0); + Edge *weight_edge = node->input_edge(1); + ARM_COMPUTE_ERROR_ON(input_edge == nullptr || weight_edge == nullptr); + + auto input_tensor = input_edge->tensor(); + auto weight_tensor = weight_edge->tensor(); + ARM_COMPUTE_ERROR_ON(input_tensor == nullptr || weight_tensor == nullptr); + + const auto input_shape = input_tensor->desc().shape; + const auto qinfo_input = input_tensor->desc().quant_info; + + const auto weight_shape = weight_tensor->desc().shape; + const auto weight_layout = weight_tensor->desc().layout; + + // Extract PadStrideInfo and depth multiplier + PadStrideInfo conv_info{}; + unsigned int depth_multiplier{}; + if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer) + { + conv_info = + polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info(); + depth_multiplier = + polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier(); + } + else if (node->type() == NodeType::DepthwiseConvolutionLayer) + { + conv_info = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->convolution_info(); + depth_multiplier = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->depth_multiplier(); + } + + // Get current output tensor + auto current_output_tensor = node->output(0); + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr); + const auto out_shape = current_output_tensor->desc().shape; + const auto qinfo_out = current_output_tensor->desc().quant_info; + + bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) && + (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr); + + // Specify conditions with which input can be in-placed + input_can_in_place &= weight_layout == input_tensor->desc().layout && weight_layout == DataLayout::NHWC; + + const int weights_width_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::WIDTH); + const int weights_height_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::HEIGHT); + const bool is_1x1 = weight_shape[weights_width_idx] == 1U && weight_shape[weights_height_idx] == 1U; + input_can_in_place &= is_1x1; + + input_can_in_place &= depth_multiplier == 1; + input_can_in_place &= conv_info.stride() == std::make_pair(1U, 1U); + input_can_in_place &= !conv_info.has_padding(); + // NOTE: Dilation should also be (1, 1). 
However currently dilation is not supported in the depthwise conv node + + if (input_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input_tensor); + } + else + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor " + "or the quantization info are different.\n"); + } +} + +// Try to mutate the node to perform the elementwise in-place calculation +void try_in_place_elementwise(std::unique_ptr<INode> &node) +{ + // Get input edge + Edge *input0_edge = node->input_edge(0); + Edge *input1_edge = node->input_edge(1); + ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr); + + auto input0_tensor = input0_edge->tensor(); + auto input1_tensor = input1_edge->tensor(); + ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr); + + const auto shape0 = input0_tensor->desc().shape; + const auto shape1 = input1_tensor->desc().shape; + const auto qinfo0 = input0_tensor->desc().quant_info; + const auto qinfo1 = input1_tensor->desc().quant_info; + + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + // Inputs are not broadcast compatible + if (out_shape.total_size() == 0) + { + return; + } + + // Get current output tensor + auto current_output_tensor = node->output(0); + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr); + const auto qinfo_out = current_output_tensor->desc().quant_info; + + // Can do in place, if the input has same shape as output, has same quntisation info as output, has same data type as output and input doesn't have accessor. + bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && + (qinfo0 == qinfo_out) && + (input0_tensor->desc().data_type == current_output_tensor->desc().data_type) && + (input0_tensor->accessor() == nullptr); + bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && + (qinfo1 == qinfo_out) && + (input1_tensor->desc().data_type == current_output_tensor->desc().data_type) && + (input1_tensor->accessor() == nullptr); + + if (input0_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor); + } + else if (input1_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor); + } + else + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor " + "or the quantization info are different.\n"); + } +} +} // namespace + const char *InPlaceOperationMutator::name() { return "InPlaceOperationMutator"; @@ -42,38 +222,53 @@ IGraphMutator::MutationType InPlaceOperationMutator::type() const void InPlaceOperationMutator::mutate(Graph &g) { - std::set<NodeType> in_place_nodes = { NodeType::BatchNormalizationLayer, NodeType::ActivationLayer, NodeType::PrintLayer }; + std::set<NodeType> in_place_nodes = {NodeType::ActivationLayer, + NodeType::BatchNormalizationLayer, + NodeType::EltwiseLayer, + NodeType::UnaryEltwiseLayer, + NodeType::DepthwiseConvolutionLayer, + NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer, + NodeType::PrintLayer}; // Not interested in the order of nodes - for(auto &node : g.nodes()) + for (auto &node : g.nodes()) { - if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes)) + if (node && in_place_nodes.find(node->type()) != std::end(in_place_nodes)) { // Get input edge Edge *input_edge = node->input_edge(0); // Check if parent has a single output if 
yes then force in place calculation else not - if((input_edge != nullptr) && (input_edge->producer() != nullptr) && (input_edge->producer()->output_edges().size() == 1)) + if ((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge)) { - // Get current and new output tensors - auto current_output_tensor = node->output(0); - auto new_output_tensor = input_edge->tensor(); - - ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); - - // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different - if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + if (node->type() == NodeType::EltwiseLayer) { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + try_in_place_elementwise(node); + } + else if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer || + node->type() == NodeType::DepthwiseConvolutionLayer) + { + try_in_place_depthwiseconv(node); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : " - << node->id() << " and name : " << node->name() << std::endl); - // Update accessor - new_output_tensor->set_accessor(current_output_tensor->extract_accessor()); - // Update output - node->set_output_tensor(new_output_tensor->id(), 0); + // Get current and new output tensors + auto current_output_tensor = node->output(0); + auto new_output_tensor = input_edge->tensor(); + + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); + + // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different + if (new_output_tensor->accessor() != nullptr || + current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to " + "the input tensor or the quantization info are different.\n"); + } + else + { + set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor); + } } } } diff --git a/src/graph/mutators/MutatorUtils.cpp b/src/graph/mutators/MutatorUtils.cpp new file mode 100644 index 0000000000..f47240eadd --- /dev/null +++ b/src/graph/mutators/MutatorUtils.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/graph/mutators/MutatorUtils.h" + +namespace arm_compute +{ +namespace graph +{ +bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list) +{ + if (layout == DataLayout::NCHW || layout == DataLayout::NHWC) + { + const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT); + const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH); + + for (unsigned int i = 0; i < padding_list.size(); ++i) + { + if (i != height_index && i != width_index && padding_list[i] != PaddingInfo(0, 0)) + { + // if the index is not either height or width, don't fuse + return false; + } + } + + return true; + } + + return false; +} +} // namespace graph +} // namespace arm_compute diff --git a/src/graph/mutators/MutatorUtils.h b/src/graph/mutators/MutatorUtils.h new file mode 100644 index 0000000000..170d892c93 --- /dev/null +++ b/src/graph/mutators/MutatorUtils.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H +#define ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H + +#include "arm_compute/graph/Utils.h" + +namespace arm_compute +{ +namespace graph +{ +/** Check if padding is in height and/or width dimensions + * + * @param[in] layout Data layout of the tensor + * @param[in] padding_list List of padding pairs + */ +bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list); +} // namespace graph +} // namespace arm_compute + +#endif /* ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H */
\ No newline at end of file diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp index 72e2645dd2..588befecae 100644 --- a/src/graph/mutators/NodeExecutionMethodMutator.cpp +++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,13 +23,13 @@ */ #include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { @@ -49,17 +49,17 @@ template <typename Setter> void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter) { const std::vector<NodeID> &node_ids = g.nodes(node_type); - for(auto &node_id : node_ids) + for (auto &node_id : node_ids) { INode *node = g.node(node_id); - if(node != nullptr) + if (node != nullptr) { // Validate node backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target()); Status status = backend.validate_node(*node); // Set default execution method in case of failure - if(!bool(status)) + if (!bool(status)) { setter(node); } @@ -81,22 +81,26 @@ IGraphMutator::MutationType NodeExecutionMethodMutator::type() const void NodeExecutionMethodMutator::mutate(Graph &g) { // Convolution Layer - set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n) - { - ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : " - << n->id() << " and Name: " << n->name() << std::endl); - auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n); - casted_node->set_convolution_method(ConvolutionMethod::Default); - }); + set_default_on_invalid_method(g, NodeType::ConvolutionLayer, + [](INode *n) + { + ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : " + << n->id() << " and Name: " << n->name() << std::endl); + auto *casted_node = + arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n); + casted_node->set_convolution_method(ConvolutionMethod::Default); + }); // Depthwise Convolution Layer - set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n) - { - ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : " - << n->id() << " and Name: " << n->name() << std::endl); - auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n); - casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default); - }); + set_default_on_invalid_method( + g, NodeType::DepthwiseConvolutionLayer, + [](INode *n) + { + ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : " + << n->id() << " and Name: " << n->name() << std::endl); + auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n); + casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default); + }); } } // namespace graph } // namespace arm_compute diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index 
ae53b8ff75..998a4a05c7 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,15 +23,18 @@ */ #include "arm_compute/graph/mutators/NodeFusionMutator.h" +#include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/GraphBuilder.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "src/graph/mutators/MutatorUtils.h" +#include "support/Cast.h" +#include <list> #include <set> namespace arm_compute @@ -40,24 +43,60 @@ namespace graph { namespace detail { +void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode *old_node, bool add_output_tensor) +{ + if (new_node == nullptr || old_node == nullptr) + { + return; + } + + // Get driving nodes of last fusable node + std::vector<NodeIdxPair> last_driving_nodes = get_driving_nodes(*old_node); + + // Extract last fusable node accessor if any + if (old_node->output(0) == nullptr) + { + return; + } + auto old_node_accessor = old_node->output(0)->extract_accessor(); + + // Remove node + g.remove_node(old_node->id()); + + // Update fused node outputs + for (auto &driving_node : last_driving_nodes) + { + g.add_connection(new_node->id(), 0, driving_node.node_id, driving_node.index); + if (add_output_tensor) + { + configure_tensor(new_node->output(0)); + } + } + + // Update accessor to fused node + new_node->output(0)->set_accessor(std::move(old_node_accessor)); +} + void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge) { ARM_COMPUTE_ERROR_ON(output_edge == nullptr); auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer()); - auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); + auto *bn_node = + arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); // Not fusing if number of groups is greater than 1 - if(conv_node->num_groups() > 1) + if (conv_node->num_groups() > 1) { return; } - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << output_edge->producer_id() - << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " + << output_edge->producer_id() << " with BatchNormalization Layer node with ID : " + << output_edge->consumer_id() << std::endl); // Prevent fusion if fused node has an output accessor - if(conv_node->output(0)->accessor() == nullptr) + if (conv_node->output(0)->accessor() == nullptr) { const Target assigned_target = conv_node->assigned_target(); @@ -77,9 +116,10 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge const auto epsilon = bn_node->epsilon(); // Create the fused node - const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info); + const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>( + 
epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info); - if(conv_node->input_edge(2) != nullptr) + if (conv_node->input_edge(2) != nullptr) { auto conv_bias_id = conv_node->input_edge(2)->producer_id(); g.add_connection(conv_bias_id, 0, fused_id, 2); @@ -91,45 +131,33 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge g.add_connection(bn_mean_id, 0, fused_id, 3); g.add_connection(bn_var_id, 0, fused_id, 4); - if(bn_node->input_edge(3) != nullptr) + if (bn_node->input_edge(3) != nullptr) { const auto bn_beta_id = bn_node->input_edge(3)->producer_id(); g.add_connection(bn_beta_id, 0, fused_id, 5); } - if(bn_node->input_edge(4) != nullptr) + if (bn_node->input_edge(4) != nullptr) { const auto bn_gamma_id = bn_node->input_edge(4)->producer_id(); g.add_connection(bn_gamma_id, 0, fused_id, 6); } - auto fused_node = g.node(fused_id); - std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node); + auto fused_node = g.node(fused_id); + auto bn_node_name = bn_node->name(); - // Extract batch normalization node accessor if any - auto bn_node_accessor = bn_node->output(0)->extract_accessor(); - auto bn_node_name = bn_node->name(); + transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true); - // Remove batch normalization node - g.remove_node(bn_node->id()); - - // Get driving nodes of batch normalization node - for(auto &driving_node : bn_driving_nodes) - { - g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index); - configure_tensor(fused_node->output(0)); - } - // Update fused node outputs - fused_node->output(0)->set_accessor(std::move(bn_node_accessor)); fused_node->set_assigned_target(assigned_target); - fused_node->set_common_node_parameters(NodeParams{ conv_node->name() + "+" + bn_node_name, assigned_target }); + fused_node->set_common_node_parameters(NodeParams{conv_node->name() + "+" + bn_node_name, assigned_target}); // Remove convolution node g.remove_node(conv_node->id()); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE( + "Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n"); } } @@ -137,14 +165,17 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o { ARM_COMPUTE_ERROR_ON(output_edge == nullptr); - auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer()); - auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); + auto *depth_conv_node = + arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer()); + auto *bn_node = + arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer()); - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " << output_edge->producer_id() - << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " + << output_edge->producer_id() << " with BatchNormalization Layer node with ID : " + << output_edge->consumer_id() << std::endl); // Prevent fusion if fused node has an output accessor - if(depth_conv_node->output(0)->accessor() == nullptr) + if (depth_conv_node->output(0)->accessor() == nullptr) { const Target 
assigned_target = depth_conv_node->assigned_target(); @@ -164,9 +195,10 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o const auto epsilon = bn_node->epsilon(); // Create the fused node - const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(epsilon, conv_info, depth_multiplier, depth_conv_method, act_info); + const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>( + epsilon, conv_info, depth_multiplier, depth_conv_method, act_info); - if(depth_conv_node->input_edge(2) != nullptr) + if (depth_conv_node->input_edge(2) != nullptr) { const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id(); g.add_connection(conv_bias_id, 0, fused_id, 2); @@ -180,38 +212,29 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o g.add_connection(bn_beta_id, 0, fused_id, 5); g.add_connection(bn_gamma_id, 0, fused_id, 6); - auto fused_node = g.node(fused_id); - std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node); - - // Extract batch normalization node accessor if any - auto bn_node_accessor = bn_node->output(0)->extract_accessor(); - auto bn_node_name = bn_node->name(); + auto fused_node = g.node(fused_id); + auto bn_node_name = bn_node->name(); - // Remove batch normalization node - g.remove_node(bn_node->id()); + transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true); - // Get driving nodes of batch normalization node - for(auto &driving_node : bn_driving_nodes) - { - g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index); - configure_tensor(fused_node->output(0)); - } - // Update fused node outputs - fused_node->output(0)->set_accessor(std::move(bn_node_accessor)); fused_node->set_assigned_target(assigned_target); - fused_node->set_common_node_parameters(NodeParams{ depth_conv_node->name() + "+" + bn_node_name, assigned_target }); + fused_node->set_common_node_parameters( + NodeParams{depth_conv_node->name() + "+" + bn_node_name, assigned_target}); // Remove convolution node g.remove_node(depth_conv_node->id()); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n"); + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the " + "presence of an output accessor\n"); } } template <typename N> -void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set<Activation> &supported_fused_activations) +void fuse_node_with_activation(Graph &g, + const Edge *output_edge, + const std::set<Activation> &supported_fused_activations) { ARM_COMPUTE_ERROR_ON(output_edge == nullptr); @@ -221,64 +244,126 @@ void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr); // Check if activation is supported for fusion - if(supported_fused_activations.count(act_node->activation_info().activation()) == 0) + if (supported_fused_activations.count(act_node->activation_info().activation()) == 0) + { + return; + } + + // EltwiseLayerNode can only be fused when dataype is float + if (n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type)) { return; } ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id() - << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl); + << " 
with Activation Layer node with ID : " + << output_edge->consumer_id() << std::endl); // Prevent fusion if fused node has an output accessor - if(n_node->output(0)->accessor() == nullptr) + if (n_node->output(0)->accessor() == nullptr) { - // Get driving nodes of activation node - std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node); - // Set activation info to fused node n_node->set_fused_activation(act_node->activation_info()); - // Extract activation node accessor if any - auto act_node_accessor = act_node->output(0)->extract_accessor(); + transfer_driving_nodes_and_remove_old_node(g, n_node, act_node, false); + } + else + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE( + "Prevented fusion of node with activation due to the presence of an output accessor\n"); + } +} + +template <typename N> +void fuse_pad_with_convolution(Graph &g, const Edge *output_edge) +{ + auto *pad_node = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->producer()); + auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->consumer()); + + const Edge *input_edge = pad_node->input_edge(0); + if (input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr && + pad_node->pad_value().get<float>() == 0.0) + { + const DataLayout layout = input_edge->tensor()->desc().layout; + const PaddingList padding_list = pad_node->padding(); - // Remove activation node - g.remove_node(act_node->id()); + const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT); + const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH); - // Update fused node outputs - for(auto &driving_node : act_driving_nodes) + const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] : PaddingInfo(0, 0); + const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] : PaddingInfo(0, 0); + + if (is_padding_in_height_or_width(layout, padding_list)) { - g.add_connection(n_node->id(), 0, driving_node.node_id, driving_node.index); + // Add paddings to the convolution node + const PadStrideInfo conv_info = conv_node->convolution_info(); + const PadStrideInfo new_conv_info(conv_info.stride().first, conv_info.stride().second, + conv_info.pad_left() + pad_w.first, conv_info.pad_right() + pad_w.second, + conv_info.pad_top() + pad_h.first, conv_info.pad_bottom() + pad_h.second, + conv_info.round()); + conv_node->set_convolution_info(new_conv_info); + + // Update drivers of the convolution node + std::vector<NodeIdxPair> pad_driver_nodes = get_driver_nodes(*pad_node); + g.remove_node(pad_node->id()); + + // Update fused node inputs + for (auto &driver_node : pad_driver_nodes) + { + g.add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0); + } } - - // Update accessor to fused node - n_node->output(0)->set_accessor(std::move(act_node_accessor)); - } - else - { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n"); } } template <typename N1, typename N2, typename F, typename... Args> -void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments) +void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments) { - // Not interested in the order of nodes - for(auto &node : g.nodes()) + // Note that fused nodes may be added to the end of the node list. 
+ // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing. + // This is intentional as it probes the newly added fused nodes for further fusing opportunities. + for (unsigned int i = 0; i < g.nodes().size(); ++i) { - // Check if the node is of type N and not a branching node - if(node && node->type() == N1::node_type && node->output_edges().size() == 1) + auto node = g.node(i); + // Check if the node is of type N1 and not a branching node + if (node && node->type() == N1::node_type && node->output_edges().size() == 1) { const auto output_edge_id = *node->output_edges().begin(); const auto output_edge = g.edge(output_edge_id); - // Check if following node is an activation layer node - if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer())) + // Check if following node is a type N2 node + if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && + (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer())) { fuse_fcn(g, output_edge, optional_arguments...); } } } } + +template <typename N1, typename F, typename... Args> +void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments) +{ + // Note that fused nodes may be added to the end of the node list. + // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing. + // This is intentional as it probes the newly added fused nodes for further fusing opportunities. + for (unsigned int i = 0; i < g.nodes().size(); ++i) + { + auto node = g.node(i); + // Check if the node is of type N1 and not a branching node + if (node && node->type() == N1::node_type && node->output_edges().size() == 1) + { + const auto output_edge_id = *node->output_edges().begin(); + const auto output_edge = g.edge(output_edge_id); + + // Check if it's the correct target + if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer())) + { + fuse_fcn(g, output_edge, i, optional_arguments...); + } + } + } +} } // namespace detail const char *NodeFusionMutator::name() @@ -294,41 +379,50 @@ IGraphMutator::MutationType NodeFusionMutator::type() const void NodeFusionMutator::mutate(Graph &g) { // Supported activations when fusing - const std::set<Activation> supported_fused_activations_conv = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU }; - const std::set<Activation> supported_fused_activations_eltwise = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU, - Activation::TANH, Activation::LOGISTIC - }; + const std::set<Activation> supported_fused_activations = { + Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU, + Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU, + Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU, + Activation::RELU, Activation::SOFT_RELU, Activation::SQRT, + Activation::SQUARE, Activation::TANH}; // Preconditions - auto empty_prec = [](INode &) - { - return true; - }; - auto cl_target_prec = [](INode & n) - { - return n.assigned_target() == Target::CL; - }; - auto qs8_prec = [&g](INode & n) + auto empty_prec = [](INode &) { return true; }; + auto cl_target_prec = [](INode &n) { return n.assigned_target() == Target::CL; }; + auto qs8_prec = [&g](INode &n) { ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr); const auto 
output_edge_id = *n.output_edges().begin(); const auto output_edge = g.edge(output_edge_id); // To perform fusion the two nodes must have same output quantization information - const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info; + const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info; const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8; return (output_qasymm8 && same_qinfo) || !output_qasymm8; }; // Fusion mutations - detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations_conv); - detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations_conv); - detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations_conv); - detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations_conv); - detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations_eltwise); - detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization); - detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); + + detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec, + detail::fuse_pad_with_convolution<ConvolutionLayerNode>); + detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>( + g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>); + detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>( + g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations); + detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>( + g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations); + detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>( + g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations); + detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>( + g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations); + detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>( + g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations); + // The fusion of BatchNormalizationLayer must occur after the fusion of ActivationLayer. 
Because FusedConvolutionBatchNormalizationNode assumes the BatchNormalization is already fused with activation, if any + detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>( + g, empty_prec, detail::fuse_convolution_with_batch_normalization); + detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>( + g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); } } // namespace graph } // namespace arm_compute diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp index 3ba73071ed..533f8944cf 100644 --- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp +++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,15 +23,15 @@ */ #include "arm_compute/graph/mutators/SplitLayerSubTensorMutator.h" -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/backends/BackendRegistry.h" +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/Logger.h" #include "arm_compute/graph/nodes/SplitLayerNode.h" +#include "arm_compute/graph/Utils.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/Iterable.h" +#include "support/Cast.h" +#include "support/Iterable.h" namespace arm_compute { @@ -50,7 +50,7 @@ IGraphMutator::MutationType SplitLayerSubTensorMutator::type() const void SplitLayerSubTensorMutator::mutate(Graph &g) { // Early exit if no Split layers exist in graph - if(g.nodes(NodeType::SplitLayer).empty()) + if (g.nodes(NodeType::SplitLayer).empty()) { return; } @@ -59,43 +59,46 @@ void SplitLayerSubTensorMutator::mutate(Graph &g) std::vector<NodeID> topological_sorted_node_ids = dfs(g); // Should be in reverse order of execution - for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) + for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) { INode *node = g.node(node_id); - if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr) + if (node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr) { // Get output tensor Tensor *input_tensor = node->input(0); // Check that all tensor have the same target and are valid bool is_valid = std::all_of(node->outputs().cbegin(), node->outputs().cend(), - [&](const TensorID & tid) - { - return (g.tensor(tid) != nullptr) && (g.tensor(tid)->desc().target == input_tensor->desc().target); - }); + [&](const TensorID &tid) { + return (g.tensor(tid) != nullptr) && + (g.tensor(tid)->desc().target == input_tensor->desc().target); + }); // Create subtensors - if(is_valid && is_target_supported(input_tensor->desc().target)) + if (is_valid && is_target_supported(input_tensor->desc().target)) { ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : " << node->id() << " and name : " << node->name() << std::endl); auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node); - const unsigned int axis = split_node->axis(); + const int axis = split_node->axis(); const unsigned int num_splits = split_node->num_splits(); const bool extend_parent = (axis < 2); // Create sub-tensor handles - for(unsigned int i = 0; i < node->outputs().size(); ++i) + for 
(unsigned int i = 0; i < node->outputs().size(); ++i) { Tensor *output_tensor = node->output(i); const TensorShape output_shape = output_tensor->desc().shape; Coordinates coords; - std::tie(std::ignore, coords) = SplitLayerNode::compute_output_descriptor(input_tensor->desc(), num_splits, axis, i); + std::tie(std::ignore, coords) = + split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i); - backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target); - std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent); + backends::IDeviceBackend &backend = + backends::BackendRegistry::get().get_backend(output_tensor->desc().target); + std::unique_ptr<ITensorHandle> handle = + backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent); output_tensor->set_handle(std::move(handle)); } } diff --git a/src/graph/mutators/SyntheticDataTypeMutator.cpp b/src/graph/mutators/SyntheticDataTypeMutator.cpp index 0a9f5058dd..3dc2480e85 100644 --- a/src/graph/mutators/SyntheticDataTypeMutator.cpp +++ b/src/graph/mutators/SyntheticDataTypeMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,10 +26,10 @@ #include "arm_compute/graph/GraphBuilder.h" #include "arm_compute/graph/ITensorAccessor.h" #include "arm_compute/graph/Logger.h" -#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/nodes/Nodes.h" +#include "arm_compute/graph/Utils.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" #include <set> @@ -62,14 +62,12 @@ public: */ bool is_mutation_supported(Graph &g) { - const std::set<NodeType> unsupported_node_types = { NodeType::DetectionOutputLayer, - NodeType::NormalizationLayer, - NodeType::PriorBoxLayer - }; + const std::set<NodeType> unsupported_node_types = {NodeType::DetectionOutputLayer, NodeType::NormalizationLayer, + NodeType::PriorBoxLayer}; - for(const auto &utype : unsupported_node_types) + for (const auto &utype : unsupported_node_types) { - if(!g.nodes(utype).empty()) + if (!g.nodes(utype).empty()) { return false; } @@ -83,12 +81,12 @@ bool is_mutation_supported(Graph &g) */ void remove_optimized_nodes(Graph &g) { - const std::set<NodeType> optimized_node_types = { NodeType::BatchNormalizationLayer }; + const std::set<NodeType> optimized_node_types = {NodeType::BatchNormalizationLayer}; - for(const auto &opt_type : optimized_node_types) + for (const auto &opt_type : optimized_node_types) { const std::vector<NodeID> opt_nodes_ids = g.nodes(opt_type); - for(const auto &node_id : opt_nodes_ids) + for (const auto &node_id : opt_nodes_ids) { INode *node = g.node(node_id); @@ -108,7 +106,7 @@ void remove_optimized_nodes(Graph &g) g.remove_node(node->id()); // Update connections - for(auto &driving_node : driving_nodes) + for (auto &driving_node : driving_nodes) { g.add_connection(producer->id(), producer_edge_id, driving_node.node_id, driving_node.index); } @@ -120,15 +118,28 @@ void remove_optimized_nodes(Graph &g) * * @param[in,out] g Graph to convert tensors of. 
*/ -void convert_tensors(Graph &g) +void convert_tensors(Graph &g, DataType data_type) { auto &tensors = g.tensors(); - for(auto &tensor : tensors) + for (auto &tensor : tensors) { - if(tensor != nullptr) + if (tensor != nullptr) { - tensor->desc().data_type = DataType::QASYMM8; - tensor->desc().quant_info = QuantizationInfo(0.125f, -10); + switch (data_type) + { + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + { + tensor->desc().quant_info = QuantizationInfo(0.125f, -10); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported mutation type"); + break; + } + } + tensor->desc().data_type = data_type; } } } @@ -143,7 +154,7 @@ template <typename NT> void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const &f) { const std::vector<NodeID> nodes_ids = g.nodes(NT::node_type); - for(const auto &nodes_id : nodes_ids) + for (const auto &nodes_id : nodes_ids) { INode *node = arm_compute::utils::cast::polymorphic_downcast<NT *>(g.node(nodes_id)); ARM_COMPUTE_ERROR_ON(node == nullptr); @@ -161,23 +172,44 @@ void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const */ void convert_special_tensors(Graph &g) { - auto softmax_func = [](INode * node, Tensor * tensor) + auto softmax_func = [](INode *node, Tensor *tensor) { ARM_COMPUTE_UNUSED(node); - tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0); + if (tensor->desc().data_type == DataType::QASYMM8) + { + tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0); + } + else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED) + { + tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128); + } return true; }; - auto act_func = [](INode * node, Tensor * tensor) + auto act_func = [](INode *node, Tensor *tensor) { auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(node); - if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) + if (tensor->desc().data_type == DataType::QASYMM8) { - tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 128); + if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) + { + tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 128); + } + else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0); + } } - else if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) + else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED) { - tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0); + if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH) + { + tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 0); + } + else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128); + } } return true; }; @@ -194,22 +226,19 @@ void convert_special_tensors(Graph &g) */ void handle_nodes_with_bias(Graph &g) { - const std::set<NodeType> special_node_types = { NodeType::ConvolutionLayer, - NodeType::DeconvolutionLayer, - NodeType::DepthwiseConvolutionLayer, - NodeType::FullyConnectedLayer - }; + const std::set<NodeType> special_node_types = {NodeType::ConvolutionLayer, NodeType::DeconvolutionLayer, + NodeType::DepthwiseConvolutionLayer, NodeType::FullyConnectedLayer}; - for(const auto &spc_type : special_node_types) + for 
(const auto &spc_type : special_node_types) { const std::vector<NodeID> scp_nodes_ids = g.nodes(spc_type); - for(const auto &node_id : scp_nodes_ids) + for (const auto &node_id : scp_nodes_ids) { INode *node = g.node(node_id); - if(node != nullptr) + if (node != nullptr) { Tensor *tensor = node->input(2); - if(tensor != nullptr) + if (tensor != nullptr) { tensor->desc().data_type = DataType::S32; } @@ -219,10 +248,10 @@ params.name = params.name.empty() ? "" : params.name + "Bias"; TensorDescriptor b_desc = node->input(1)->desc(); - auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)]; - b_desc.shape = TensorShape(depth); + auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)]; + b_desc.shape = TensorShape(depth); - auto accessor = support::cpp14::make_unique<EmptyAccessor>(); + auto accessor = std::make_unique<EmptyAccessor>(); auto b_nid = GraphBuilder::add_const_node(g, params, b_desc, std::move(accessor)); g.add_connection(b_nid, 0, node_id, 2); } @@ -232,6 +261,10 @@ } } // namespace +SyntheticDataTypeMutator::SyntheticDataTypeMutator(DataType mutate_type) : _mutate_type{mutate_type} +{ +} + const char *SyntheticDataTypeMutator::name() { return "SyntheticDataTypeMutator"; } @@ -244,13 +277,13 @@ IGraphMutator::MutationType SyntheticDataTypeMutator::type() const void SyntheticDataTypeMutator::mutate(Graph &g) { - if(is_mutation_supported(g)) + if (is_mutation_supported(g)) { // Remove nodes that get optimized out (e.g. BatchNorm) remove_optimized_nodes(g); // Convert tensor - convert_tensors(g); + convert_tensors(g, _mutate_type); convert_special_tensors(g); // Handle special nodes
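The eligibility test added in InPlaceOperationMutator (try_in_place_elementwise) reduces to a simple predicate: an input operand may alias the output buffer only if its shape equals the output shape (i.e. it is not the broadcast operand), its quantization info and data type match the output's, and no accessor is bound to it, since an accessor observes the tensor's contents and aliasing would clobber them. Below is a self-contained sketch of that predicate; QuantInfo and TensorDesc are simplified stand-in types for this illustration, not the library's TensorDescriptor.

#include <cstdio>
#include <vector>

// Stand-in types mirroring only the fields the mutator consults.
struct QuantInfo
{
    float scale{0.f};
    int   offset{0};

    bool operator==(const QuantInfo &other) const
    {
        return scale == other.scale && offset == other.offset;
    }
};

enum class DataType
{
    F32,
    QASYMM8
};

struct TensorDesc
{
    std::vector<unsigned int> shape{};
    QuantInfo                 qinfo{};
    DataType                  dtype{DataType::F32};
    bool                      has_accessor{false};
};

// An input of an elementwise op may be reused as the output buffer only if it
// matches the output exactly and nothing external observes its contents.
bool can_run_in_place(const TensorDesc &input, const TensorDesc &output)
{
    return input.shape == output.shape && input.qinfo == output.qinfo && input.dtype == output.dtype &&
           !input.has_accessor;
}

int main()
{
    const TensorDesc output{{16, 16, 8}, {0.125f, -10}, DataType::QASYMM8, false};
    const TensorDesc lhs = output;                                              // identical descriptor: reusable
    const TensorDesc rhs{{16, 16, 1}, {0.125f, -10}, DataType::QASYMM8, false}; // broadcast operand: not reusable
    std::printf("lhs in-place: %d, rhs in-place: %d\n", can_run_in_place(lhs, output), can_run_in_place(rhs, output));
    return 0;
}

The depthwise path (try_in_place_depthwiseconv) layers further structural conditions on the same idea: NHWC layout, 1x1 kernel, depth multiplier 1, unit stride and no padding, so that each output element depends only on the single input element it would overwrite.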