aboutsummaryrefslogtreecommitdiff
path: root/src/graph/mutators/InPlaceOperationMutator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/graph/mutators/InPlaceOperationMutator.cpp')
-rw-r--r--src/graph/mutators/InPlaceOperationMutator.cpp227
1 files changed, 187 insertions, 40 deletions
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index 61639a8f6f..a51dcc4f42 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,16 @@
*/
#include "arm_compute/graph/mutators/InPlaceOperationMutator.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h"
+#include "arm_compute/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.h"
+
+#include "support/Cast.h"
+
+using namespace arm_compute::utils::cast;
namespace arm_compute
{
@@ -41,7 +49,7 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge)
const auto input_tensor = input_edge->tensor();
const auto input_edge_id = input_edge->id();
- if(parent_node == nullptr)
+ if (parent_node == nullptr)
{
return false;
}
@@ -50,24 +58,155 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge)
// If the output is connected to only one edge, then computations can
// be done in-place.
- if(output_edges.size() == 1)
+ if (output_edges.size() == 1)
{
return true;
}
- return std::all_of(output_edges.begin(),
- output_edges.end(),
- [&](const EdgeID & edge_id)
+ return std::all_of(output_edges.begin(), output_edges.end(),
+ [&](const EdgeID &edge_id)
+ {
+ // Skip check on current input edge
+ if (edge_id == input_edge_id)
+ {
+ return true;
+ }
+
+ auto edge = g.edge(edge_id);
+ return edge->tensor() != input_tensor;
+ });
+}
+
+// When switching to in-place calculation, the node must use the new output tensor, which inherits the original output's accessor
+void set_new_output_and_inherit_accessor(std::unique_ptr<INode> &node, Tensor *orig_output, Tensor *new_output)
+{
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : "
+ << node->id() << " and name : " << node->name() << std::endl);
+ // Update accessor
+ new_output->set_accessor(orig_output->extract_accessor());
+ // Update output
+ node->set_output_tensor(new_output->id(), 0);
+}
+
+// Try to mutate the node to perform the depthwise in-place calculation
+void try_in_place_depthwiseconv(std::unique_ptr<INode> &node)
+{
+ // Get input edge
+ Edge *input_edge = node->input_edge(0);
+ Edge *weight_edge = node->input_edge(1);
+ ARM_COMPUTE_ERROR_ON(input_edge == nullptr || weight_edge == nullptr);
+
+ auto input_tensor = input_edge->tensor();
+ auto weight_tensor = weight_edge->tensor();
+ ARM_COMPUTE_ERROR_ON(input_tensor == nullptr || weight_tensor == nullptr);
+
+ const auto input_shape = input_tensor->desc().shape;
+ const auto qinfo_input = input_tensor->desc().quant_info;
+
+ const auto weight_shape = weight_tensor->desc().shape;
+ const auto weight_layout = weight_tensor->desc().layout;
+
+ // Extract PadStrideInfo and depth multiplier
+ PadStrideInfo conv_info{};
+ unsigned int depth_multiplier{};
+ if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer)
{
- // Skip check on current input edge
- if(edge_id == input_edge_id)
- {
- return true;
- }
+ conv_info =
+ polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info();
+ depth_multiplier =
+ polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier();
+ }
+ else if (node->type() == NodeType::DepthwiseConvolutionLayer)
+ {
+ conv_info = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->convolution_info();
+ depth_multiplier = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->depth_multiplier();
+ }
+
+ // Get current output tensor
+ auto current_output_tensor = node->output(0);
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
+ const auto out_shape = current_output_tensor->desc().shape;
+ const auto qinfo_out = current_output_tensor->desc().quant_info;
+
+ bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) &&
+ (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr);
- auto edge = g.edge(edge_id);
- return edge->tensor() != input_tensor;
- });
+ // Specify conditions with which input can be in-placed
+ input_can_in_place &= weight_layout == input_tensor->desc().layout && weight_layout == DataLayout::NHWC;
+
+ const int weights_width_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::WIDTH);
+ const int weights_height_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::HEIGHT);
+ const bool is_1x1 = weight_shape[weights_width_idx] == 1U && weight_shape[weights_height_idx] == 1U;
+ input_can_in_place &= is_1x1;
+
+ input_can_in_place &= depth_multiplier == 1;
+ input_can_in_place &= conv_info.stride() == std::make_pair(1U, 1U);
+ input_can_in_place &= !conv_info.has_padding();
+ // NOTE: Dilation should also be (1, 1). However currently dilation is not supported in the depthwise conv node
+
+ if (input_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input_tensor);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor "
+ "or the quantization info are different.\n");
+ }
+}
+
+// Try to mutate the node to perform the elementwise in-place calculation
+void try_in_place_elementwise(std::unique_ptr<INode> &node)
+{
+ // Get input edge
+ Edge *input0_edge = node->input_edge(0);
+ Edge *input1_edge = node->input_edge(1);
+ ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr);
+
+ auto input0_tensor = input0_edge->tensor();
+ auto input1_tensor = input1_edge->tensor();
+ ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr);
+
+ const auto shape0 = input0_tensor->desc().shape;
+ const auto shape1 = input1_tensor->desc().shape;
+ const auto qinfo0 = input0_tensor->desc().quant_info;
+ const auto qinfo1 = input1_tensor->desc().quant_info;
+
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ // Inputs are not broadcast compatible
+ if (out_shape.total_size() == 0)
+ {
+ return;
+ }
+
+ // Get current output tensor
+ auto current_output_tensor = node->output(0);
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
+ const auto qinfo_out = current_output_tensor->desc().quant_info;
+
+ // Can do in place if the input has the same shape as the output, has the same quantisation info as the output, has the same data type as the output, and the input doesn't have an accessor.
+ bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) &&
+ (qinfo0 == qinfo_out) &&
+ (input0_tensor->desc().data_type == current_output_tensor->desc().data_type) &&
+ (input0_tensor->accessor() == nullptr);
+ bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) &&
+ (qinfo1 == qinfo_out) &&
+ (input1_tensor->desc().data_type == current_output_tensor->desc().data_type) &&
+ (input1_tensor->accessor() == nullptr);
+
+ if (input0_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor);
+ }
+ else if (input1_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor "
+ "or the quantization info are different.\n");
+ }
}
} // namespace
@@ -83,45 +222,53 @@ IGraphMutator::MutationType InPlaceOperationMutator::type() const
void InPlaceOperationMutator::mutate(Graph &g)
{
- std::set<NodeType> in_place_nodes =
- {
- NodeType::ActivationLayer,
- NodeType::BatchNormalizationLayer,
- NodeType::EltwiseLayer,
- NodeType::UnaryEltwiseLayer,
- NodeType::PrintLayer
- };
+ std::set<NodeType> in_place_nodes = {NodeType::ActivationLayer,
+ NodeType::BatchNormalizationLayer,
+ NodeType::EltwiseLayer,
+ NodeType::UnaryEltwiseLayer,
+ NodeType::DepthwiseConvolutionLayer,
+ NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer,
+ NodeType::PrintLayer};
// Not interested in the order of nodes
- for(auto &node : g.nodes())
+ for (auto &node : g.nodes())
{
- if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
+ if (node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
{
// Get input edge
Edge *input_edge = node->input_edge(0);
// Check if parent has a single output if yes then force in place calculation else not
- if((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge))
+ if ((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge))
{
- // Get current and new output tensors
- auto current_output_tensor = node->output(0);
- auto new_output_tensor = input_edge->tensor();
-
- ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
-
- // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different
- if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
+ if (node->type() == NodeType::EltwiseLayer)
+ {
+ try_in_place_elementwise(node);
+ }
+ else if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer ||
+ node->type() == NodeType::DepthwiseConvolutionLayer)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n");
+ try_in_place_depthwiseconv(node);
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
- << node->id() << " and name : " << node->name() << std::endl);
- // Update accessor
- new_output_tensor->set_accessor(current_output_tensor->extract_accessor());
- // Update output
- node->set_output_tensor(new_output_tensor->id(), 0);
+ // Get current and new output tensors
+ auto current_output_tensor = node->output(0);
+ auto new_output_tensor = input_edge->tensor();
+
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
+
+ // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different
+ if (new_output_tensor->accessor() != nullptr ||
+ current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to "
+ "the input tensor or the quantization info are different.\n");
+ }
+ else
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor);
+ }
}
}
}