Diffstat (limited to 'src/graph/mutators')
-rw-r--r--   src/graph/mutators/DepthConcatSubTensorMutator.cpp  |  45
-rw-r--r--   src/graph/mutators/GroupedConvolutionMutator.cpp    |  81
-rw-r--r--   src/graph/mutators/InPlaceOperationMutator.cpp      | 235
-rw-r--r--   src/graph/mutators/MutatorUtils.cpp                 |  52
-rw-r--r--   src/graph/mutators/MutatorUtils.h                   |  42
-rw-r--r--   src/graph/mutators/NodeExecutionMethodMutator.cpp   |  46
-rw-r--r--   src/graph/mutators/NodeFusionMutator.cpp            | 304
-rw-r--r--   src/graph/mutators/SplitLayerSubTensorMutator.cpp   |  41
-rw-r--r--   src/graph/mutators/SyntheticDataTypeMutator.cpp     | 113
9 files changed, 698 insertions, 261 deletions
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index 30d6700446..1b7ee3c4a4 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,15 +23,15 @@
*/
#include "arm_compute/graph/mutators/DepthConcatSubTensorMutator.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
+#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/core/utils/misc/Iterable.h"
+#include "support/Cast.h"
+#include "support/Iterable.h"
namespace arm_compute
{
@@ -50,7 +50,7 @@ IGraphMutator::MutationType DepthConcatSubTensorMutator::type() const
void DepthConcatSubTensorMutator::mutate(Graph &g)
{
// Early exit if no Concatenation layers exist in graph
- if(g.nodes(NodeType::ConcatenateLayer).empty())
+ if (g.nodes(NodeType::ConcatenateLayer).empty())
{
return;
}
@@ -59,43 +59,48 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
std::vector<NodeID> topological_sorted_node_ids = dfs(g);
// Should be in reverse order of execution
- for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+ for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
INode *node = g.node(node_id);
- if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
+ if (node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
{
// Get output tensor
auto output_tensor = node->output(0);
// Check concatenation axis (Sub-tensor optimization is supported for concatenation axis >=2)
auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
- if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2)
+ if (output_tensor == nullptr ||
+ get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2)
{
continue;
}
// Check that all tensors have the same target, valid inputs and the same quantization info
- bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
- [&](const EdgeID & eid)
- {
- return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target)
- && (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info);
- });
+ bool is_valid =
+ std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
+ [&](const EdgeID &eid)
+ {
+ return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) &&
+ (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target) &&
+ (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info);
+ });
// Create subtensors
- if(is_valid && is_target_supported(output_tensor->desc().target))
+ if (is_valid && is_target_supported(output_tensor->desc().target))
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
// Create sub-tensor handles
unsigned depth = 0;
- for(unsigned int i = 0; i < node->input_edges().size(); ++i)
+ for (unsigned int i = 0; i < node->input_edges().size(); ++i)
{
auto input_tensor = node->input(i);
const auto input_shape = input_tensor->desc().shape;
- backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(input_tensor->desc().target);
- std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
+ backends::IDeviceBackend &backend =
+ backends::BackendRegistry::get().get_backend(input_tensor->desc().target);
+ std::unique_ptr<ITensorHandle> handle =
+ backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
input_tensor->set_handle(std::move(handle));
depth += input_shape.z();
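
Side note on the change above: the sub-tensor optimisation lets each concatenation input write straight into a slice of the pre-allocated output instead of being copied. A standalone sketch of the offset bookkeeping (plain C++, not the library's API; the depths are made up):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Each input starts at a z-offset equal to the sum of the depths of
        // the inputs placed before it, mirroring 'depth += input_shape.z()'.
        const std::vector<unsigned int> input_depths = {16, 32, 64};
        unsigned int depth = 0;
        for (std::size_t i = 0; i < input_depths.size(); ++i)
        {
            std::printf("input %zu -> sub-tensor at Coordinates(0, 0, %u)\n", i, depth);
            depth += input_depths[i];
        }
        return 0;
    }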
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
index f8494a872f..31efba6bb1 100644
--- a/src/graph/mutators/GroupedConvolutionMutator.cpp
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,15 +23,14 @@
*/
#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-
+#include "support/Cast.h"
#include "support/StringSupport.h"
#include <set>
@@ -42,43 +41,51 @@ namespace graph
{
namespace
{
-NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
- PadStrideInfo conv_info, ConvolutionMethod method, ActivationLayerInfo fused_act, FastMathHint fast_math_hint, unsigned int num_groups)
+NodeID create_grouped_convolution(Graph &g,
+ const NodeParams &params,
+ NodeIdxPair input,
+ NodeID weights,
+ NodeID bias,
+ PadStrideInfo conv_info,
+ ConvolutionMethod method,
+ ActivationLayerInfo fused_act,
+ FastMathHint fast_math_hint,
+ unsigned int num_groups)
{
bool has_bias = (bias != EmptyNodeID);
// Split input
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
- const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL);
- NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
+ const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL);
+ NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
// Split weights
const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
- const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES);
- NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
+ const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES);
+ NodeID weights_split = GraphBuilder::add_split_node(g, params, {weights, 0}, num_groups, batch_idx);
// Split bias
NodeID bias_split = EmptyNodeID;
- if(has_bias)
+ if (has_bias)
{
// Split bias
- bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
+ bias_split = GraphBuilder::add_split_node(g, params, {bias, 0}, num_groups, 0);
}
std::vector<NodeIdxPair> convolution_outputs;
- for(unsigned int i = 0; i < num_groups; ++i)
+ for (unsigned int i = 0; i < num_groups; ++i)
{
NodeParams group_params = params;
NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint);
g.add_connection(input_split, i, conv_nid, 0);
g.add_connection(weights_split, i, conv_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(bias_split, i, conv_nid, 2);
}
// Add group name
- if(!group_params.name.empty())
+ if (!group_params.name.empty())
{
group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
}
@@ -92,7 +99,7 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPai
auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
conv_node->set_fused_activation(fused_act);
- convolution_outputs.push_back({ conv_nid, 0 });
+ convolution_outputs.push_back({conv_nid, 0});
}
// Depth concatenate output
@@ -113,7 +120,7 @@ IGraphMutator::MutationType GroupedConvolutionMutator::type() const
void GroupedConvolutionMutator::mutate(Graph &g)
{
// Early exit if no Convolution layers exist in graph
- if(g.nodes(NodeType::ConvolutionLayer).empty())
+ if (g.nodes(NodeType::ConvolutionLayer).empty())
{
return;
}
@@ -122,17 +129,18 @@ void GroupedConvolutionMutator::mutate(Graph &g)
size_t total_nodes = g.nodes().size();
// Iterate over convolution nodes
- for(unsigned int i = 0; i < total_nodes; ++i)
+ for (unsigned int i = 0; i < total_nodes; ++i)
{
INode *node = g.node(i);
- if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
+ if (node != nullptr && node->type() == NodeType::ConvolutionLayer &&
+ arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
{
// Validate node
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
Status status = backend.validate_node(*node);
// If grouped convolution is not supported
- if(!bool(status))
+ if (!bool(status))
{
// Down-cast node
auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
@@ -151,7 +159,8 @@ void GroupedConvolutionMutator::mutate(Graph &g)
ARM_COMPUTE_ERROR_ON(conv_node->input_edge(0) == nullptr || conv_node->input_edge(1) == nullptr);
const NodeID input_id = conv_node->input_edge(0)->producer()->id();
const NodeID weights_id = conv_node->input_edge(1)->producer()->id();
- const NodeID bias_id = (conv_node->input_edge(2) != nullptr) ? conv_node->input_edge(2)->producer()->id() : EmptyNodeID;
+ const NodeID bias_id =
+ (conv_node->input_edge(2) != nullptr) ? conv_node->input_edge(2)->producer()->id() : EmptyNodeID;
// Get driving nodes
std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node);
@@ -164,14 +173,15 @@ void GroupedConvolutionMutator::mutate(Graph &g)
NodeID latest_nid = g.nodes().size();
// Create grouped convolution node
- NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id,
- conv_info, conv_method, fused_act_info, fast_math_hint, num_groups);
+ NodeID grouped_conv_id =
+ create_grouped_convolution(g, params, {input_id, 0}, weights_id, bias_id, conv_info, conv_method,
+ fused_act_info, fast_math_hint, num_groups);
// Remove convolution node
g.remove_node(node->id());
// Update batch normalization node outputs
- for(auto &driving_node : driving_nodes)
+ for (auto &driving_node : driving_nodes)
{
g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index);
}
@@ -180,17 +190,16 @@ void GroupedConvolutionMutator::mutate(Graph &g)
g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor));
// Configure new tensors and nodes
- std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t)
- {
- configure_tensor(t.get());
- });
- std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n)
- {
- if(n != nullptr)
- {
- n->set_assigned_target(assigned_target);
- }
- });
+ std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(),
+ [](std::unique_ptr<Tensor> &t) { configure_tensor(t.get()); });
+ std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(),
+ [&assigned_target](std::unique_ptr<INode> &n)
+ {
+ if (n != nullptr)
+ {
+ n->set_assigned_target(assigned_target);
+ }
+ });
}
}
}
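
For context, this mutator rewrites a grouped convolution that the backend rejects into num_groups ordinary convolutions whose outputs are depth-concatenated. A minimal sketch of the channel arithmetic (plain C++; the even divisibility assumed here is what the split nodes require):

    #include <cassert>
    #include <cstdio>

    int main()
    {
        // A grouped convolution with c_in input channels, c_out filters and
        // 'groups' groups becomes 'groups' convolutions, each seeing
        // c_in/groups input channels and producing c_out/groups outputs.
        const unsigned int c_in = 256, c_out = 512, groups = 32;
        assert(c_in % groups == 0 && c_out % groups == 0);
        std::printf("per-group convolution: %u input channels, %u filters\n",
                    c_in / groups, c_out / groups);
        // The group outputs are then concatenated back to c_out channels.
        return 0;
    }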
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index 3b06537cd9..a51dcc4f42 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,13 +23,193 @@
*/
#include "arm_compute/graph/mutators/InPlaceOperationMutator.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h"
+#include "arm_compute/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.h"
+
+#include "support/Cast.h"
+
+using namespace arm_compute::utils::cast;
namespace arm_compute
{
namespace graph
{
+namespace
+{
+// Check if the output edges of the parent node are separate tensors. If not,
+// it means the same output is connected to multiple nodes and computations on
+// these nodes cannot be done in-place.
+bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge)
+{
+ const auto parent_node = input_edge->producer();
+ const auto input_tensor = input_edge->tensor();
+ const auto input_edge_id = input_edge->id();
+
+ if (parent_node == nullptr)
+ {
+ return false;
+ }
+
+ const auto output_edges = parent_node->output_edges();
+
+ // If the output is connected to only one edge, then computations can
+ // be done in-place.
+ if (output_edges.size() == 1)
+ {
+ return true;
+ }
+
+ return std::all_of(output_edges.begin(), output_edges.end(),
+ [&](const EdgeID &edge_id)
+ {
+ // Skip check on current input edge
+ if (edge_id == input_edge_id)
+ {
+ return true;
+ }
+
+ auto edge = g.edge(edge_id);
+ return edge->tensor() != input_tensor;
+ });
+}
+
+// If the calculation is done in-place, the node needs to use the new output and inherit the original output's accessor
+void set_new_output_and_inherit_accessor(std::unique_ptr<INode> &node, Tensor *orig_output, Tensor *new_output)
+{
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : "
+ << node->id() << " and name : " << node->name() << std::endl);
+ // Update accessor
+ new_output->set_accessor(orig_output->extract_accessor());
+ // Update output
+ node->set_output_tensor(new_output->id(), 0);
+}
+
+// Try to mutate the node to perform the depthwise in-place calculation
+void try_in_place_depthwiseconv(std::unique_ptr<INode> &node)
+{
+ // Get input edge
+ Edge *input_edge = node->input_edge(0);
+ Edge *weight_edge = node->input_edge(1);
+ ARM_COMPUTE_ERROR_ON(input_edge == nullptr || weight_edge == nullptr);
+
+ auto input_tensor = input_edge->tensor();
+ auto weight_tensor = weight_edge->tensor();
+ ARM_COMPUTE_ERROR_ON(input_tensor == nullptr || weight_tensor == nullptr);
+
+ const auto input_shape = input_tensor->desc().shape;
+ const auto qinfo_input = input_tensor->desc().quant_info;
+
+ const auto weight_shape = weight_tensor->desc().shape;
+ const auto weight_layout = weight_tensor->desc().layout;
+
+ // Extract PadStrideInfo and depth multiplier
+ PadStrideInfo conv_info{};
+ unsigned int depth_multiplier{};
+ if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer)
+ {
+ conv_info =
+ polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info();
+ depth_multiplier =
+ polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier();
+ }
+ else if (node->type() == NodeType::DepthwiseConvolutionLayer)
+ {
+ conv_info = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->convolution_info();
+ depth_multiplier = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->depth_multiplier();
+ }
+
+ // Get current output tensor
+ auto current_output_tensor = node->output(0);
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
+ const auto out_shape = current_output_tensor->desc().shape;
+ const auto qinfo_out = current_output_tensor->desc().quant_info;
+
+ bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) &&
+ (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr);
+
+ // Specify conditions with which input can be in-placed
+ input_can_in_place &= weight_layout == input_tensor->desc().layout && weight_layout == DataLayout::NHWC;
+
+ const int weights_width_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::WIDTH);
+ const int weights_height_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::HEIGHT);
+ const bool is_1x1 = weight_shape[weights_width_idx] == 1U && weight_shape[weights_height_idx] == 1U;
+ input_can_in_place &= is_1x1;
+
+ input_can_in_place &= depth_multiplier == 1;
+ input_can_in_place &= conv_info.stride() == std::make_pair(1U, 1U);
+ input_can_in_place &= !conv_info.has_padding();
+ // NOTE: Dilation should also be (1, 1). However, dilation is currently not supported in the depthwise conv node
+
+ if (input_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input_tensor);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor "
+ "or the quantization info are different.\n");
+ }
+}
+
+// Try to mutate the node to perform the elementwise in-place calculation
+void try_in_place_elementwise(std::unique_ptr<INode> &node)
+{
+ // Get input edge
+ Edge *input0_edge = node->input_edge(0);
+ Edge *input1_edge = node->input_edge(1);
+ ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr);
+
+ auto input0_tensor = input0_edge->tensor();
+ auto input1_tensor = input1_edge->tensor();
+ ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr);
+
+ const auto shape0 = input0_tensor->desc().shape;
+ const auto shape1 = input1_tensor->desc().shape;
+ const auto qinfo0 = input0_tensor->desc().quant_info;
+ const auto qinfo1 = input1_tensor->desc().quant_info;
+
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ // Inputs are not broadcast compatible
+ if (out_shape.total_size() == 0)
+ {
+ return;
+ }
+
+ // Get current output tensor
+ auto current_output_tensor = node->output(0);
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
+ const auto qinfo_out = current_output_tensor->desc().quant_info;
+
+ // In-place is possible if the input has the same shape, quantization info and data type as the output, and the input doesn't have an accessor.
+ bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) &&
+ (qinfo0 == qinfo_out) &&
+ (input0_tensor->desc().data_type == current_output_tensor->desc().data_type) &&
+ (input0_tensor->accessor() == nullptr);
+ bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) &&
+ (qinfo1 == qinfo_out) &&
+ (input1_tensor->desc().data_type == current_output_tensor->desc().data_type) &&
+ (input1_tensor->accessor() == nullptr);
+
+ if (input0_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor);
+ }
+ else if (input1_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor "
+ "or the quantization info are different.\n");
+ }
+}
+} // namespace
+
const char *InPlaceOperationMutator::name()
{
return "InPlaceOperationMutator";
@@ -42,38 +222,53 @@ IGraphMutator::MutationType InPlaceOperationMutator::type() const
void InPlaceOperationMutator::mutate(Graph &g)
{
- std::set<NodeType> in_place_nodes = { NodeType::BatchNormalizationLayer, NodeType::ActivationLayer, NodeType::PrintLayer };
+ std::set<NodeType> in_place_nodes = {NodeType::ActivationLayer,
+ NodeType::BatchNormalizationLayer,
+ NodeType::EltwiseLayer,
+ NodeType::UnaryEltwiseLayer,
+ NodeType::DepthwiseConvolutionLayer,
+ NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer,
+ NodeType::PrintLayer};
// Not interested in the order of nodes
- for(auto &node : g.nodes())
+ for (auto &node : g.nodes())
{
- if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
+ if (node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
{
// Get input edge
Edge *input_edge = node->input_edge(0);
// Check if the parent has a single output; if so, force in-place calculation, otherwise don't
- if((input_edge != nullptr) && (input_edge->producer() != nullptr) && (input_edge->producer()->output_edges().size() == 1))
+ if ((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge))
{
- // Get current and new output tensors
- auto current_output_tensor = node->output(0);
- auto new_output_tensor = input_edge->tensor();
-
- ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
-
- // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different
- if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
+ if (node->type() == NodeType::EltwiseLayer)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n");
+ try_in_place_elementwise(node);
+ }
+ else if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer ||
+ node->type() == NodeType::DepthwiseConvolutionLayer)
+ {
+ try_in_place_depthwiseconv(node);
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
- << node->id() << " and name : " << node->name() << std::endl);
- // Update accessor
- new_output_tensor->set_accessor(current_output_tensor->extract_accessor());
- // Update output
- node->set_output_tensor(new_output_tensor->id(), 0);
+ // Get current and new output tensors
+ auto current_output_tensor = node->output(0);
+ auto new_output_tensor = input_edge->tensor();
+
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
+
+ // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different
+ if (new_output_tensor->accessor() != nullptr ||
+ current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to "
+ "the input tensor or the quantization info are different.\n");
+ }
+ else
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor);
+ }
}
}
}
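
The conditions in try_in_place_elementwise boil down to: an input may alias the output only if its shape matches the broadcast output shape, its quantization info and data type match the output's, and no accessor is bound to it. A condensed standalone sketch (hypothetical TensorInfo struct, not the library's type):

    #include <cstdio>

    struct TensorInfo
    {
        unsigned int shape[4];
        float        scale;        // quantization scale
        int          offset;       // quantization offset
        int          data_type;
        bool         has_accessor;
    };

    bool can_run_in_place(const TensorInfo &in, const TensorInfo &out)
    {
        for (int d = 0; d < 4; ++d)
        {
            if (in.shape[d] != out.shape[d])
            {
                return false; // a broadcast input cannot hold the full output
            }
        }
        return in.scale == out.scale && in.offset == out.offset &&
               in.data_type == out.data_type && !in.has_accessor;
    }

    int main()
    {
        const TensorInfo in{{8, 8, 3, 1}, 0.125f, -10, 0, false};
        const TensorInfo out{{8, 8, 3, 1}, 0.125f, -10, 0, false};
        std::printf("in-place possible: %d\n", can_run_in_place(in, out));
        return 0;
    }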
diff --git a/src/graph/mutators/MutatorUtils.cpp b/src/graph/mutators/MutatorUtils.cpp
new file mode 100644
index 0000000000..f47240eadd
--- /dev/null
+++ b/src/graph/mutators/MutatorUtils.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/graph/mutators/MutatorUtils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list)
+{
+ if (layout == DataLayout::NCHW || layout == DataLayout::NHWC)
+ {
+ const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT);
+ const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH);
+
+ for (unsigned int i = 0; i < padding_list.size(); ++i)
+ {
+ if (i != height_index && i != width_index && padding_list[i] != PaddingInfo(0, 0))
+ {
+ // If the index is neither height nor width, don't fuse
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/MutatorUtils.h b/src/graph/mutators/MutatorUtils.h
new file mode 100644
index 0000000000..170d892c93
--- /dev/null
+++ b/src/graph/mutators/MutatorUtils.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H
+#define ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H
+
+#include "arm_compute/graph/Utils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+/** Check if padding is in height and/or width dimensions
+ *
+ * @param[in] layout Data layout of the tensor
+ * @param[in] padding_list List of padding pairs
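+ *
+ * @return True if the layout is NCHW or NHWC and any non-zero padding is confined to the height and/or width dimensions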
+ */
+bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list);
+} // namespace graph
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H */
\ No newline at end of file
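
A usage sketch for this helper (standalone reimplementation for illustration only; it hard-codes the assumption that NHWC shapes are stored innermost-first, i.e. index 0 is channels, 1 width, 2 height, 3 batches):

    #include <cstdio>
    #include <utility>
    #include <vector>

    using PaddingInfo = std::pair<unsigned int, unsigned int>;
    using PaddingList = std::vector<PaddingInfo>;

    // Mirrors is_padding_in_height_or_width for NHWC: only indices 1 (W)
    // and 2 (H) may carry non-zero padding.
    bool padding_only_in_h_or_w_nhwc(const PaddingList &padding_list)
    {
        const unsigned int width_index = 1, height_index = 2;
        for (unsigned int i = 0; i < padding_list.size(); ++i)
        {
            if (i != height_index && i != width_index && padding_list[i] != PaddingInfo(0, 0))
            {
                return false;
            }
        }
        return true;
    }

    int main()
    {
        // Pad 1 on each side of W and H, nothing on C or N: fusable.
        std::printf("%d\n", padding_only_in_h_or_w_nhwc({{0, 0}, {1, 1}, {1, 1}, {0, 0}}));
        return 0;
    }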
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
index 72e2645dd2..588befecae 100644
--- a/src/graph/mutators/NodeExecutionMethodMutator.cpp
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,13 +23,13 @@
*/
#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
namespace arm_compute
{
@@ -49,17 +49,17 @@ template <typename Setter>
void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter)
{
const std::vector<NodeID> &node_ids = g.nodes(node_type);
- for(auto &node_id : node_ids)
+ for (auto &node_id : node_ids)
{
INode *node = g.node(node_id);
- if(node != nullptr)
+ if (node != nullptr)
{
// Validate node
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
Status status = backend.validate_node(*node);
// Set default execution method in case of failure
- if(!bool(status))
+ if (!bool(status))
{
setter(node);
}
@@ -81,22 +81,26 @@ IGraphMutator::MutationType NodeExecutionMethodMutator::type() const
void NodeExecutionMethodMutator::mutate(Graph &g)
{
// Convolution Layer
- set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n)
- {
- ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
- << n->id() << " and Name: " << n->name() << std::endl);
- auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
- casted_node->set_convolution_method(ConvolutionMethod::Default);
- });
+ set_default_on_invalid_method(g, NodeType::ConvolutionLayer,
+ [](INode *n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node =
+ arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
+ casted_node->set_convolution_method(ConvolutionMethod::Default);
+ });
// Depthwise Convolution Layer
- set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n)
- {
- ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
- << n->id() << " and Name: " << n->name() << std::endl);
- auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
- casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
- });
+ set_default_on_invalid_method(
+ g, NodeType::DepthwiseConvolutionLayer,
+ [](INode *n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
+ casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+ });
}
} // namespace graph
} // namespace arm_compute
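
The pattern in this file is a generic validate-then-fallback pass: each node of a given type is validated against its assigned backend and, on failure, a setter flips it to the default execution method. A stripped-down sketch of the control flow (hypothetical FakeNode type; the real pass calls backend.validate_node()):

    #include <cstdio>
    #include <vector>

    struct FakeNode
    {
        bool method_is_supported; // stands in for bool(backend.validate_node(*node))
    };

    template <typename Setter>
    void set_default_on_invalid_method(std::vector<FakeNode> &nodes, Setter &&setter)
    {
        for (auto &node : nodes)
        {
            if (!node.method_is_supported)
            {
                setter(node); // switch this node to the default method
            }
        }
    }

    int main()
    {
        std::vector<FakeNode> nodes = {{true}, {false}};
        set_default_on_invalid_method(nodes,
                                      [](FakeNode &n)
                                      {
                                          n.method_is_supported = true;
                                          std::printf("switched a node to the default method\n");
                                      });
        return 0;
    }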
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index ae53b8ff75..998a4a05c7 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,15 +23,18 @@
*/
#include "arm_compute/graph/mutators/NodeFusionMutator.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "src/graph/mutators/MutatorUtils.h"
+#include "support/Cast.h"
+#include <list>
#include <set>
namespace arm_compute
@@ -40,24 +43,60 @@ namespace graph
{
namespace detail
{
+void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode *old_node, bool add_output_tensor)
+{
+ if (new_node == nullptr || old_node == nullptr)
+ {
+ return;
+ }
+
+ // Get driving nodes of last fusable node
+ std::vector<NodeIdxPair> last_driving_nodes = get_driving_nodes(*old_node);
+
+ // Extract last fusable node accessor if any
+ if (old_node->output(0) == nullptr)
+ {
+ return;
+ }
+ auto old_node_accessor = old_node->output(0)->extract_accessor();
+
+ // Remove node
+ g.remove_node(old_node->id());
+
+ // Update fused node outputs
+ for (auto &driving_node : last_driving_nodes)
+ {
+ g.add_connection(new_node->id(), 0, driving_node.node_id, driving_node.index);
+ if (add_output_tensor)
+ {
+ configure_tensor(new_node->output(0));
+ }
+ }
+
+ // Update accessor to fused node
+ new_node->output(0)->set_accessor(std::move(old_node_accessor));
+}
+
void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
{
ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer());
- auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
+ auto *bn_node =
+ arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
// Not fusing if number of groups is greater than 1
- if(conv_node->num_groups() > 1)
+ if (conv_node->num_groups() > 1)
{
return;
}
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << output_edge->producer_id()
- << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : "
+ << output_edge->producer_id() << " with BatchNormalization Layer node with ID : "
+ << output_edge->consumer_id() << std::endl);
// Prevent fusion if fused node has an output accessor
- if(conv_node->output(0)->accessor() == nullptr)
+ if (conv_node->output(0)->accessor() == nullptr)
{
const Target assigned_target = conv_node->assigned_target();
@@ -77,9 +116,10 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge
const auto epsilon = bn_node->epsilon();
// Create the fused node
- const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
+ const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(
+ epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
- if(conv_node->input_edge(2) != nullptr)
+ if (conv_node->input_edge(2) != nullptr)
{
auto conv_bias_id = conv_node->input_edge(2)->producer_id();
g.add_connection(conv_bias_id, 0, fused_id, 2);
@@ -91,45 +131,33 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge
g.add_connection(bn_mean_id, 0, fused_id, 3);
g.add_connection(bn_var_id, 0, fused_id, 4);
- if(bn_node->input_edge(3) != nullptr)
+ if (bn_node->input_edge(3) != nullptr)
{
const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
g.add_connection(bn_beta_id, 0, fused_id, 5);
}
- if(bn_node->input_edge(4) != nullptr)
+ if (bn_node->input_edge(4) != nullptr)
{
const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
g.add_connection(bn_gamma_id, 0, fused_id, 6);
}
- auto fused_node = g.node(fused_id);
- std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
+ auto fused_node = g.node(fused_id);
+ auto bn_node_name = bn_node->name();
- // Extract batch normalization node accessor if any
- auto bn_node_accessor = bn_node->output(0)->extract_accessor();
- auto bn_node_name = bn_node->name();
+ transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);
- // Remove batch normalization node
- g.remove_node(bn_node->id());
-
- // Get driving nodes of batch normalization node
- for(auto &driving_node : bn_driving_nodes)
- {
- g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index);
- configure_tensor(fused_node->output(0));
- }
- // Update fused node outputs
- fused_node->output(0)->set_accessor(std::move(bn_node_accessor));
fused_node->set_assigned_target(assigned_target);
- fused_node->set_common_node_parameters(NodeParams{ conv_node->name() + "+" + bn_node_name, assigned_target });
+ fused_node->set_common_node_parameters(NodeParams{conv_node->name() + "+" + bn_node_name, assigned_target});
// Remove convolution node
g.remove_node(conv_node->id());
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE(
+ "Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
}
}
@@ -137,14 +165,17 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o
{
ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
- auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
- auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
+ auto *depth_conv_node =
+ arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
+ auto *bn_node =
+ arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " << output_edge->producer_id()
- << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : "
+ << output_edge->producer_id() << " with BatchNormalization Layer node with ID : "
+ << output_edge->consumer_id() << std::endl);
// Prevent fusion if fused node has an output accessor
- if(depth_conv_node->output(0)->accessor() == nullptr)
+ if (depth_conv_node->output(0)->accessor() == nullptr)
{
const Target assigned_target = depth_conv_node->assigned_target();
@@ -164,9 +195,10 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o
const auto epsilon = bn_node->epsilon();
// Create the fused node
- const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);
+ const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(
+ epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);
- if(depth_conv_node->input_edge(2) != nullptr)
+ if (depth_conv_node->input_edge(2) != nullptr)
{
const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id();
g.add_connection(conv_bias_id, 0, fused_id, 2);
@@ -180,38 +212,29 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o
g.add_connection(bn_beta_id, 0, fused_id, 5);
g.add_connection(bn_gamma_id, 0, fused_id, 6);
- auto fused_node = g.node(fused_id);
- std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
-
- // Extract batch normalization node accessor if any
- auto bn_node_accessor = bn_node->output(0)->extract_accessor();
- auto bn_node_name = bn_node->name();
+ auto fused_node = g.node(fused_id);
+ auto bn_node_name = bn_node->name();
- // Remove batch normalization node
- g.remove_node(bn_node->id());
+ transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);
- // Get driving nodes of batch normalization node
- for(auto &driving_node : bn_driving_nodes)
- {
- g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index);
- configure_tensor(fused_node->output(0));
- }
- // Update fused node outputs
- fused_node->output(0)->set_accessor(std::move(bn_node_accessor));
fused_node->set_assigned_target(assigned_target);
- fused_node->set_common_node_parameters(NodeParams{ depth_conv_node->name() + "+" + bn_node_name, assigned_target });
+ fused_node->set_common_node_parameters(
+ NodeParams{depth_conv_node->name() + "+" + bn_node_name, assigned_target});
// Remove convolution node
g.remove_node(depth_conv_node->id());
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n");
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the "
+ "presence of an output accessor\n");
}
}
template <typename N>
-void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set<Activation> &supported_fused_activations)
+void fuse_node_with_activation(Graph &g,
+ const Edge *output_edge,
+ const std::set<Activation> &supported_fused_activations)
{
ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
@@ -221,64 +244,126 @@ void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set
ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);
// Check if activation is supported for fusion
- if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
+ if (supported_fused_activations.count(act_node->activation_info().activation()) == 0)
+ {
+ return;
+ }
+
+ // EltwiseLayerNode can only be fused when the data type is float
+ if (n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type))
{
return;
}
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id()
- << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
+ << " with Activation Layer node with ID : "
+ << output_edge->consumer_id() << std::endl);
// Prevent fusion if fused node has an output accessor
- if(n_node->output(0)->accessor() == nullptr)
+ if (n_node->output(0)->accessor() == nullptr)
{
- // Get driving nodes of activation node
- std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
-
// Set activation info to fused node
n_node->set_fused_activation(act_node->activation_info());
- // Extract activation node accessor if any
- auto act_node_accessor = act_node->output(0)->extract_accessor();
+ transfer_driving_nodes_and_remove_old_node(g, n_node, act_node, false);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE(
+ "Prevented fusion of node with activation due to the presence of an output accessor\n");
+ }
+}
+
+template <typename N>
+void fuse_pad_with_convolution(Graph &g, const Edge *output_edge)
+{
+ auto *pad_node = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->producer());
+ auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->consumer());
+
+ const Edge *input_edge = pad_node->input_edge(0);
+ if (input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr &&
+ pad_node->pad_value().get<float>() == 0.0)
+ {
+ const DataLayout layout = input_edge->tensor()->desc().layout;
+ const PaddingList padding_list = pad_node->padding();
- // Remove activation node
- g.remove_node(act_node->id());
+ const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT);
+ const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH);
- // Update fused node outputs
- for(auto &driving_node : act_driving_nodes)
+ const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] : PaddingInfo(0, 0);
+ const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] : PaddingInfo(0, 0);
+
+ if (is_padding_in_height_or_width(layout, padding_list))
{
- g.add_connection(n_node->id(), 0, driving_node.node_id, driving_node.index);
+ // Add paddings to the convolution node
+ const PadStrideInfo conv_info = conv_node->convolution_info();
+ const PadStrideInfo new_conv_info(conv_info.stride().first, conv_info.stride().second,
+ conv_info.pad_left() + pad_w.first, conv_info.pad_right() + pad_w.second,
+ conv_info.pad_top() + pad_h.first, conv_info.pad_bottom() + pad_h.second,
+ conv_info.round());
+ conv_node->set_convolution_info(new_conv_info);
+
+ // Update drivers of the convolution node
+ std::vector<NodeIdxPair> pad_driver_nodes = get_driver_nodes(*pad_node);
+ g.remove_node(pad_node->id());
+
+ // Update fused node inputs
+ for (auto &driver_node : pad_driver_nodes)
+ {
+ g.add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0);
+ }
}
-
- // Update accessor to fused node
- n_node->output(0)->set_accessor(std::move(act_node_accessor));
- }
- else
- {
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n");
}
}
template <typename N1, typename N2, typename F, typename... Args>
-void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
+void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments)
{
- // Not interested in the order of nodes
- for(auto &node : g.nodes())
+ // Note that fused nodes may be added to the end of the node list.
+ // Instead of only looping over the original list of nodes, we loop over the current node list, which could be growing.
+ // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
+ for (unsigned int i = 0; i < g.nodes().size(); ++i)
{
- // Check if the node is of type N and not a branching node
- if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
+ auto node = g.node(i);
+ // Check if the node is of type N1 and not a branching node
+ if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
{
const auto output_edge_id = *node->output_edges().begin();
const auto output_edge = g.edge(output_edge_id);
- // Check if following node is an activation layer node
- if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
+ // Check if following node is a type N2 node
+ if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) &&
+ (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
{
fuse_fcn(g, output_edge, optional_arguments...);
}
}
}
}
+
+template <typename N1, typename F, typename... Args>
+void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments)
+{
+ // Note that fused nodes may be added to the end of the node list.
+ // Instead of only looping over the original list of nodes, we loop over the current node list, which could be growing.
+ // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
+ for (unsigned int i = 0; i < g.nodes().size(); ++i)
+ {
+ auto node = g.node(i);
+ // Check if the node is of type N1 and not a branching node
+ if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
+ {
+ const auto output_edge_id = *node->output_edges().begin();
+ const auto output_edge = g.edge(output_edge_id);
+
+ // Check if it's the correct target
+ if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer()))
+ {
+ fuse_fcn(g, output_edge, i, optional_arguments...);
+ }
+ }
+ }
+}
} // namespace detail
const char *NodeFusionMutator::name()
@@ -294,41 +379,50 @@ IGraphMutator::MutationType NodeFusionMutator::type() const
void NodeFusionMutator::mutate(Graph &g)
{
// Supported activations when fusing
- const std::set<Activation> supported_fused_activations_conv = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU };
- const std::set<Activation> supported_fused_activations_eltwise = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU,
- Activation::TANH, Activation::LOGISTIC
- };
+ const std::set<Activation> supported_fused_activations = {
+ Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
+ Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
+ Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
+ Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
+ Activation::SQUARE, Activation::TANH};
// Preconditions
- auto empty_prec = [](INode &)
- {
- return true;
- };
- auto cl_target_prec = [](INode & n)
- {
- return n.assigned_target() == Target::CL;
- };
- auto qs8_prec = [&g](INode & n)
+ auto empty_prec = [](INode &) { return true; };
+ auto cl_target_prec = [](INode &n) { return n.assigned_target() == Target::CL; };
+ auto qs8_prec = [&g](INode &n)
{
ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
const auto output_edge_id = *n.output_edges().begin();
const auto output_edge = g.edge(output_edge_id);
// To perform fusion the two nodes must have same output quantization information
- const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
+ const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8;
return (output_qasymm8 && same_qinfo) || !output_qasymm8;
};
// Fusion mutations
- detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations_conv);
- detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations_conv);
- detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations_conv);
- detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations_conv);
- detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations_eltwise);
- detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
- detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
+
+ detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec,
+ detail::fuse_pad_with_convolution<ConvolutionLayerNode>);
+ detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>(
+ g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>);
+ detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(
+ g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
+ detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(
+ g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
+ detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(
+ g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
+ detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(
+ g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
+ detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(
+ g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
+ // The fusion of BatchNormalizationLayer must occur after the fusion of ActivationLayer, because FusedConvolutionBatchNormalizationNode assumes that BatchNormalization has already been fused with the activation, if any
+ detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(
+ g, empty_prec, detail::fuse_convolution_with_batch_normalization);
+ detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(
+ g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
}
} // namespace graph
} // namespace arm_compute
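
Two pieces of arithmetic underpin the fusions in this file. Conv+BN fusion folds BN(y) = gamma * (y - mean) / sqrt(var + eps) + beta into the convolution's weights and bias, and pad+conv fusion simply adds the pad layer's H/W padding onto the convolution's own padding. A worked sketch of both (scalar per-channel values, made-up numbers):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        // Conv+BN: with s = gamma / sqrt(var + eps), the fused parameters are
        // w' = w * s and b' = (b - mean) * s + beta.
        const float w = 0.5f, b = 0.1f;
        const float gamma = 1.2f, beta = -0.3f, mean = 0.05f, var = 0.04f, eps = 1e-5f;
        const float s = gamma / std::sqrt(var + eps);
        std::printf("fused weight w' = %f, fused bias b' = %f\n", w * s, (b - mean) * s + beta);

        // Pad+conv: a zero-pad of 1 on the left in front of a convolution with
        // pad_left 0 becomes a convolution with pad_left 1 (per side, per axis).
        const unsigned int conv_pad_left = 0, pad_layer_left = 1;
        std::printf("fused pad_left = %u\n", conv_pad_left + pad_layer_left);
        return 0;
    }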
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 3ba73071ed..533f8944cf 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,15 +23,15 @@
*/
#include "arm_compute/graph/mutators/SplitLayerSubTensorMutator.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/nodes/SplitLayerNode.h"
+#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/core/utils/misc/Iterable.h"
+#include "support/Cast.h"
+#include "support/Iterable.h"
namespace arm_compute
{
@@ -50,7 +50,7 @@ IGraphMutator::MutationType SplitLayerSubTensorMutator::type() const
void SplitLayerSubTensorMutator::mutate(Graph &g)
{
// Early exit if no Split layers exist in graph
- if(g.nodes(NodeType::SplitLayer).empty())
+ if (g.nodes(NodeType::SplitLayer).empty())
{
return;
}
@@ -59,43 +59,46 @@ void SplitLayerSubTensorMutator::mutate(Graph &g)
std::vector<NodeID> topological_sorted_node_ids = dfs(g);
// Should be in reverse order of execution
- for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+ for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
INode *node = g.node(node_id);
- if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
+ if (node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
{
// Get output tensor
Tensor *input_tensor = node->input(0);
// Check that all tensors have the same target and are valid
bool is_valid = std::all_of(node->outputs().cbegin(), node->outputs().cend(),
- [&](const TensorID & tid)
- {
- return (g.tensor(tid) != nullptr) && (g.tensor(tid)->desc().target == input_tensor->desc().target);
- });
+ [&](const TensorID &tid) {
+ return (g.tensor(tid) != nullptr) &&
+ (g.tensor(tid)->desc().target == input_tensor->desc().target);
+ });
// Create subtensors
- if(is_valid && is_target_supported(input_tensor->desc().target))
+ if (is_valid && is_target_supported(input_tensor->desc().target))
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node);
- const unsigned int axis = split_node->axis();
+ const int axis = split_node->axis();
const unsigned int num_splits = split_node->num_splits();
const bool extend_parent = (axis < 2);
// Create sub-tensor handles
- for(unsigned int i = 0; i < node->outputs().size(); ++i)
+ for (unsigned int i = 0; i < node->outputs().size(); ++i)
{
Tensor *output_tensor = node->output(i);
const TensorShape output_shape = output_tensor->desc().shape;
Coordinates coords;
- std::tie(std::ignore, coords) = SplitLayerNode::compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
+ std::tie(std::ignore, coords) =
+ split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
- backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target);
- std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
+ backends::IDeviceBackend &backend =
+ backends::BackendRegistry::get().get_backend(output_tensor->desc().target);
+ std::unique_ptr<ITensorHandle> handle =
+ backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
output_tensor->set_handle(std::move(handle));
}
}
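
The coordinates computed by compute_output_descriptor place each split output at a fixed offset along the split axis. For an even split, the offset arithmetic is just the following (a standalone sketch, not the library's implementation):

    #include <cstdio>

    int main()
    {
        // Splitting a dimension of size 96 into 4 parts along axis 2:
        // output i starts at offset i * (96 / 4) along that axis.
        const unsigned int dim_size = 96, num_splits = 4, axis = 2;
        for (unsigned int i = 0; i < num_splits; ++i)
        {
            std::printf("output %u -> start offset %u along axis %u\n",
                        i, i * (dim_size / num_splits), axis);
        }
        return 0;
    }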
diff --git a/src/graph/mutators/SyntheticDataTypeMutator.cpp b/src/graph/mutators/SyntheticDataTypeMutator.cpp
index 0a9f5058dd..3dc2480e85 100644
--- a/src/graph/mutators/SyntheticDataTypeMutator.cpp
+++ b/src/graph/mutators/SyntheticDataTypeMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,10 +26,10 @@
#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/ITensorAccessor.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/Cast.h"
+#include "support/Cast.h"
#include <set>
@@ -62,14 +62,12 @@ public:
*/
bool is_mutation_supported(Graph &g)
{
- const std::set<NodeType> unsupported_node_types = { NodeType::DetectionOutputLayer,
- NodeType::NormalizationLayer,
- NodeType::PriorBoxLayer
- };
+ const std::set<NodeType> unsupported_node_types = {NodeType::DetectionOutputLayer, NodeType::NormalizationLayer,
+ NodeType::PriorBoxLayer};
- for(const auto &utype : unsupported_node_types)
+ for (const auto &utype : unsupported_node_types)
{
- if(!g.nodes(utype).empty())
+ if (!g.nodes(utype).empty())
{
return false;
}
@@ -83,12 +81,12 @@ bool is_mutation_supported(Graph &g)
*/
void remove_optimized_nodes(Graph &g)
{
- const std::set<NodeType> optimized_node_types = { NodeType::BatchNormalizationLayer };
+ const std::set<NodeType> optimized_node_types = {NodeType::BatchNormalizationLayer};
- for(const auto &opt_type : optimized_node_types)
+ for (const auto &opt_type : optimized_node_types)
{
const std::vector<NodeID> opt_nodes_ids = g.nodes(opt_type);
- for(const auto &node_id : opt_nodes_ids)
+ for (const auto &node_id : opt_nodes_ids)
{
INode *node = g.node(node_id);
@@ -108,7 +106,7 @@ void remove_optimized_nodes(Graph &g)
g.remove_node(node->id());
// Update connections
- for(auto &driving_node : driving_nodes)
+ for (auto &driving_node : driving_nodes)
{
g.add_connection(producer->id(), producer_edge_id, driving_node.node_id, driving_node.index);
}
@@ -120,15 +118,28 @@ void remove_optimized_nodes(Graph &g)
*
* @param[in,out] g Graph to convert tensors of.
*/
-void convert_tensors(Graph &g)
+void convert_tensors(Graph &g, DataType data_type)
{
auto &tensors = g.tensors();
- for(auto &tensor : tensors)
+ for (auto &tensor : tensors)
{
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
- tensor->desc().data_type = DataType::QASYMM8;
- tensor->desc().quant_info = QuantizationInfo(0.125f, -10);
+ switch (data_type)
+ {
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ {
+ tensor->desc().quant_info = QuantizationInfo(0.125f, -10);
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported mutation type");
+ break;
+ }
+ }
+ tensor->desc().data_type = data_type;
}
}
}
@@ -143,7 +154,7 @@ template <typename NT>
void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const &f)
{
const std::vector<NodeID> nodes_ids = g.nodes(NT::node_type);
- for(const auto &nodes_id : nodes_ids)
+ for (const auto &nodes_id : nodes_ids)
{
INode *node = arm_compute::utils::cast::polymorphic_downcast<NT *>(g.node(nodes_id));
ARM_COMPUTE_ERROR_ON(node == nullptr);
@@ -161,23 +172,44 @@ void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const
*/
void convert_special_tensors(Graph &g)
{
- auto softmax_func = [](INode * node, Tensor * tensor)
+ auto softmax_func = [](INode *node, Tensor *tensor)
{
ARM_COMPUTE_UNUSED(node);
- tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0);
+ if (tensor->desc().data_type == DataType::QASYMM8)
+ {
+ tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0);
+ }
+ else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED)
+ {
+ tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128);
+ }
return true;
};
- auto act_func = [](INode * node, Tensor * tensor)
+ auto act_func = [](INode *node, Tensor *tensor)
{
auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(node);
- if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
+ if (tensor->desc().data_type == DataType::QASYMM8)
{
- tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 128);
+ if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 128);
+ }
+ else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0);
+ }
}
- else if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED)
{
- tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0);
+ if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 0);
+ }
+ else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128);
+ }
}
return true;
};
@@ -194,22 +226,19 @@ void convert_special_tensors(Graph &g)
*/
void handle_nodes_with_bias(Graph &g)
{
- const std::set<NodeType> special_node_types = { NodeType::ConvolutionLayer,
- NodeType::DeconvolutionLayer,
- NodeType::DepthwiseConvolutionLayer,
- NodeType::FullyConnectedLayer
- };
+ const std::set<NodeType> special_node_types = {NodeType::ConvolutionLayer, NodeType::DeconvolutionLayer,
+ NodeType::DepthwiseConvolutionLayer, NodeType::FullyConnectedLayer};
- for(const auto &spc_type : special_node_types)
+ for (const auto &spc_type : special_node_types)
{
const std::vector<NodeID> scp_nodes_ids = g.nodes(spc_type);
- for(const auto &node_id : scp_nodes_ids)
+ for (const auto &node_id : scp_nodes_ids)
{
INode *node = g.node(node_id);
- if(node != nullptr)
+ if (node != nullptr)
{
Tensor *tensor = node->input(2);
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
tensor->desc().data_type = DataType::S32;
}
@@ -219,10 +248,10 @@ void handle_nodes_with_bias(Graph &g)
params.name = params.name.empty() ? "" : params.name + "Bias";
TensorDescriptor b_desc = node->input(1)->desc();
- auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)];
- b_desc.shape = TensorShape(depth);
+ auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)];
+ b_desc.shape = TensorShape(depth);
- auto accessor = support::cpp14::make_unique<EmptyAccessor>();
+ auto accessor = std::make_unique<EmptyAccessor>();
auto b_nid = GraphBuilder::add_const_node(g, params, b_desc, std::move(accessor));
g.add_connection(b_nid, 0, node_id, 2);
}
@@ -232,6 +261,10 @@ void handle_nodes_with_bias(Graph &g)
}
} // namespace
+SyntheticDataTypeMutator::SyntheticDataTypeMutator(DataType mutate_type) : _mutate_type{mutate_type}
+{
+}
+
const char *SyntheticDataTypeMutator::name()
{
return "SyntheticDataTypeMutator";
@@ -244,13 +277,13 @@ IGraphMutator::MutationType SyntheticDataTypeMutator::type() const
void SyntheticDataTypeMutator::mutate(Graph &g)
{
- if(is_mutation_supported(g))
+ if (is_mutation_supported(g))
{
// Remove nodes that get optimized out (e.g. BatchNorm)
remove_optimized_nodes(g);
// Convert tensor
- convert_tensors(g);
+ convert_tensors(g, _mutate_type);
convert_special_tensors(g);
// Handle special nodes
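
The differing zero points chosen above follow from the affine quantization formula real = scale * (q - offset): with scale 1/256, offset 0 covers [0, 1) over QASYMM8's [0, 255] range, while offset -128 covers the same real range over QASYMM8_SIGNED's [-128, 127]. A quick numeric check (plain C++):

    #include <cstdio>

    int main()
    {
        const float scale = 1.0f / 256.0f;
        // QASYMM8 (offset 0): q = 0 .. 255
        std::printf("QASYMM8:        [%f, %f]\n", scale * (0 - 0), scale * (255 - 0));
        // QASYMM8_SIGNED (offset -128): q = -128 .. 127
        std::printf("QASYMM8_SIGNED: [%f, %f]\n", scale * (-128 - (-128)), scale * (127 - (-128)));
        return 0;
    }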