aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIsabella Gottardi <isabella.gottardi@arm.com>2019-03-14 10:32:11 +0000
committerIsabella Gottardi <isabella.gottardi@arm.com>2019-03-20 11:23:43 +0000
commit0ae5de9124a0094e656244ad2f807c084966fc04 (patch)
treeab698ad9c43f95dda13f78cf76b753105cf69388
parentb0c5037d94ba7073ccabb0ebaff54db320f184c4 (diff)
downloadComputeLibrary-0ae5de9124a0094e656244ad2f807c084966fc04.tar.gz
COMPMID-1995: Prepare Graph to support different input/output quantization info
- Added support for different input/output qinfo in ActivationLayer and DepthwiseConv - Added support for different input/output qinfo in ConcatenateLayer introducing ConcatDescriptor - Added reshape validate - Allow OutputLayer to return a specific connection index from the input - Not run Inplace and Depth mutator when input/output quantization info are different Change-Id: I03f5e416fc43ddd284e1501887202a3145f76d8a Signed-off-by: Isabella Gottardi <isabella.gottardi@arm.com> Reviewed-on: https://review.mlplatform.org/c/852 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--arm_compute/graph/GraphBuilder.h27
-rw-r--r--arm_compute/graph/INode.h3
-rw-r--r--arm_compute/graph/LayerDescriptors.h69
-rw-r--r--arm_compute/graph/backends/FunctionHelpers.h25
-rw-r--r--arm_compute/graph/backends/ValidateHelpers.h24
-rw-r--r--arm_compute/graph/frontend/Layers.h72
-rw-r--r--arm_compute/graph/nodes/ActivationLayerNode.h7
-rw-r--r--arm_compute/graph/nodes/ConcatenateLayerNode.h20
-rw-r--r--arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h7
-rw-r--r--src/graph/GraphBuilder.cpp13
-rw-r--r--src/graph/backends/CL/CLNodeValidator.cpp2
-rw-r--r--src/graph/backends/NEON/NEFunctionFactory.cpp6
-rw-r--r--src/graph/backends/NEON/NENodeValidator.cpp2
-rw-r--r--src/graph/mutators/DepthConcatSubTensorMutator.cpp7
-rw-r--r--src/graph/mutators/InPlaceOperationMutator.cpp6
-rw-r--r--src/graph/mutators/NodeFusionMutator.cpp11
-rw-r--r--src/graph/nodes/ActivationLayerNode.cpp12
-rw-r--r--src/graph/nodes/ConcatenateLayerNode.cpp17
-rw-r--r--src/graph/nodes/DepthwiseConvolutionLayerNode.cpp15
-rw-r--r--utils/GraphUtils.cpp13
-rw-r--r--utils/GraphUtils.h4
-rw-r--r--utils/Utils.h11
22 files changed, 274 insertions, 99 deletions
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h
index a2a938b1cc..590e4d9b44 100644
--- a/arm_compute/graph/GraphBuilder.h
+++ b/arm_compute/graph/GraphBuilder.h
@@ -25,6 +25,7 @@
#define __ARM_COMPUTE_GRAPH_GRAPH_BUILDER_H__
#include "arm_compute/graph/ITensorAccessor.h"
+#include "arm_compute/graph/LayerDescriptors.h"
#include "arm_compute/graph/Types.h"
namespace arm_compute
@@ -73,14 +74,16 @@ public:
static NodeID add_output_node(Graph &g, NodeParams params, NodeIdxPair input, ITensorAccessorUPtr accessor = nullptr);
/** Adds an activation layer node to the graph
*
- * @param[in] g Graph to add the node to
- * @param[in] params Common node parameters
- * @param[in] input Input to the activation layer node as a NodeID-Index pair
- * @param[in] act_info Activation layer information
+ * @param[in] g Graph to add the node to
+ * @param[in] params Common node parameters
+ * @param[in] input Input to the activation layer node as a NodeID-Index pair
+ * @param[in] act_info Activation layer information
+ * @param[in] out_quant_info (Optional) Output quantization info
*
* @return Node ID of the created node, EmptyNodeID in case of error
*/
- static NodeID add_activation_node(Graph &g, NodeParams params, NodeIdxPair input, ActivationLayerInfo act_info);
+ static NodeID add_activation_node(Graph &g, NodeParams params, NodeIdxPair input, ActivationLayerInfo act_info,
+ const QuantizationInfo out_quant_info = QuantizationInfo());
/** Adds a batch normalization layer node to the graph
*
* @param[in] g Graph to add the node to
@@ -163,14 +166,14 @@ public:
ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr);
/** Adds a depth concatenate node to the graph
*
- * @param[in] g Graph to add the node to
- * @param[in] params Common node parameters
- * @param[in] inputs Inputs to the depth concatenate layer node as a NodeID-Index pair
- * @param[in] axis Concatenation axis
+ * @param[in] g Graph to add the node to
+ * @param[in] params Common node parameters
+ * @param[in] inputs Inputs to the depth concatenate layer node as a NodeID-Index pair
+ * @param[in] concat_descriptor Concatenation layer descriptor
*
* @return Node ID of the created node, EmptyNodeID in case of error
*/
- static NodeID add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, DataLayoutDimension axis);
+ static NodeID add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, descriptors::ConcatLayerDescriptor concat_descriptor);
/** Adds a depth-wise convolution layer node to the graph
*
* @param[in] g Graph to add the node to
@@ -183,13 +186,15 @@ public:
* @param[in] weights_accessor (Optional) Accessor of the weights node data
* @param[in] bias_accessor (Optional) Accessor of the bias node data
* @param[in] quant_info (Optional) Weights quantization info
+ * @param[in] out_quant_info (Optional) Output quantization info
*
* @return Node ID of the created node, EmptyNodeID in case of error
*/
static NodeID add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
Size2D kernel_spatial_extend, PadStrideInfo conv_info, int depth_multiplier = 1,
DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default,
- ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo());
+ ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo(),
+ const QuantizationInfo out_quant_info = QuantizationInfo());
/** Adds an element-wise layer node to the graph
*
* @param[in] g Graph to add the node to
diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h
index 4219150f58..edff8379d3 100644
--- a/arm_compute/graph/INode.h
+++ b/arm_compute/graph/INode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define __ARM_COMPUTE_GRAPH_INODE_H__
#include "arm_compute/core/Error.h"
+#include "arm_compute/graph/LayerDescriptors.h"
#include "arm_compute/graph/TensorDescriptor.h"
#include "arm_compute/graph/Types.h"
diff --git a/arm_compute/graph/LayerDescriptors.h b/arm_compute/graph/LayerDescriptors.h
new file mode 100644
index 0000000000..79099326ec
--- /dev/null
+++ b/arm_compute/graph/LayerDescriptors.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CONCAT_DESCRIPTOR_H__
+#define __ARM_COMPUTE_CONCAT_DESCRIPTOR_H__
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace descriptors
+{
+/** Concatenate layer descriptor */
+struct ConcatLayerDescriptor
+{
+ /** Default constructor */
+ ConcatLayerDescriptor()
+ : axis(DataLayoutDimension::CHANNEL), output_qinfo()
+ {
+ }
+
+ /** Constructor concatenate layer descriptor
+ *
+ * @param[in] axis Axis.
+ */
+ ConcatLayerDescriptor(DataLayoutDimension axis)
+ : axis(axis), output_qinfo()
+ {
+ }
+
+ /** Constructor concatenate layer descriptor
+ *
+ * @param[in] axis Axis.
+ * @param[in] output_qinfo Output quantization info.
+ */
+ ConcatLayerDescriptor(DataLayoutDimension axis, QuantizationInfo output_qinfo)
+ : axis(axis), output_qinfo(output_qinfo)
+ {
+ }
+
+ const DataLayoutDimension axis; /**< Concatenation Axis */
+ const QuantizationInfo output_qinfo; /**< Output quantization info */
+};
+} // namespace descriptors
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CONCAT_DESCRIPTOR_H__ */ \ No newline at end of file
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index d0035d9a84..4a423d2490 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -109,7 +109,7 @@ std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
<< node.name()
<< " Type: " << node.type()
- << " Target " << TargetInfo::TargetType
+ << " Target: " << TargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Shape: " << input->info()->tensor_shape()
<< " Activation function: " << act_info.activation()
@@ -245,8 +245,10 @@ std::unique_ptr<IFunction> create_bounding_box_transform_layer(BoundingBoxTransf
func->configure(input, output, deltas, bbox_info);
// Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
- << " Target " << TargetInfo::TargetType
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
+ << node.name()
+ << " Type: " << node.type()
+ << " Target: " << TargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Shape: " << input->info()->tensor_shape()
<< " BoundingBox Info img W: " << bbox_info.img_width() << " "
@@ -326,6 +328,12 @@ std::unique_ptr<arm_compute::IFunction> create_concatenate_layer(ConcatenateLaye
func->configure(inputs, output, concat_axis);
// Log info
+ const bool is_quantized = is_data_type_quantized_asymmetric(output->info()->data_type());
+ std::ostringstream qss;
+ if(is_quantized)
+ {
+ qss << " Output QuantInfo: " << output->info()->quantization_info();
+ }
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
<< node.name()
<< " Type: " << node.type()
@@ -334,6 +342,7 @@ std::unique_ptr<arm_compute::IFunction> create_concatenate_layer(ConcatenateLaye
<< " Shape: " << output->info()->tensor_shape()
<< " Num Inputs: " << inputs.size()
<< " Axis: " << concat_axis
+ << qss.str()
<< std::endl);
return std::move(func);
@@ -421,10 +430,10 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
<< " Target: " << TargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Groups: " << num_groups
- << qss.str()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
+ << qss.str()
<< (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
<< std::endl);
return func;
@@ -536,11 +545,11 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
<< " Type: " << func_name
<< " Target: " << TargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
- << qss.str()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
<< " Depth multiplier: " << depth_multiplier
+ << qss.str()
<< (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
<< std::endl);
return func;
@@ -1177,8 +1186,10 @@ std::unique_ptr<IFunction> create_roi_align_layer(ROIAlignLayerNode &node)
func->configure(input, rois, output, pool_info);
// Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
- << " Target " << TargetInfo::TargetType
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
+ << node.name()
+ << " Type: " << node.type()
+ << " Target: " << TargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Input shape: " << input->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index 8942be2da8..dbf8f35121 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -203,6 +203,7 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
return status;
}
+
/** Validates a detection output layer node
*
* @tparam DetectionOutputLayer DetectionOutput layer type
@@ -372,6 +373,29 @@ Status validate_reorg_layer(ReorgLayerNode &node)
return ReorgLayer::validate(input, output, node.stride());
}
+/** Validates a Reshape layer node
+ *
+ * @tparam ReshapeLayer Reshape layer type
+ *
+ * @param[in] node Node to validate
+ *
+ * @return Status
+ */
+template <typename ReshapeLayer>
+Status validate_reshape_layer(ReshapeLayerNode &node)
+{
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating ReshapeLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
+ ARM_COMPUTE_RETURN_ERROR_ON(node.num_inputs() != 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1);
+
+ // Extract input and output
+ arm_compute::ITensorInfo *input = detail::get_backing_tensor_info(node.input(0));
+ arm_compute::ITensorInfo *output = detail::get_backing_tensor_info(node.output(0));
+
+ // Validate function
+ return ReshapeLayer::validate(input, output);
+}
+
/** Validates a ROI Align layer node
*
* @tparam ROIAlignLayer ROIAlign layer type
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index d062d5a53e..67dc06c878 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -72,22 +72,24 @@ class OutputLayer final : public ILayer
public:
/** Construct an output layer.
*
- * @param[in] accessor Accessor to give output tensor data to.
+ * @param[in] accessor Accessor to give output tensor data to.
+ * @param[in] connection_idx (Optional) Input connection index
*/
- OutputLayer(ITensorAccessorUPtr accessor)
- : _accessor(std::move(accessor))
+ OutputLayer(ITensorAccessorUPtr accessor, unsigned int connection_idx = 0)
+ : _accessor(std::move(accessor)), _connection_idx(connection_idx)
{
}
NodeID create_layer(IStream &s) override
{
NodeParams common_params = { name(), s.hints().target_hint };
- NodeIdxPair input = { s.tail_node(), 0 };
+ NodeIdxPair input = { s.tail_node(), _connection_idx };
return GraphBuilder::add_output_node(s.graph(), common_params, input, std::move(_accessor));
}
private:
ITensorAccessorUPtr _accessor;
+ unsigned int _connection_idx;
};
/** Activation Layer */
@@ -96,10 +98,13 @@ class ActivationLayer final : public ILayer
public:
/** Construct an activation layer.
*
- * @param[in] act_info Activation information
+ * @param[in] act_info Activation information
+ * @param[in] out_quant_info (Optional) Output quantization info
*/
- ActivationLayer(ActivationLayerInfo act_info)
- : _act_info(act_info)
+ ActivationLayer(ActivationLayerInfo act_info,
+ const QuantizationInfo out_quant_info = QuantizationInfo())
+ : _act_info(act_info),
+ _out_quant_info(std::move(out_quant_info))
{
}
@@ -107,11 +112,12 @@ public:
{
NodeParams common_params = { name(), s.hints().target_hint };
NodeIdxPair input = { s.tail_node(), 0 };
- return GraphBuilder::add_activation_node(s.graph(), common_params, input, _act_info);
+ return GraphBuilder::add_activation_node(s.graph(), common_params, input, _act_info, std::move(_out_quant_info));
}
private:
- ActivationLayerInfo _act_info;
+ ActivationLayerInfo _act_info;
+ const QuantizationInfo _out_quant_info;
};
/** Batchnormalization Layer */
@@ -225,7 +231,7 @@ public:
*/
template <typename... Ts>
ConcatLayer(SubStream &&sub_stream1, SubStream &&sub_stream2, Ts &&... rest_sub_streams)
- : _sub_streams(), _axis(DataLayoutDimension::CHANNEL)
+ : _sub_streams(), _concat_descriptor(DataLayoutDimension::CHANNEL)
{
_sub_streams.push_back(arm_compute::support::cpp14::make_unique<SubStream>(std::move(sub_stream1)));
_sub_streams.push_back(arm_compute::support::cpp14::make_unique<SubStream>(std::move(sub_stream2)));
@@ -238,14 +244,14 @@ public:
}
/** Construct a concatenation layer
*
- * @param[in] axis Axis over the concatenation will be performed
- * @param[in] sub_stream1 First graph branch
- * @param[in] sub_stream2 Second graph branch
- * @param[in] rest_sub_streams Rest sub-graph branches
+ * @param[in] concat_descriptor Concat layer descriptor
+ * @param[in] sub_stream1 First graph branch
+ * @param[in] sub_stream2 Second graph branch
+ * @param[in] rest_sub_streams Rest sub-graph branches
*/
template <typename... Ts>
- ConcatLayer(DataLayoutDimension axis, SubStream &&sub_stream1, SubStream &&sub_stream2, Ts &&... rest_sub_streams)
- : _sub_streams(), _axis(axis)
+ ConcatLayer(descriptors::ConcatLayerDescriptor concat_descriptor, SubStream &&sub_stream1, SubStream &&sub_stream2, Ts &&... rest_sub_streams)
+ : _sub_streams(), _concat_descriptor(concat_descriptor)
{
_sub_streams.push_back(arm_compute::support::cpp14::make_unique<SubStream>(std::move(sub_stream1)));
_sub_streams.push_back(arm_compute::support::cpp14::make_unique<SubStream>(std::move(sub_stream2)));
@@ -262,7 +268,7 @@ public:
*/
template <typename... Ts>
ConcatLayer(SubStream &&sub_stream)
- : _sub_streams(), _axis(DataLayoutDimension::CHANNEL)
+ : _sub_streams(), _concat_descriptor(DataLayoutDimension::CHANNEL)
{
_sub_streams.push_back(arm_compute::support::cpp14::make_unique<SubStream>(std::move(sub_stream)));
}
@@ -289,14 +295,14 @@ public:
}
}
}
- nid = GraphBuilder::add_concatenate_node(s.graph(), common_params, nodes, _axis);
+ nid = GraphBuilder::add_concatenate_node(s.graph(), common_params, nodes, _concat_descriptor);
}
return nid;
}
private:
std::vector<std::unique_ptr<SubStream>> _sub_streams;
- DataLayoutDimension _axis;
+ descriptors::ConcatLayerDescriptor _concat_descriptor;
};
/** Convolution Layer */
@@ -414,28 +420,31 @@ class DepthwiseConvolutionLayer final : public ILayer
public:
/** Construct a depthwise convolution layer.
*
- * @param[in] conv_width Convolution width.
- * @param[in] conv_height Convolution height.
- * @param[in] weights Accessor to get kernel weights from.
- * @param[in] bias Accessor to get kernel bias from.
- * @param[in] conv_info Padding and stride information.
- * @param[in] depth_multiplier (Optional) Depth multiplier parameter.
- * @param[in] quant_info (Optional) Quantization info used for weights
+ * @param[in] conv_width Convolution width.
+ * @param[in] conv_height Convolution height.
+ * @param[in] weights Accessor to get kernel weights from.
+ * @param[in] bias Accessor to get kernel bias from.
+ * @param[in] conv_info Padding and stride information.
+ * @param[in] depth_multiplier (Optional) Depth multiplier parameter.
+ * @param[in] weights_quant_info (Optional) Quantization info used for weights
+ * @param[in] out_quant_info (Optional) Output quantization info
*/
DepthwiseConvolutionLayer(unsigned int conv_width,
unsigned int conv_height,
ITensorAccessorUPtr weights,
ITensorAccessorUPtr bias,
PadStrideInfo conv_info,
- int depth_multiplier = 1,
- const QuantizationInfo quant_info = QuantizationInfo())
+ int depth_multiplier = 1,
+ const QuantizationInfo weights_quant_info = QuantizationInfo(),
+ const QuantizationInfo out_quant_info = QuantizationInfo())
: _conv_width(conv_width),
_conv_height(conv_height),
_conv_info(std::move(conv_info)),
_weights(std::move(weights)),
_bias(std::move(bias)),
_depth_multiplier(depth_multiplier),
- _quant_info(std::move(quant_info))
+ _weights_quant_info(std::move(weights_quant_info)),
+ _out_quant_info(std::move(out_quant_info))
{
}
@@ -446,7 +455,7 @@ public:
return GraphBuilder::add_depthwise_convolution_node(s.graph(), common_params,
input, Size2D(_conv_width, _conv_height), _conv_info, _depth_multiplier,
s.hints().depthwise_convolution_method_hint,
- std::move(_weights), std::move(_bias), std::move(_quant_info));
+ std::move(_weights), std::move(_bias), std::move(_weights_quant_info), std::move(_out_quant_info));
}
private:
@@ -456,7 +465,8 @@ private:
ITensorAccessorUPtr _weights;
ITensorAccessorUPtr _bias;
int _depth_multiplier;
- const QuantizationInfo _quant_info;
+ const QuantizationInfo _weights_quant_info;
+ const QuantizationInfo _out_quant_info;
};
/** DetectionOutput Layer */
class DetectionOutputLayer final : public ILayer
diff --git a/arm_compute/graph/nodes/ActivationLayerNode.h b/arm_compute/graph/nodes/ActivationLayerNode.h
index 723120655b..a17b0103e4 100644
--- a/arm_compute/graph/nodes/ActivationLayerNode.h
+++ b/arm_compute/graph/nodes/ActivationLayerNode.h
@@ -36,9 +36,11 @@ class ActivationLayerNode final : public INode
public:
/** Constructor
*
- * @param[in] info Activation Layer information
+ * @param[in] info Activation Layer information
+ * @param[in] out_quant_info (Optional) Output quantization info
*/
- ActivationLayerNode(ActivationLayerInfo info);
+ ActivationLayerNode(ActivationLayerInfo info,
+ QuantizationInfo out_quant_info = QuantizationInfo());
/** Activation metadata accessor
*
* @return The activation info of the layer
@@ -56,6 +58,7 @@ public:
private:
ActivationLayerInfo _info;
+ QuantizationInfo _out_quant_info;
};
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/nodes/ConcatenateLayerNode.h b/arm_compute/graph/nodes/ConcatenateLayerNode.h
index 20c8523752..fc122845e8 100644
--- a/arm_compute/graph/nodes/ConcatenateLayerNode.h
+++ b/arm_compute/graph/nodes/ConcatenateLayerNode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,10 +36,10 @@ class ConcatenateLayerNode final : public INode
public:
/** Constructor
*
- * @param[in] total_nodes Number of nodes that will get concatenated
- * @param[in] axis Concatenation axis
+ * @param[in] total_nodes Number of nodes that will get concatenated
+ * @param[in] concat_descriptor Concatenate Layer Descriptor
*/
- ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis);
+ ConcatenateLayerNode(unsigned int total_nodes, descriptors::ConcatLayerDescriptor concat_descriptor);
/** Computes concatenations output descriptor
*
* @param[in] input_descriptors Input descriptors
@@ -68,6 +68,12 @@ public:
*/
DataLayoutDimension concatenation_axis() const;
+ /** Concatenation output quantization info accessor
+ *
+ * @return Output quantization info
+ */
+ QuantizationInfo output_quantization_info() const;
+
// Inherited overridden methods:
NodeType type() const override;
bool forward_descriptors() override;
@@ -75,9 +81,9 @@ public:
void accept(INodeVisitor &v) override;
private:
- unsigned int _total_nodes;
- DataLayoutDimension _axis;
- bool _is_enabled;
+ unsigned int _total_nodes;
+ descriptors::ConcatLayerDescriptor _concat_descriptor;
+ bool _is_enabled;
};
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
index 8c0aae13c9..fd0273416e 100644
--- a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,10 +39,12 @@ public:
* @param[in] info Convolution layer attributes
* @param[in] depth_multiplier (Optional) Depth multiplier parameter.
* @param[in] method (Optional) Depthwise convolution method to use
+ * @param[in] out_quant_info (Optional) Output quantization info
*/
DepthwiseConvolutionLayerNode(PadStrideInfo info,
int depth_multiplier = 1,
- DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default);
+ DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default,
+ QuantizationInfo out_quant_info = QuantizationInfo());
/** Sets the depthwise convolution method to use
*
* @param[in] method Depthwise convolution method to use
@@ -103,6 +105,7 @@ private:
PadStrideInfo _info;
int _depth_multiplier;
DepthwiseConvolutionMethod _method;
+ QuantizationInfo _out_quant_info;
ActivationLayerInfo _fused_activation;
};
} // namespace graph
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 30f1fc6894..74f60d5354 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -111,9 +111,10 @@ NodeID GraphBuilder::add_output_node(Graph &g, NodeParams params, NodeIdxPair in
return nid;
}
-NodeID GraphBuilder::add_activation_node(Graph &g, NodeParams params, NodeIdxPair input, ActivationLayerInfo act_info)
+NodeID GraphBuilder::add_activation_node(Graph &g, NodeParams params, NodeIdxPair input, ActivationLayerInfo act_info,
+ const QuantizationInfo out_quant_info)
{
- return create_simple_single_input_output_node<ActivationLayerNode>(g, params, input, act_info);
+ return create_simple_single_input_output_node<ActivationLayerNode>(g, params, input, act_info, out_quant_info);
}
NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, NodeIdxPair input, float epsilon,
@@ -293,11 +294,11 @@ NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdx
return deconv_nid;
}
-NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, DataLayoutDimension axis)
+NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, descriptors::ConcatLayerDescriptor concat_descriptor)
{
ARM_COMPUTE_ERROR_ON(inputs.size() == 0);
- NodeID nid = g.add_node<ConcatenateLayerNode>(inputs.size(), axis);
+ NodeID nid = g.add_node<ConcatenateLayerNode>(inputs.size(), concat_descriptor);
unsigned int i = 0;
for(const auto &input : inputs)
@@ -312,7 +313,7 @@ NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vect
NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend,
PadStrideInfo conv_info, int depth_multiplier, DepthwiseConvolutionMethod method,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, const QuantizationInfo quant_info)
+ ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, const QuantizationInfo quant_info, const QuantizationInfo out_quant_info)
{
CHECK_NODEIDX_PAIR(input, g);
ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0));
@@ -351,7 +352,7 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
}
// Create convolution node and connect
- NodeID conv_nid = g.add_node<DepthwiseConvolutionLayerNode>(conv_info, depth_multiplier, method);
+ NodeID conv_nid = g.add_node<DepthwiseConvolutionLayerNode>(conv_info, depth_multiplier, method, out_quant_info);
g.add_connection(input.node_id, input.index, conv_nid, 0);
g.add_connection(w_nid, 0, conv_nid, 1);
if(has_bias)
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index 4b71837a49..cb8dc0a172 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -74,6 +74,8 @@ Status CLNodeValidator::validate(INode *node)
return detail::validate_priorbox_layer<CLPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::ReorgLayer:
return detail::validate_reorg_layer<CLReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
+ case NodeType::ReshapeLayer:
+ return detail::validate_reshape_layer<CLReshapeLayer>(*polymorphic_downcast<ReshapeLayerNode *>(node));
case NodeType::ROIAlignLayer:
return detail::validate_roi_align_layer<CLROIAlignLayer>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
case NodeType::SliceLayer:
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index f23845c314..81c6e09f92 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -143,8 +143,10 @@ std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions,
<< " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Output QuantInfo: " << output->info()->quantization_info();
}
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
- << " Target " << NETargetInfo::TargetType
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
+ << node.name()
+ << " Type: " << func_name
+ << " Target: " << NETargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< qss.str()
<< " Input shape: " << input->info()->tensor_shape()
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index b0feec563b..77f2e7f21d 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -74,6 +74,8 @@ Status NENodeValidator::validate(INode *node)
return detail::validate_priorbox_layer<NEPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::ReorgLayer:
return detail::validate_reorg_layer<NEReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
+ case NodeType::ReshapeLayer:
+ return detail::validate_reshape_layer<NEReshapeLayer>(*polymorphic_downcast<ReshapeLayerNode *>(node));
case NodeType::ROIAlignLayer:
return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ROIAlignLayer");
case NodeType::SliceLayer:
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index a170c4d899..0e0a26b886 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -69,11 +69,12 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
continue;
}
- // Check that all tensor have the same target and valid inputs
+ // Check that all tensor have the same target, valid inputs and same quantization info
bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
[&](const EdgeID & eid)
{
- return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target);
+ return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target)
+ && (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info);
});
// Create subtensors
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index 31921b328e..1c2985dce6 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,8 +56,8 @@ void InPlaceOperationMutator::mutate(Graph &g)
ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
- // Prevent in-place operation if there is an accessor bound to the in-place tensor
- if(new_output_tensor->accessor() == nullptr)
+ // Prevent in-place operation if there is an accessor bound to the in-place tensor or the quantization info differs
+ if(new_output_tensor->accessor() == nullptr && current_output_tensor->desc().quant_info == new_output_tensor->desc().quant_info)
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 5927a597bb..724307e7b7 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -211,10 +211,17 @@ void NodeFusionMutator::mutate(Graph &g)
{
return true;
};
- auto qs8_prec = [](INode & n)
+ auto qs8_prec = [&g](INode & n)
{
ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
- return n.output(0)->desc().data_type == DataType::QASYMM8;
+
+ const auto output_edge_id = *n.output_edges().begin();
+ const auto output_edge = g.edge(output_edge_id);
+ // To perform fusion the two nodes must have same output quantization information
+ const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->consumer()->output(0)->desc().quant_info;
+ const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8;
+
+ return output_qasymm8 && same_qinfo;
};
// Fusion mutations
diff --git a/src/graph/nodes/ActivationLayerNode.cpp b/src/graph/nodes/ActivationLayerNode.cpp
index 85cb10bbdb..ada6cf981f 100644
--- a/src/graph/nodes/ActivationLayerNode.cpp
+++ b/src/graph/nodes/ActivationLayerNode.cpp
@@ -30,8 +30,8 @@ namespace arm_compute
{
namespace graph
{
-ActivationLayerNode::ActivationLayerNode(ActivationLayerInfo info)
- : _info(info)
+ActivationLayerNode::ActivationLayerNode(ActivationLayerInfo info, QuantizationInfo out_quant_info)
+ : _info(info), _out_quant_info(out_quant_info)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -62,7 +62,13 @@ TensorDescriptor ActivationLayerNode::configure_output(size_t idx) const
const Tensor *src = input(0);
ARM_COMPUTE_ERROR_ON(src == nullptr);
- return src->desc();
+ TensorDescriptor output_info = src->desc();
+ if(!_out_quant_info.empty())
+ {
+ output_info.quant_info = _out_quant_info;
+ }
+
+ return output_info;
}
NodeType ActivationLayerNode::type() const
diff --git a/src/graph/nodes/ConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp
index 3ce09d0073..bbdc4dc029 100644
--- a/src/graph/nodes/ConcatenateLayerNode.cpp
+++ b/src/graph/nodes/ConcatenateLayerNode.cpp
@@ -34,8 +34,8 @@ namespace arm_compute
{
namespace graph
{
-ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis)
- : _total_nodes(total_nodes), _axis(axis), _is_enabled(true)
+ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, descriptors::ConcatLayerDescriptor concat_descriptor)
+ : _total_nodes(total_nodes), _concat_descriptor(std::move(concat_descriptor)), _is_enabled(true)
{
_input_edges.resize(_total_nodes, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -53,7 +53,12 @@ bool ConcatenateLayerNode::is_enabled() const
DataLayoutDimension ConcatenateLayerNode::concatenation_axis() const
{
- return _axis;
+ return _concat_descriptor.axis;
+}
+
+QuantizationInfo ConcatenateLayerNode::output_quantization_info() const
+{
+ return _concat_descriptor.output_qinfo;
}
TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors,
@@ -121,7 +126,11 @@ TensorDescriptor ConcatenateLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(t == nullptr);
inputs_descriptors.push_back(t->desc());
}
- output_info = compute_output_descriptor(inputs_descriptors, _axis);
+ output_info = compute_output_descriptor(inputs_descriptors, _concat_descriptor.axis);
+ if(!_concat_descriptor.output_qinfo.empty())
+ {
+ output_info.quant_info = _concat_descriptor.output_qinfo;
+ }
}
return output_info;
diff --git a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
index 75ca5f4e03..935902d3fd 100644
--- a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
+++ b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,8 +32,9 @@ namespace arm_compute
{
namespace graph
{
-DepthwiseConvolutionLayerNode::DepthwiseConvolutionLayerNode(PadStrideInfo info, int depth_multiplier, DepthwiseConvolutionMethod method)
- : _info(std::move(info)), _depth_multiplier(depth_multiplier), _method(method), _fused_activation()
+DepthwiseConvolutionLayerNode::DepthwiseConvolutionLayerNode(PadStrideInfo info, int depth_multiplier, DepthwiseConvolutionMethod method,
+ QuantizationInfo out_quant_info)
+ : _info(std::move(info)), _depth_multiplier(depth_multiplier), _method(method), _out_quant_info(out_quant_info), _fused_activation()
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -113,7 +114,13 @@ TensorDescriptor DepthwiseConvolutionLayerNode::configure_output(size_t idx) con
ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
- return compute_output_descriptor(src->desc(), weights->desc(), _info, _depth_multiplier);
+ TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), _info, _depth_multiplier);
+ if(!_out_quant_info.empty())
+ {
+ output_info.quant_info = _out_quant_info;
+ }
+
+ return output_info;
}
NodeType DepthwiseConvolutionLayerNode::type() const
diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp
index b714c55136..26ea02a9ff 100644
--- a/utils/GraphUtils.cpp
+++ b/utils/GraphUtils.cpp
@@ -153,24 +153,29 @@ NumPyAccessor::NumPyAccessor(std::string npy_path, TensorShape shape, DataType d
}
template <typename T>
-void NumPyAccessor::access_numpy_tensor(ITensor &tensor)
+void NumPyAccessor::access_numpy_tensor(ITensor &tensor, T tolerance)
{
const int num_elements = tensor.info()->tensor_shape().total_size();
- int num_mismatches = utils::compare_tensor<T>(tensor, _npy_tensor);
+ int num_mismatches = utils::compare_tensor<T>(tensor, _npy_tensor, tolerance);
float percentage_mismatches = static_cast<float>(num_mismatches) / num_elements;
_output_stream << "Results: " << 100.f - (percentage_mismatches * 100) << " % matches with the provided output[" << _filename << "]." << std::endl;
+ _output_stream << " " << num_elements - num_mismatches << " out of " << num_elements << " matches with the provided output[" << _filename << "]." << std::endl
+ << std::endl;
}
bool NumPyAccessor::access_tensor(ITensor &tensor)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32, DataType::QASYMM8);
ARM_COMPUTE_ERROR_ON(_npy_tensor.info()->dimension(0) != tensor.info()->dimension(0));
switch(tensor.info()->data_type())
{
+ case DataType::QASYMM8:
+ access_numpy_tensor<qasymm8_t>(tensor, 0);
+ break;
case DataType::F32:
- access_numpy_tensor<float>(tensor);
+ access_numpy_tensor<float>(tensor, 0.0001f);
break;
default:
ARM_COMPUTE_ERROR("NOT SUPPORTED!");
diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h
index 131378e5bd..47656766a6 100644
--- a/utils/GraphUtils.h
+++ b/utils/GraphUtils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -160,7 +160,7 @@ public:
private:
template <typename T>
- void access_numpy_tensor(ITensor &tensor);
+ void access_numpy_tensor(ITensor &tensor, T tolerance);
Tensor _npy_tensor;
const std::string _filename;
diff --git a/utils/Utils.h b/utils/Utils.h
index 04ccc3e812..788ae4eeb7 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -782,15 +782,16 @@ void init_sgemm_output(T &dst, T &src0, T &src1, arm_compute::DataType dt)
*/
uint64_t get_mem_free_from_meminfo();
-/** Compare to tensor
+/** Compare two tensors
*
- * @param[in] tensor1 First tensor to be compared.
- * @param[in] tensor2 Second tensor to be compared.
+ * @param[in] tensor1 First tensor to be compared.
+ * @param[in] tensor2 Second tensor to be compared.
+ * @param[in] tolerance Tolerance used for the comparison.
*
* @return The number of mismatches
*/
template <typename T>
-int compare_tensor(ITensor &tensor1, ITensor &tensor2)
+int compare_tensor(ITensor &tensor1, ITensor &tensor2, T tolerance)
{
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&tensor1, &tensor2);
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(&tensor1, &tensor2);
@@ -807,7 +808,7 @@ int compare_tensor(ITensor &tensor1, ITensor &tensor2)
execute_window_loop(window, [&](const Coordinates & id)
{
- if(std::abs(*reinterpret_cast<T *>(itensor1.ptr()) - *reinterpret_cast<T *>(itensor2.ptr())) > 0.0001)
+ if(std::abs(*reinterpret_cast<T *>(itensor1.ptr()) - *reinterpret_cast<T *>(itensor2.ptr())) > tolerance)
{
++num_mismatches;
}