authorGiorgio Arena <giorgio.arena@arm.com>2018-04-05 17:20:34 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:37 +0000
commitbb54e4e40b7b08c509e234cd91ebd3087af66c23 (patch)
tree5e0b6bdf58bb129ef2b3b26e6e65515bc8b76f83
parent4d33630096c769dd43716dd5607f151e3d5abef7 (diff)
downloadComputeLibrary-bb54e4e40b7b08c509e234cd91ebd3087af66c23.tar.gz
COMPMID-797 Integrate Mobilenet QASYMM8 with new graph.
Change-Id: I4df63ec2f4eb27a8a6eec2bea27741bf8dec6910
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126966
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
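The patch threads QuantizationInfo through the graph frontend so a QASYMM8 MobileNet can be expressed directly. A minimal sketch of the resulting API, built only from signatures and accessor calls visible in this diff (header names assumed from the example file; scale/offset values are illustrative, not taken from a real model):

#include <string>
#include "arm_compute/graph.h"
#include "utils/GraphUtils.h"

using namespace arm_compute;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

void add_first_conv(Stream &graph, const std::string &data_path)
{
    graph << ConvolutionLayer(
                 3U, 3U, 32U,
                 get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"),
                 get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"),
                 PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR),
                 1 /* num_groups */,
                 QuantizationInfo(0.03f, 122) /* weights quant (illustrative) */,
                 QuantizationInfo(0.06f, 0) /* output quant (illustrative) */);
}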
 arm_compute/graph/GraphBuilder.h                                  |  9
 arm_compute/graph/TensorDescriptor.h                              | 18
 arm_compute/graph/TypePrinter.h                                   |  8
 arm_compute/graph/backends/ValidateHelpers.h                      | 18
 arm_compute/graph/frontend/Layers.h                               | 82
 arm_compute/graph/nodes/ConvolutionLayerNode.h                    |  8
 arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h |  2
 examples/graph_mobilenet_qasymm8.cpp                              | 91
 src/core/TensorInfo.cpp                                           |  2
 src/graph/GraphBuilder.cpp                                        | 24
 src/graph/backends/CL/CLDeviceBackend.cpp                         |  2
 src/graph/backends/CL/CLFunctionsFactory.cpp                      | 32
 src/graph/backends/GLES/GCDeviceBackend.cpp                       |  2
 src/graph/backends/GLES/GCFunctionsFactory.cpp                    | 32
 src/graph/backends/NEON/NEDeviceBackend.cpp                       |  2
 src/graph/backends/NEON/NEFunctionFactory.cpp                     | 32
 src/graph/nodes/ConvolutionLayerNode.cpp                          | 10
 src/graph/nodes/SoftmaxLayerNode.cpp                              |  5
 src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp         | 10
 src/runtime/NEON/functions/NEGEMM.cpp                             |  5
 src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp             | 16
 src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp       | 20
 22 files changed, 272 insertions(+), 158 deletions(-)
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h
index e01cfb1bc6..bbbbefbcbb 100644
--- a/arm_compute/graph/GraphBuilder.h
+++ b/arm_compute/graph/GraphBuilder.h
@@ -109,13 +109,17 @@ public:
* @param[in] method (Optional) Convolution method to use
* @param[in] weights_accessor (Optional) Accessor of the weights node data
* @param[in] bias_accessor (Optional) Accessor of the bias node data
+ * @param[in] weights_quant_info (Optional) Weights quantization info
+ * @param[in] out_quant_info (Optional) Output quantization info
*
* @return Node ID of the created node, EmptyNodeID in case of error
*/
static NodeID add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT,
- ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr);
+ ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr,
+ const QuantizationInfo weights_quant_info = QuantizationInfo(),
+ const QuantizationInfo out_quant_info = QuantizationInfo());
/** Adds a depth concatenate node to the graph
*
* @param[in] g Graph to add the node to
@@ -135,13 +139,14 @@ public:
* @param[in] method (Optional) Convolution method to use
* @param[in] weights_accessor (Optional) Accessor of the weights node data
* @param[in] bias_accessor (Optional) Accessor of the bias node data
+ * @param[in] quant_info (Optional) Weights quantization info
*
* @return Node ID of the created node, EmptyNodeID in case of error
*/
static NodeID add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
Size2D kernel_spatial_extend, PadStrideInfo conv_info,
DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::DEFAULT,
- ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr);
+ ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo());
/** Adds an element-wise layer node to the graph
*
* @param[in] g Graph to add the node to
diff --git a/arm_compute/graph/TensorDescriptor.h b/arm_compute/graph/TensorDescriptor.h
index fc0095e3b9..785c493cbc 100644
--- a/arm_compute/graph/TensorDescriptor.h
+++ b/arm_compute/graph/TensorDescriptor.h
@@ -37,18 +37,20 @@ struct TensorDescriptor final
TensorDescriptor() = default;
/** Constructor
*
- * @param[in] tensor_shape Tensor shape
- * @param[in] tensor_data_type Tensor data type
- * @param[in] tensor_target Target to allocate the tensor for
+ * @param[in] tensor_shape Tensor shape
+ * @param[in] tensor_data_type Tensor data type
+ * @param[in] tensor_quant_info Tensor quantization info
+ * @param[in] tensor_target Target to allocate the tensor for
*/
- TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, Target tensor_target = Target::UNSPECIFIED)
- : shape(tensor_shape), data_type(tensor_data_type), target(tensor_target)
+ TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, QuantizationInfo tensor_quant_info = QuantizationInfo(), Target tensor_target = Target::UNSPECIFIED)
+ : shape(tensor_shape), data_type(tensor_data_type), quant_info(tensor_quant_info), target(tensor_target)
{
}
- TensorShape shape{}; /**< Tensor shape */
- DataType data_type{ DataType::UNKNOWN }; /**< Data type */
- Target target{ Target::UNSPECIFIED }; /**< Target */
+ TensorShape shape{}; /**< Tensor shape */
+ DataType data_type{ DataType::UNKNOWN }; /**< Data type */
+ QuantizationInfo quant_info{}; /**< Quantization info */
+ Target target{ Target::UNSPECIFIED }; /**< Target */
};
} // namespace graph
} // namespace arm_compute
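With the new quant_info field, a descriptor for a quantized input tensor can be built directly. A sketch using the constructor added above (scale/offset illustrative; real values come from the trained model):

#include "arm_compute/graph/TensorDescriptor.h"

using namespace arm_compute;
using namespace arm_compute::graph;

const QuantizationInfo in_quant_info(0.0078125f, 128);
const TensorDescriptor input_desc(TensorShape(224U, 224U, 3U, 1U),
                                  DataType::QASYMM8,
                                  in_quant_info);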
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index d9bc8376bd..ed578307b5 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -308,6 +308,14 @@ inline ::std::ostream &operator<<(::std::ostream &os, const PadStrideInfo &pad_s
return os;
}
+
+/** Formatted output of the QuantizationInfo type. */
+inline ::std::ostream &operator<<(::std::ostream &os, const QuantizationInfo &quantization_info)
+{
+ os << "Scale:" << quantization_info.scale << "~"
+ << "Offset:" << quantization_info.offset;
+ return os;
+}
} // namespace graph
} // namespace arm_compute
#endif /* __ARM_COMPUTE_GRAPH_TYPE_PRINTER_H__ */
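The new overload makes quantization info loggable like the other graph types, which is what the ARM_COMPUTE_LOG_GRAPH_INFO additions below rely on. Usage in isolation:

#include <iostream>
#include "arm_compute/graph/TypePrinter.h"

using namespace arm_compute;
using namespace arm_compute::graph; // the operator<< above lives here

void print_quant_info()
{
    QuantizationInfo qi(0.5f, 3);
    std::cout << qi << std::endl; // prints: Scale:0.5~Offset:3
}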
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index ca01295d15..68a718ab97 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -70,12 +70,18 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- arm_compute::ITensorInfo *input = get_backing_tensor_info(node.input(0));
- arm_compute::ITensorInfo *weights = get_backing_tensor_info(node.input(1));
- arm_compute::ITensorInfo *biases = get_backing_tensor_info(node.input(2));
- arm_compute::ITensorInfo *output = get_backing_tensor_info(node.output(0));
- const PadStrideInfo conv_info = node.convolution_info();
- const ConvolutionMethod conv_algorithm = node.convolution_method();
+ arm_compute::ITensorInfo *input = get_backing_tensor_info(node.input(0));
+ arm_compute::ITensorInfo *weights = get_backing_tensor_info(node.input(1));
+ arm_compute::ITensorInfo *biases = get_backing_tensor_info(node.input(2));
+ arm_compute::ITensorInfo *output = get_backing_tensor_info(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ biases->set_data_type(DataType::S32);
+ }
+
+ const PadStrideInfo conv_info = node.convolution_info();
+ const ConvolutionMethod conv_algorithm = node.convolution_method();
// Validate function
Status status{};
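The S32 bias fix-up reflects how quantized GEMM accumulates: uint8 products are summed in 32 bits, and the bias is added in that accumulator domain before requantization back to uint8. A self-contained illustration of the arithmetic (not library code):

#include <cstddef>
#include <cstdint>

// Why QASYMM8 convolutions carry S32 biases.
int32_t quantized_dot(const uint8_t *in, const uint8_t *w, size_t len,
                      int32_t in_offset, int32_t w_offset, int32_t bias_s32)
{
    int32_t acc = bias_s32; // bias lives in the int32 accumulator domain
    for(size_t k = 0; k < len; ++k)
    {
        acc += (int32_t(in[k]) - in_offset) * (int32_t(w[k]) - w_offset);
    }
    return acc; // requantized to uint8 in a later stage
}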
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index 22133b8376..2e7c50e6da 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -160,28 +160,34 @@ class ConvolutionLayer final : public ILayer
public:
/** Construct a convolution layer.
*
- * @param[in] conv_width Convolution width.
- * @param[in] conv_height Convolution height.
- * @param[in] ofm Output feature map.
- * @param[in] weights Accessor to get kernel weights from.
- * @param[in] bias Accessor to get kernel bias from.
- * @param[in] conv_info Padding and stride information.
- * @param[in] num_groups (Optional) Number of groups. Default: 1.
+ * @param[in] conv_width Convolution width.
+ * @param[in] conv_height Convolution height.
+ * @param[in] ofm Output feature map.
+ * @param[in] weights Accessor to get kernel weights from.
+ * @param[in] bias Accessor to get kernel bias from.
+ * @param[in] conv_info Padding and stride information.
+ * @param[in] num_groups (Optional) Number of groups. Default: 1.
+ * @param[in] weights_quant_info (Optional) Weights quantization information
+ * @param[in] out_quant_info (Optional) Output quantization info
*/
- ConvolutionLayer(unsigned int conv_width,
- unsigned int conv_height,
- unsigned int ofm,
- ITensorAccessorUPtr weights,
- ITensorAccessorUPtr bias,
- PadStrideInfo conv_info,
- unsigned int num_groups = 1)
+ ConvolutionLayer(unsigned int conv_width,
+ unsigned int conv_height,
+ unsigned int ofm,
+ ITensorAccessorUPtr weights,
+ ITensorAccessorUPtr bias,
+ PadStrideInfo conv_info,
+ unsigned int num_groups = 1,
+ const QuantizationInfo weights_quant_info = QuantizationInfo(),
+ const QuantizationInfo out_quant_info = QuantizationInfo())
: _conv_width(conv_width),
_conv_height(conv_height),
_ofm(ofm),
_conv_info(std::move(conv_info)),
_num_groups(num_groups),
_weights(std::move(weights)),
- _bias(std::move(bias))
+ _bias(std::move(bias)),
+ _weights_quant_info(std::move(weights_quant_info)),
+ _out_quant_info(std::move(out_quant_info))
{
}
@@ -192,17 +198,19 @@ public:
return GraphBuilder::add_convolution_node(s.graph(), common_params, input,
Size2D(_conv_width, _conv_height), _ofm, _conv_info, _num_groups,
s.hints().convolution_method_hint,
- std::move(_weights), std::move(_bias));
+ std::move(_weights), std::move(_bias), std::move(_weights_quant_info), std::move(_out_quant_info));
}
private:
- unsigned int _conv_width;
- unsigned int _conv_height;
- unsigned int _ofm;
- const PadStrideInfo _conv_info;
- unsigned int _num_groups;
- ITensorAccessorUPtr _weights;
- ITensorAccessorUPtr _bias;
+ unsigned int _conv_width;
+ unsigned int _conv_height;
+ unsigned int _ofm;
+ const PadStrideInfo _conv_info;
+ unsigned int _num_groups;
+ ITensorAccessorUPtr _weights;
+ ITensorAccessorUPtr _bias;
+ const QuantizationInfo _weights_quant_info;
+ const QuantizationInfo _out_quant_info;
};
/** Depthwise Convolution Layer */
@@ -216,17 +224,20 @@ public:
* @param[in] weights Accessor to get kernel weights from.
* @param[in] bias Accessor to get kernel bias from.
* @param[in] conv_info Padding and stride information.
+ * @param[in] quant_info (Optional) Quantization info used for weights
*/
- DepthwiseConvolutionLayer(unsigned int conv_width,
- unsigned int conv_height,
- ITensorAccessorUPtr weights,
- ITensorAccessorUPtr bias,
- PadStrideInfo conv_info)
+ DepthwiseConvolutionLayer(unsigned int conv_width,
+ unsigned int conv_height,
+ ITensorAccessorUPtr weights,
+ ITensorAccessorUPtr bias,
+ PadStrideInfo conv_info,
+ const QuantizationInfo quant_info = QuantizationInfo())
: _conv_width(conv_width),
_conv_height(conv_height),
_conv_info(std::move(conv_info)),
_weights(std::move(weights)),
- _bias(std::move(bias))
+ _bias(std::move(bias)),
+ _quant_info(std::move(quant_info))
{
}
@@ -237,15 +248,16 @@ public:
return GraphBuilder::add_depthwise_convolution_node(s.graph(), common_params,
input, Size2D(_conv_width, _conv_height), _conv_info,
s.hints().depthwise_convolution_method_hint,
- std::move(_weights), std::move(_bias));
+ std::move(_weights), std::move(_bias), std::move(_quant_info));
}
private:
- unsigned int _conv_width;
- unsigned int _conv_height;
- const PadStrideInfo _conv_info;
- ITensorAccessorUPtr _weights;
- ITensorAccessorUPtr _bias;
+ unsigned int _conv_width;
+ unsigned int _conv_height;
+ const PadStrideInfo _conv_info;
+ ITensorAccessorUPtr _weights;
+ ITensorAccessorUPtr _bias;
+ const QuantizationInfo _quant_info;
};
/** Flatten Layer */
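The depthwise layer gains the same optional quantization argument. A sketch mirroring the constructor above (header assumed, scale/offset illustrative):

#include <utility>
#include "arm_compute/graph.h"

using namespace arm_compute;
using namespace arm_compute::graph;
using namespace arm_compute::graph::frontend;

// Append a 3x3 depthwise layer with explicit weights quantization.
void append_dwc(Stream &graph, ITensorAccessorUPtr weights, ITensorAccessorUPtr bias)
{
    graph << DepthwiseConvolutionLayer(3U, 3U,
                                       std::move(weights), std::move(bias),
                                       PadStrideInfo(1U, 1U, 1U, 1U),
                                       QuantizationInfo(0.02f, 110));
}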
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index 70fefbeeab..d029895609 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -36,10 +36,11 @@ class ConvolutionLayerNode final : public INode
public:
/** Constructor
*
- * @param[in] info Convolution layer attributes
- * @param[in] method (Optional) Convolution method to use
+ * @param[in] info Convolution layer attributes
+ * @param[in] method (Optional) Convolution method to use
+ * @param[in] out_quant_info (Optional) Output quantization info
*/
- ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT);
+ ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, QuantizationInfo out_quant_info = QuantizationInfo());
/** Sets the convolution layer method to use
*
* @param[in] method Method to use for convolution
@@ -78,6 +79,7 @@ public:
private:
PadStrideInfo _info;
ConvolutionMethod _method;
+ QuantizationInfo _out_quant_info;
};
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 5e05e93855..3c9fb0ea5f 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -106,6 +106,8 @@ private:
int32_t _b_offset;
bool _run_vector_matrix_multiplication;
bool _dot_product_path;
+ bool _is_first_run;
+ bool _reshape_b_only_on_first_run;
};
}
#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */
diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp
index cb49ffdfb4..ddf6175c4a 100644
--- a/examples/graph_mobilenet_qasymm8.cpp
+++ b/examples/graph_mobilenet_qasymm8.cpp
@@ -28,6 +28,7 @@
#include <cstdlib>
+using namespace arm_compute;
using namespace arm_compute::utils;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;
@@ -40,8 +41,6 @@ using namespace arm_compute::graph_utils;
class GraphMobileNetQASYMM8Example : public Example
{
public:
- //FIXME: Missing quantization info to the tensor descriptor (Giorgio is working on it)
-#if 0
void do_setup(int argc, char **argv) override
{
std::string data_path; /* Path to the trainable data */
@@ -93,8 +92,8 @@ public:
};
// Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
- const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
- TargetHint target_hint = set_target_hint(int_target_hint);
+ const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+ Target target_hint = set_target_hint(target);
// Parse arguments
if(argc < 2)
@@ -123,54 +122,56 @@ public:
}
graph << target_hint
- << arm_compute::graph::Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::QASYMM8, in_quant_info),
- get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input))
+ << DepthwiseConvolutionMethod::OPTIMIZED_3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method
+ << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8, in_quant_info),
+ get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input))
<< ConvolutionLayer(
3U, 3U, 32U,
get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"),
get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"),
PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR),
- 1, WeightsInfo(),
- conv_weights_quant_info.at(0),
- mid_quant_info)
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
- << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0))
- << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1),
- point_weights_quant_info.at(1))
- << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2),
- point_weights_quant_info.at(2))
- << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3),
- point_weights_quant_info.at(3))
- << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4),
- point_weights_quant_info.at(4))
- << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5),
- point_weights_quant_info.at(5))
- << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6),
- point_weights_quant_info.at(6))
- << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7),
- point_weights_quant_info.at(7))
- << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8),
- point_weights_quant_info.at(8))
- << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9),
- point_weights_quant_info.at(9))
- << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10),
- point_weights_quant_info.at(10))
- << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11),
- point_weights_quant_info.at(11))
- << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12),
+ 1, conv_weights_quant_info.at(0), mid_quant_info)
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
+ graph << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0));
+ graph << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1),
+ point_weights_quant_info.at(1));
+ graph << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2),
+ point_weights_quant_info.at(2));
+ graph << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3),
+ point_weights_quant_info.at(3));
+ graph << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4),
+ point_weights_quant_info.at(4));
+ graph << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5),
+ point_weights_quant_info.at(5));
+ graph << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6),
+ point_weights_quant_info.at(6));
+ graph << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7),
+ point_weights_quant_info.at(7));
+ graph << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8),
+ point_weights_quant_info.at(8));
+ graph << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9),
+ point_weights_quant_info.at(9));
+ graph << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10),
+ point_weights_quant_info.at(10));
+ graph << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11),
+ point_weights_quant_info.at(11));
+ graph << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12),
point_weights_quant_info.at(12))
<< PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
<< ConvolutionLayer(
1U, 1U, 1001U,
get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_weights.npy"),
get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_bias.npy"),
- PadStrideInfo(1U, 1U, 0U, 0U), 1, WeightsInfo(), conv_weights_quant_info.at(1))
+ PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1))
<< ReshapeLayer(TensorShape(1001U))
<< SoftmaxLayer()
- << arm_compute::graph::Tensor(get_output_accessor(label, 5));
+ << OutputLayer(get_output_accessor(label, 5));
- // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated
- graph.graph_init(int_target_hint == 2);
+ // Finalize graph
+ GraphConfig config;
+ config.use_function_memory_manager = true;
+ config.use_tuner = (target == 2);
+ graph.finalize(target_hint, config);
}
void do_run() override
{
@@ -179,7 +180,7 @@ public:
}
private:
- Graph graph{};
+ Stream graph{ 0, "MobileNetV1_QASYMM8" };
/** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer.
*
@@ -199,29 +200,23 @@ private:
QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info)
{
std::string total_path = "/cnn_data/mobilenet_qasymm8_model/" + param_path + "_";
- SubGraph sg;
+ SubStream sg(graph);
sg << DepthwiseConvolutionLayer(
3U, 3U,
get_weights_accessor(data_path, total_path + "depthwise_weights.npy"),
get_weights_accessor(data_path, total_path + "depthwise_bias.npy"),
- dwc_pad_stride_info,
- true,
- depth_weights_quant_info)
+ dwc_pad_stride_info, depth_weights_quant_info)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
<< ConvolutionLayer(
1U, 1U, conv_filt,
get_weights_accessor(data_path, total_path + "pointwise_weights.npy"),
get_weights_accessor(data_path, total_path + "pointwise_bias.npy"),
- conv_pad_stride_info,
- 1, WeightsInfo(),
- point_weights_quant_info)
+ conv_pad_stride_info, 1, point_weights_quant_info)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
return BranchLayer(std::move(sg));
}
-#endif /* if 0 */
- Stream graph { 0, "MobileNetV1_QASYMM8" };
};
/** Main program for MobileNetQASYMM8
*
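The example now ends with an explicit finalize step instead of graph_init(). A condensed sketch of that flow, using only the calls the example itself makes:

using namespace arm_compute::graph;
using namespace arm_compute::graph::frontend;

void finalize_stream(Stream &graph, Target target_hint, int target)
{
    GraphConfig config;
    config.use_function_memory_manager = true;
    config.use_tuner                   = (target == 2); // OpenCL-with-Tuner target
    graph.finalize(target_hint, config);
}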
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 539d0f84b3..676938a231 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -322,7 +322,7 @@ ITensorInfo &TensorInfo::set_data_type(DataType data_type)
{
_data_type = data_type;
_format = Format::UNKNOWN;
- return *this;
+ return set_tensor_shape(tensor_shape()); // Force total size and strides to update
}
ITensorInfo &TensorInfo::set_num_channels(int num_channels)
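Returning set_tensor_shape(tensor_shape()) instead of *this forces strides and total size to be recomputed for the new element size. A hypothetical reproduction of the problem the old code had:

#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

void data_type_resize_example()
{
    TensorInfo info(TensorShape(16U, 16U), 1, DataType::F32); // 4 bytes/element
    info.set_data_type(DataType::QASYMM8);                    // now 1 byte/element
    // With this fix, total_size() and strides reflect 1-byte elements;
    // previously they still described the 4-byte F32 layout.
}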
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 0d1bdc3596..4ad34e789c 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -206,7 +206,9 @@ NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, N
NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
unsigned int num_groups, ConvolutionMethod method,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor)
+ ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
+ const QuantizationInfo weights_quant_info,
+ const QuantizationInfo out_quant_info)
{
CHECK_NODEIDX_PAIR(input, g);
ARM_COMPUTE_ERROR_ON(depth == 0);
@@ -220,7 +222,13 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
// Create weights node
TensorDescriptor w_desc = input_tensor_desc;
w_desc.shape = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z() / num_groups, depth);
- NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+
+ if(!weights_quant_info.empty())
+ {
+ w_desc.quant_info = weights_quant_info;
+ }
+
+ NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
// Create bias nodes
NodeID b_nid = EmptyNodeID;
@@ -234,7 +242,7 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
if(num_groups == 1)
{
// Create convolution node and connect
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method);
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, out_quant_info);
g.add_connection(input.node_id, input.index, conv_nid, 0);
g.add_connection(w_nid, 0, conv_nid, 1);
if(has_bias)
@@ -270,7 +278,7 @@ NodeID GraphBuilder::add_depth_concatenate_node(Graph &g, NodeParams params, std
NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, PadStrideInfo conv_info,
DepthwiseConvolutionMethod method,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor)
+ ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, const QuantizationInfo quant_info)
{
CHECK_NODEIDX_PAIR(input, g);
ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0));
@@ -283,7 +291,13 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
// Create weights node
TensorDescriptor w_desc = input_tensor_desc;
w_desc.shape = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z());
- NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+
+ if(!quant_info.empty())
+ {
+ w_desc.quant_info = quant_info;
+ }
+
+ NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
// Create bias nodes
NodeID b_nid = EmptyNodeID;
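Both builder functions treat a default-constructed QuantizationInfo as "not provided", overriding the weights descriptor inherited from the input only when the caller set something. The sentinel convention in isolation:

#include "arm_compute/core/Types.h"

using namespace arm_compute;

// empty() means "inherit the quantization of the input descriptor".
QuantizationInfo pick_quant_info(const QuantizationInfo &requested,
                                 const QuantizationInfo &inherited)
{
    return requested.empty() ? inherited : requested;
}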
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index f10eb33a98..92cb6936c3 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -126,7 +126,7 @@ std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tens
ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::CL);
// Create backend tensor handle
- TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type);
+ TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
auto backend_tensor_handle = support::cpp14::make_unique<CLTensorHandle>(info);
return std::move(backend_tensor_handle);
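All three device backends (CL here, GLES and NEON below) now forward the descriptor's quantization into the backend tensor. Constructing such a TensorInfo in isolation, with illustrative values (the backend takes them from the graph TensorDescriptor):

#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

TensorInfo make_quantized_info()
{
    return TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::QASYMM8,
                      QuantizationInfo(0.0078125f, 128));
}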
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 1b448fefd2..ad73a797e3 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -154,10 +154,16 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *weights = get_backing_tensor(node.input(1));
- ICLTensor *biases = get_backing_tensor(node.input(2));
- ICLTensor *output = get_backing_tensor(node.output(0));
+ ICLTensor *input = get_backing_tensor(node.input(0));
+ ICLTensor *weights = get_backing_tensor(node.input(1));
+ ICLTensor *biases = get_backing_tensor(node.input(2));
+ ICLTensor *output = get_backing_tensor(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+ {
+ biases->info()->set_data_type(DataType::S32);
+ }
+
const PadStrideInfo conv_info = node.convolution_info();
const ConvolutionMethod conv_algorithm = node.convolution_method();
@@ -190,6 +196,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Data Type: " << input->info()->data_type()
+ << " Input QuantInfo: " << input->info()->quantization_info()
+ << " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
@@ -251,10 +259,16 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *weights = get_backing_tensor(node.input(1));
- ICLTensor *biases = get_backing_tensor(node.input(2));
- ICLTensor *output = get_backing_tensor(node.output(0));
+ ICLTensor *input = get_backing_tensor(node.input(0));
+ ICLTensor *weights = get_backing_tensor(node.input(1));
+ ICLTensor *biases = get_backing_tensor(node.input(2));
+ ICLTensor *output = get_backing_tensor(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+ {
+ biases->info()->set_data_type(DataType::S32);
+ }
+
const PadStrideInfo conv_info = node.convolution_info();
const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
@@ -275,6 +289,8 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Data Type: " << input->info()->data_type()
+ << " Input QuantInfo: " << input->info()->quantization_info()
+ << " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 8cd9994744..a55215f058 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -87,7 +87,7 @@ std::unique_ptr<ITensorHandle> GCDeviceBackend::create_tensor(const Tensor &tens
ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::GC);
// Create backend tensor handle
- TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type);
+ TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
auto backend_tensor_handle = support::cpp14::make_unique<GCTensorHandle>(info);
return std::move(backend_tensor_handle);
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 12e7c042d4..d3c5737e68 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -154,10 +154,16 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *weights = get_backing_tensor(node.input(1));
- IGCTensor *biases = get_backing_tensor(node.input(2));
- IGCTensor *output = get_backing_tensor(node.output(0));
+ IGCTensor *input = get_backing_tensor(node.input(0));
+ IGCTensor *weights = get_backing_tensor(node.input(1));
+ IGCTensor *biases = get_backing_tensor(node.input(2));
+ IGCTensor *output = get_backing_tensor(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+ {
+ biases->info()->set_data_type(DataType::S32);
+ }
+
const PadStrideInfo conv_info = node.convolution_info();
const ConvolutionMethod conv_algorithm = node.convolution_method();
@@ -180,6 +186,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Data Type: " << input->info()->data_type()
+ << " Input QuantInfo: " << input->info()->quantization_info()
+ << " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
@@ -241,10 +249,16 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *weights = get_backing_tensor(node.input(1));
- IGCTensor *biases = get_backing_tensor(node.input(2));
- IGCTensor *output = get_backing_tensor(node.output(0));
+ IGCTensor *input = get_backing_tensor(node.input(0));
+ IGCTensor *weights = get_backing_tensor(node.input(1));
+ IGCTensor *biases = get_backing_tensor(node.input(2));
+ IGCTensor *output = get_backing_tensor(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+ {
+ biases->info()->set_data_type(DataType::S32);
+ }
+
const PadStrideInfo conv_info = node.convolution_info();
const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
@@ -264,6 +278,8 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Data Type: " << input->info()->data_type()
+ << " Input QuantInfo: " << input->info()->quantization_info()
+ << " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index aaf05829bb..9123196540 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -93,7 +93,7 @@ std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tens
ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::NEON);
// Create backend tensor handle
- TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type);
+ TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
auto backend_tensor_handle = support::cpp14::make_unique<NETensorHandle>(info);
return std::move(backend_tensor_handle);
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 228af9ca6f..906378c565 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -140,10 +140,16 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *weights = get_backing_tensor(node.input(1));
- ITensor *biases = get_backing_tensor(node.input(2));
- ITensor *output = get_backing_tensor(node.output(0));
+ ITensor *input = get_backing_tensor(node.input(0));
+ ITensor *weights = get_backing_tensor(node.input(1));
+ ITensor *biases = get_backing_tensor(node.input(2));
+ ITensor *output = get_backing_tensor(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+ {
+ biases->info()->set_data_type(DataType::S32);
+ }
+
const PadStrideInfo conv_info = node.convolution_info();
const ConvolutionMethod conv_algorithm = node.convolution_method();
@@ -175,6 +181,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Data Type: " << input->info()->data_type()
+ << " Input QuantInfo: " << input->info()->quantization_info()
+ << " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
@@ -234,10 +242,16 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *weights = get_backing_tensor(node.input(1));
- ITensor *biases = get_backing_tensor(node.input(2));
- ITensor *output = get_backing_tensor(node.output(0));
+ ITensor *input = get_backing_tensor(node.input(0));
+ ITensor *weights = get_backing_tensor(node.input(1));
+ ITensor *biases = get_backing_tensor(node.input(2));
+ ITensor *output = get_backing_tensor(node.output(0));
+
+ if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+ {
+ biases->info()->set_data_type(DataType::S32);
+ }
+
const PadStrideInfo conv_info = node.convolution_info();
const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
@@ -258,6 +272,8 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Data Type: " << input->info()->data_type()
+ << " Input QuantInfo: " << input->info()->quantization_info()
+ << " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index 461728487f..eb0c6a1c1a 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -31,8 +31,8 @@ namespace arm_compute
{
namespace graph
{
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method)
- : _info(std::move(info)), _method(method)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, QuantizationInfo out_quant_info)
+ : _info(std::move(info)), _method(method), _out_quant_info(out_quant_info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -90,6 +90,12 @@ TensorDescriptor ConvolutionLayerNode::configure_output(size_t idx) const
TensorDescriptor output_info = src->desc();
TensorShape output_shape = compute_output_shape(src->desc().shape, weights->desc().shape, _info);
output_info.shape = output_shape;
+
+ if(!_out_quant_info.empty())
+ {
+ output_info.quant_info = _out_quant_info;
+ }
+
return output_info;
}
diff --git a/src/graph/nodes/SoftmaxLayerNode.cpp b/src/graph/nodes/SoftmaxLayerNode.cpp
index 4c21ac6ad0..b6241e6654 100644
--- a/src/graph/nodes/SoftmaxLayerNode.cpp
+++ b/src/graph/nodes/SoftmaxLayerNode.cpp
@@ -63,7 +63,10 @@ TensorDescriptor SoftmaxLayerNode::configure_output(size_t idx) const
const Tensor *src = input(0);
ARM_COMPUTE_ERROR_ON(src == nullptr);
- return src->desc();
+ TensorDescriptor out_desc = src->desc();
+ out_desc.quant_info = QuantizationInfo(1.f / 256.f, 0);
+
+ return out_desc;
}
Status SoftmaxLayerNode::validate()
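Pinning the softmax output to scale 1/256 and offset 0 maps probabilities in [0, 1) onto the full uint8 range, independent of the input's quantization. The mapping in plain arithmetic:

#include <algorithm>
#include <cmath>
#include <cstdint>

// quantize(p) = round(p / scale) + offset, with scale = 1/256, offset = 0.
uint8_t quantize_softmax_output(float prob)
{
    const float scale = 1.f / 256.f;
    const int   q     = static_cast<int>(std::lround(prob / scale));
    return static_cast<uint8_t>(std::min(q, 255)); // prob == 1.0f saturates to 255
}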
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 5f747bc477..711b006ede 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -102,7 +102,10 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
matrix_b = &_tmp_b;
_memory_group.manage(&_tmp_a);
- _memory_group.manage(&_tmp_b);
+ if(!_reshape_b_only_on_first_run)
+ {
+ _memory_group.manage(&_tmp_b);
+ }
// Configure interleave kernel
_mtx_a_reshape_kernel.configure(a, &_tmp_a, mult_interleave4x4_height);
@@ -119,7 +122,10 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
{
TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
_vector_sum_col.allocator()->init(info_vector_sum_col);
- _memory_group.manage(&_vector_sum_col);
+ if(!_reshape_b_only_on_first_run)
+ {
+ _memory_group.manage(&_vector_sum_col);
+ }
// Configure Matrix B reduction kernel
_mtx_b_reduction_kernel.configure(b, &_vector_sum_col);
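The guard matters because memory-group-managed buffers are recycled between runs; when B is reshaped only on the first run, its reshaped copy and column sums must outlive that run. The pattern, factored out as a sketch (Group/Tensor stand for whatever types the function uses):

// Only transient buffers go to the memory group; persistent ones
// (reused across runs) keep their own allocation.
template <typename Group, typename Tensor>
void manage_if_transient(Group &group, Tensor &tensor, bool reshape_b_only_on_first_run)
{
    if(!reshape_b_only_on_first_run)
    {
        group.manage(&tensor);
    }
}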
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 18e6e919c3..e0859be93e 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -107,7 +107,10 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
// Manage intermediate buffers
_memory_group.manage(&_tmp_a);
- _memory_group.manage(&_tmp_b);
+ if(!_reshape_b_only_on_first_run)
+ {
+ _memory_group.manage(&_tmp_b);
+ }
int m = a->info()->dimension(1);
int n = b->info()->dimension(0);
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 7f25c2e717..3c48d691ed 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -109,14 +109,6 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
- // Checks performed when biases are present
- if(append_bias)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
- ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
- ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
- }
-
if(transpose1xW)
{
TensorInfo weights_reshaped = weights->clone()->set_tensor_shape(get_reshaped_weights_shape(weights, append_bias));
@@ -344,7 +336,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_memory_group.manage(&_input_im2col_reshaped);
// Create tensor (interleave) to prepare input tensor for GEMM
- if(!_is_fully_connected_convolution && !run_optimised)
+ if(!_is_fully_connected_convolution && !run_optimised && _is_interleaved)
{
TensorShape shape_interleaved(shape_im2col);
shape_interleaved.set(0, shape_interleaved.x() * 4);
@@ -362,7 +354,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
info_gemm.set_quantization_info(output->info()->quantization_info());
_gemm_output.allocator()->init(info_gemm);
- _memory_group.manage(&_gemm_output);
+
+ // FIXME: enabling memory manager for _gemm_output gives incorrect results (maybe bound to the assembly kernel in GEMMLowp?)
+ // _memory_group.manage(&_gemm_output);
// Configure kernels
// Configure im2col
@@ -491,7 +485,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
reshaped_weights->set_tensor_shape(get_reshaped_weights_shape_conv(weights, append_bias, is_fully_connected_convolution));
ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, biases, reshaped_weights.get(), !is_fully_connected_convolution /* 1xW transpose */));
}
- else
+ else if(!is_quantized)
{
TensorShape reshaped_weights_shape;
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 7372c6ca57..cbec73fc31 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -43,19 +43,19 @@ using namespace arm_compute::misc::shape_calculator;
NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _asm_glue_unsigned(), _asm_glue_signed(), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(),
_mtx_b_reduction_kernel(), _offset_contribution_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _workspace(), _a_offset(0), _b_offset(0), _run_vector_matrix_multiplication(false),
- _dot_product_path(false)
+ _dot_product_path(false), _is_first_run(true), _reshape_b_only_on_first_run(false)
{
}
void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, ITensor *output, const GEMMInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
- ARM_COMPUTE_UNUSED(gemm_info);
ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), output->info(), gemm_info));
_a_offset = a->info()->quantization_info().offset;
_b_offset = b->info()->quantization_info().offset;
_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
+ _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
#ifdef __aarch64__
switch(a->info()->data_type())
@@ -98,7 +98,10 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
_tmp_a.allocator()->init(info_a);
_tmp_b.allocator()->init(info_b);
_memory_group.manage(&_tmp_a);
- _memory_group.manage(&_tmp_b);
+ if(!_reshape_b_only_on_first_run)
+ {
+ _memory_group.manage(&_tmp_b);
+ }
// Configure interleave kernel
{
@@ -129,7 +132,10 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
_vector_sum_col.allocator()->init(info_vector_sum_col);
- _memory_group.manage(&_vector_sum_col);
+ if(!_reshape_b_only_on_first_run)
+ {
+ _memory_group.manage(&_vector_sum_col);
+ }
// Configure Matrix B reduction kernel
_mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a->info()->dimension(0), false);
@@ -252,7 +258,7 @@ void NEGEMMLowpMatrixMultiplyCore::run()
NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
}
- if(_mtx_b_reshape_kernel)
+ if(_mtx_b_reshape_kernel && (_is_first_run || !_reshape_b_only_on_first_run))
{
NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
}
@@ -278,7 +284,7 @@ void NEGEMMLowpMatrixMultiplyCore::run()
}
// Run matrix B reduction kernel only if _a_offset is not equal to 0
- if(_a_offset != 0)
+ if(_a_offset != 0 && (_is_first_run || !_reshape_b_only_on_first_run))
{
NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
}
@@ -287,4 +293,6 @@ void NEGEMMLowpMatrixMultiplyCore::run()
NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
_memory_group.release();
+
+ _is_first_run = false;
}
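Taken together, _is_first_run and _reshape_b_only_on_first_run implement a first-run cache: the B reshape and B reduction kernels execute once, and later runs reuse their outputs. A standalone sketch of the control flow (names illustrative, not library code):

class GemmLowpLike
{
public:
    void run()
    {
        if(_is_first_run || !_reshape_b_only_on_first_run)
        {
            reshape_b();  // expensive; also covers the matrix B reduction
        }
        multiply();       // uses the cached reshaped B on steady-state runs
        _is_first_run = false;
    }

private:
    void reshape_b() { /* transform B into the blocked layout */ }
    void multiply() { /* GEMM using the reshaped B */ }

    bool _is_first_run{ true };
    bool _reshape_b_only_on_first_run{ true };
};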