diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2018-04-05 17:20:34 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:37 +0000 |
commit | bb54e4e40b7b08c509e234cd91ebd3087af66c23 (patch) | |
tree | 5e0b6bdf58bb129ef2b3b26e6e65515bc8b76f83 | |
parent | 4d33630096c769dd43716dd5607f151e3d5abef7 (diff) | |
download | ComputeLibrary-bb54e4e40b7b08c509e234cd91ebd3087af66c23.tar.gz |
COMPMID-797 Integrate Mobilenet QASYMM8 with new graph.
Change-Id: I4df63ec2f4eb27a8a6eec2bea27741bf8dec6910
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126966
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
22 files changed, 272 insertions, 158 deletions
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h index e01cfb1bc6..bbbbefbcbb 100644 --- a/arm_compute/graph/GraphBuilder.h +++ b/arm_compute/graph/GraphBuilder.h @@ -109,13 +109,17 @@ public: * @param[in] method (Optional) Convolution method to use * @param[in] weights_accessor (Optional) Accessor of the weights node data * @param[in] bias_accessor (Optional) Accessor of the bias node data + * @param[in] weights_quant_info (Optional) Weights quantization info + * @param[in] out_quant_info (Optional) Output quantization info * * @return Node ID of the created node, EmptyNodeID in case of error */ static NodeID add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info, unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT, - ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr); + ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, + const QuantizationInfo weights_quant_info = QuantizationInfo(), + const QuantizationInfo out_quant_info = QuantizationInfo()); /** Adds a depth concatenate node to the graph * * @param[in] g Graph to add the node to @@ -135,13 +139,14 @@ public: * @param[in] method (Optional) Convolution method to use * @param[in] weights_accessor (Optional) Accessor of the weights node data * @param[in] bias_accessor (Optional) Accessor of the bias node data + * @param[in] quant_info (Optional) Weights quantization info * * @return Node ID of the created node, EmptyNodeID in case of error */ static NodeID add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, PadStrideInfo conv_info, DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::DEFAULT, - ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr); + ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo()); /** Adds an element-wise layer node to the graph * * @param[in] g Graph to add the node to diff --git a/arm_compute/graph/TensorDescriptor.h b/arm_compute/graph/TensorDescriptor.h index fc0095e3b9..785c493cbc 100644 --- a/arm_compute/graph/TensorDescriptor.h +++ b/arm_compute/graph/TensorDescriptor.h @@ -37,18 +37,20 @@ struct TensorDescriptor final TensorDescriptor() = default; /** Constructor * - * @param[in] tensor_shape Tensor shape - * @param[in] tensor_data_type Tensor data type - * @param[in] tensor_target Target to allocate the tensor for + * @param[in] tensor_shape Tensor shape + * @param[in] tensor_data_type Tensor data type + * @param[in] tensor_quant_info Tensor quantization info + * @param[in] tensor_target Target to allocate the tensor for */ - TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, Target tensor_target = Target::UNSPECIFIED) - : shape(tensor_shape), data_type(tensor_data_type), target(tensor_target) + TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, QuantizationInfo tensor_quant_info = QuantizationInfo(), Target tensor_target = Target::UNSPECIFIED) + : shape(tensor_shape), data_type(tensor_data_type), quant_info(tensor_quant_info), target(tensor_target) { } - TensorShape shape{}; /**< Tensor shape */ - DataType data_type{ DataType::UNKNOWN }; /**< Data type */ - Target target{ Target::UNSPECIFIED }; /**< Target */ + TensorShape shape{}; /**< Tensor shape */ + DataType data_type{ DataType::UNKNOWN }; /**< Data type */ + QuantizationInfo quant_info{}; /**< Quantization info */ + Target target{ Target::UNSPECIFIED }; /**< Target */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h index d9bc8376bd..ed578307b5 100644 --- a/arm_compute/graph/TypePrinter.h +++ b/arm_compute/graph/TypePrinter.h @@ -308,6 +308,14 @@ inline ::std::ostream &operator<<(::std::ostream &os, const PadStrideInfo &pad_s return os; } + +/** Formatted output of the QuantizationInfo type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const QuantizationInfo &quantization_info) +{ + os << "Scale:" << quantization_info.scale << "~" + << "Offset:" << quantization_info.offset; + return os; +} } // namespace graph } // namespace arm_compute #endif /* __ARM_COMPUTE_GRAPH_TYPE_PRINTER_H__ */ diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h index ca01295d15..68a718ab97 100644 --- a/arm_compute/graph/backends/ValidateHelpers.h +++ b/arm_compute/graph/backends/ValidateHelpers.h @@ -70,12 +70,18 @@ Status validate_convolution_layer(ConvolutionLayerNode &node) ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - arm_compute::ITensorInfo *input = get_backing_tensor_info(node.input(0)); - arm_compute::ITensorInfo *weights = get_backing_tensor_info(node.input(1)); - arm_compute::ITensorInfo *biases = get_backing_tensor_info(node.input(2)); - arm_compute::ITensorInfo *output = get_backing_tensor_info(node.output(0)); - const PadStrideInfo conv_info = node.convolution_info(); - const ConvolutionMethod conv_algorithm = node.convolution_method(); + arm_compute::ITensorInfo *input = get_backing_tensor_info(node.input(0)); + arm_compute::ITensorInfo *weights = get_backing_tensor_info(node.input(1)); + arm_compute::ITensorInfo *biases = get_backing_tensor_info(node.input(2)); + arm_compute::ITensorInfo *output = get_backing_tensor_info(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->data_type())) + { + biases->set_data_type(DataType::S32); + } + + const PadStrideInfo conv_info = node.convolution_info(); + const ConvolutionMethod conv_algorithm = node.convolution_method(); // Validate function Status status{}; diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h index 22133b8376..2e7c50e6da 100644 --- a/arm_compute/graph/frontend/Layers.h +++ b/arm_compute/graph/frontend/Layers.h @@ -160,28 +160,34 @@ class ConvolutionLayer final : public ILayer public: /** Construct a convolution layer. * - * @param[in] conv_width Convolution width. - * @param[in] conv_height Convolution height. - * @param[in] ofm Output feature map. - * @param[in] weights Accessor to get kernel weights from. - * @param[in] bias Accessor to get kernel bias from. - * @param[in] conv_info Padding and stride information. - * @param[in] num_groups (Optional) Number of groups. Default: 1. + * @param[in] conv_width Convolution width. + * @param[in] conv_height Convolution height. + * @param[in] ofm Output feature map. + * @param[in] weights Accessor to get kernel weights from. + * @param[in] bias Accessor to get kernel bias from. + * @param[in] conv_info Padding and stride information. + * @param[in] num_groups (Optional) Number of groups. Default: 1. + * @param[in] weights_quant_info (Optional) Weights quantization information + * @param[in] out_quant_info (Optional) Output quantization info */ - ConvolutionLayer(unsigned int conv_width, - unsigned int conv_height, - unsigned int ofm, - ITensorAccessorUPtr weights, - ITensorAccessorUPtr bias, - PadStrideInfo conv_info, - unsigned int num_groups = 1) + ConvolutionLayer(unsigned int conv_width, + unsigned int conv_height, + unsigned int ofm, + ITensorAccessorUPtr weights, + ITensorAccessorUPtr bias, + PadStrideInfo conv_info, + unsigned int num_groups = 1, + const QuantizationInfo weights_quant_info = QuantizationInfo(), + const QuantizationInfo out_quant_info = QuantizationInfo()) : _conv_width(conv_width), _conv_height(conv_height), _ofm(ofm), _conv_info(std::move(conv_info)), _num_groups(num_groups), _weights(std::move(weights)), - _bias(std::move(bias)) + _bias(std::move(bias)), + _weights_quant_info(std::move(weights_quant_info)), + _out_quant_info(std::move(out_quant_info)) { } @@ -192,17 +198,19 @@ public: return GraphBuilder::add_convolution_node(s.graph(), common_params, input, Size2D(_conv_width, _conv_height), _ofm, _conv_info, _num_groups, s.hints().convolution_method_hint, - std::move(_weights), std::move(_bias)); + std::move(_weights), std::move(_bias), std::move(_weights_quant_info), std::move(_out_quant_info)); } private: - unsigned int _conv_width; - unsigned int _conv_height; - unsigned int _ofm; - const PadStrideInfo _conv_info; - unsigned int _num_groups; - ITensorAccessorUPtr _weights; - ITensorAccessorUPtr _bias; + unsigned int _conv_width; + unsigned int _conv_height; + unsigned int _ofm; + const PadStrideInfo _conv_info; + unsigned int _num_groups; + ITensorAccessorUPtr _weights; + ITensorAccessorUPtr _bias; + const QuantizationInfo _weights_quant_info; + const QuantizationInfo _out_quant_info; }; /** Depthwise Convolution Layer */ @@ -216,17 +224,20 @@ public: * @param[in] weights Accessor to get kernel weights from. * @param[in] bias Accessor to get kernel bias from. * @param[in] conv_info Padding and stride information. + * @param[in] quant_info (Optional) Quantization info used for weights */ - DepthwiseConvolutionLayer(unsigned int conv_width, - unsigned int conv_height, - ITensorAccessorUPtr weights, - ITensorAccessorUPtr bias, - PadStrideInfo conv_info) + DepthwiseConvolutionLayer(unsigned int conv_width, + unsigned int conv_height, + ITensorAccessorUPtr weights, + ITensorAccessorUPtr bias, + PadStrideInfo conv_info, + const QuantizationInfo quant_info = QuantizationInfo()) : _conv_width(conv_width), _conv_height(conv_height), _conv_info(std::move(conv_info)), _weights(std::move(weights)), - _bias(std::move(bias)) + _bias(std::move(bias)), + _quant_info(std::move(quant_info)) { } @@ -237,15 +248,16 @@ public: return GraphBuilder::add_depthwise_convolution_node(s.graph(), common_params, input, Size2D(_conv_width, _conv_height), _conv_info, s.hints().depthwise_convolution_method_hint, - std::move(_weights), std::move(_bias)); + std::move(_weights), std::move(_bias), std::move(_quant_info)); } private: - unsigned int _conv_width; - unsigned int _conv_height; - const PadStrideInfo _conv_info; - ITensorAccessorUPtr _weights; - ITensorAccessorUPtr _bias; + unsigned int _conv_width; + unsigned int _conv_height; + const PadStrideInfo _conv_info; + ITensorAccessorUPtr _weights; + ITensorAccessorUPtr _bias; + const QuantizationInfo _quant_info; }; /** Flatten Layer */ diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h index 70fefbeeab..d029895609 100644 --- a/arm_compute/graph/nodes/ConvolutionLayerNode.h +++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h @@ -36,10 +36,11 @@ class ConvolutionLayerNode final : public INode public: /** Constructor * - * @param[in] info Convolution layer attributes - * @param[in] method (Optional) Convolution method to use + * @param[in] info Convolution layer attributes + * @param[in] method (Optional) Convolution method to use + * @param[in] out_quant_info (Optional) Output quantization info */ - ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT); + ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, QuantizationInfo out_quant_info = QuantizationInfo()); /** Sets the convolution layer method to use * * @param[in] method Method to use for convolution @@ -78,6 +79,7 @@ public: private: PadStrideInfo _info; ConvolutionMethod _method; + QuantizationInfo _out_quant_info; }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 5e05e93855..3c9fb0ea5f 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -106,6 +106,8 @@ private: int32_t _b_offset; bool _run_vector_matrix_multiplication; bool _dot_product_path; + bool _is_first_run; + bool _reshape_b_only_on_first_run; }; } #endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */ diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp index cb49ffdfb4..ddf6175c4a 100644 --- a/examples/graph_mobilenet_qasymm8.cpp +++ b/examples/graph_mobilenet_qasymm8.cpp @@ -28,6 +28,7 @@ #include <cstdlib> +using namespace arm_compute; using namespace arm_compute::utils; using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; @@ -40,8 +41,6 @@ using namespace arm_compute::graph_utils; class GraphMobileNetQASYMM8Example : public Example { public: - //FIXME: Missing quantization info to the tensor descriptor (Giorgio is working on it) -#if 0 void do_setup(int argc, char **argv) override { std::string data_path; /* Path to the trainable data */ @@ -93,8 +92,8 @@ public: }; // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); // Parse arguments if(argc < 2) @@ -123,54 +122,56 @@ public: } graph << target_hint - << arm_compute::graph::Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::QASYMM8, in_quant_info), - get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input)) + << DepthwiseConvolutionMethod::OPTIMIZED_3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8, in_quant_info), + get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input)) << ConvolutionLayer( 3U, 3U, 32U, get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"), get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), - 1, WeightsInfo(), - conv_weights_quant_info.at(0), - mid_quant_info) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) - << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0)) - << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), - point_weights_quant_info.at(1)) - << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), - point_weights_quant_info.at(2)) - << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), - point_weights_quant_info.at(3)) - << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), - point_weights_quant_info.at(4)) - << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), - point_weights_quant_info.at(5)) - << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), - point_weights_quant_info.at(6)) - << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), - point_weights_quant_info.at(7)) - << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), - point_weights_quant_info.at(8)) - << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), - point_weights_quant_info.at(9)) - << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), - point_weights_quant_info.at(10)) - << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), - point_weights_quant_info.at(11)) - << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), + 1, conv_weights_quant_info.at(0), mid_quant_info) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)); + graph << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0)); + graph << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), + point_weights_quant_info.at(1)); + graph << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), + point_weights_quant_info.at(2)); + graph << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), + point_weights_quant_info.at(3)); + graph << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), + point_weights_quant_info.at(4)); + graph << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), + point_weights_quant_info.at(5)); + graph << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), + point_weights_quant_info.at(6)); + graph << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), + point_weights_quant_info.at(7)); + graph << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), + point_weights_quant_info.at(8)); + graph << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), + point_weights_quant_info.at(9)); + graph << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), + point_weights_quant_info.at(10)); + graph << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), + point_weights_quant_info.at(11)); + graph << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), point_weights_quant_info.at(12)) << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) << ConvolutionLayer( 1U, 1U, 1001U, get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_weights.npy"), get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_bias.npy"), - PadStrideInfo(1U, 1U, 0U, 0U), 1, WeightsInfo(), conv_weights_quant_info.at(1)) + PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1)) << ReshapeLayer(TensorShape(1001U)) << SoftmaxLayer() - << arm_compute::graph::Tensor(get_output_accessor(label, 5)); + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_function_memory_manager = true; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -179,7 +180,7 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "MobileNetV1_QASYMM8" }; /** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer. * @@ -199,29 +200,23 @@ private: QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info) { std::string total_path = "/cnn_data/mobilenet_qasymm8_model/" + param_path + "_"; - SubGraph sg; + SubStream sg(graph); sg << DepthwiseConvolutionLayer( 3U, 3U, get_weights_accessor(data_path, total_path + "depthwise_weights.npy"), get_weights_accessor(data_path, total_path + "depthwise_bias.npy"), - dwc_pad_stride_info, - true, - depth_weights_quant_info) + dwc_pad_stride_info, depth_weights_quant_info) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) << ConvolutionLayer( 1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), get_weights_accessor(data_path, total_path + "pointwise_bias.npy"), - conv_pad_stride_info, - 1, WeightsInfo(), - point_weights_quant_info) + conv_pad_stride_info, 1, point_weights_quant_info) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)); return BranchLayer(std::move(sg)); } -#endif /* if 0 */ - Stream graph { 0, "MobileNetV1_QASYMM8" }; }; /** Main program for MobileNetQASYMM8 * diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp index 539d0f84b3..676938a231 100644 --- a/src/core/TensorInfo.cpp +++ b/src/core/TensorInfo.cpp @@ -322,7 +322,7 @@ ITensorInfo &TensorInfo::set_data_type(DataType data_type) { _data_type = data_type; _format = Format::UNKNOWN; - return *this; + return set_tensor_shape(tensor_shape()); // Force total size and strides to update } ITensorInfo &TensorInfo::set_num_channels(int num_channels) diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp index 0d1bdc3596..4ad34e789c 100644 --- a/src/graph/GraphBuilder.cpp +++ b/src/graph/GraphBuilder.cpp @@ -206,7 +206,9 @@ NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, N NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info, unsigned int num_groups, ConvolutionMethod method, - ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor) + ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, + const QuantizationInfo weights_quant_info, + const QuantizationInfo out_quant_info) { CHECK_NODEIDX_PAIR(input, g); ARM_COMPUTE_ERROR_ON(depth == 0); @@ -220,7 +222,13 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa // Create weights node TensorDescriptor w_desc = input_tensor_desc; w_desc.shape = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z() / num_groups, depth); - NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor)); + + if(!weights_quant_info.empty()) + { + w_desc.quant_info = weights_quant_info; + } + + NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor)); // Create bias nodes NodeID b_nid = EmptyNodeID; @@ -234,7 +242,7 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa if(num_groups == 1) { // Create convolution node and connect - NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method); + NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, out_quant_info); g.add_connection(input.node_id, input.index, conv_nid, 0); g.add_connection(w_nid, 0, conv_nid, 1); if(has_bias) @@ -270,7 +278,7 @@ NodeID GraphBuilder::add_depth_concatenate_node(Graph &g, NodeParams params, std NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, PadStrideInfo conv_info, DepthwiseConvolutionMethod method, - ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor) + ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, const QuantizationInfo quant_info) { CHECK_NODEIDX_PAIR(input, g); ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0)); @@ -283,7 +291,13 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params, // Create weights node TensorDescriptor w_desc = input_tensor_desc; w_desc.shape = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z()); - NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor)); + + if(!quant_info.empty()) + { + w_desc.quant_info = quant_info; + } + + NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor)); // Create bias nodes NodeID b_nid = EmptyNodeID; diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index f10eb33a98..92cb6936c3 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -126,7 +126,7 @@ std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tens ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::CL); // Create backend tensor handle - TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type); + TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info); auto backend_tensor_handle = support::cpp14::make_unique<CLTensorHandle>(info); return std::move(backend_tensor_handle); diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index 1b448fefd2..ad73a797e3 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -154,10 +154,16 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - ICLTensor *input = get_backing_tensor(node.input(0)); - ICLTensor *weights = get_backing_tensor(node.input(1)); - ICLTensor *biases = get_backing_tensor(node.input(2)); - ICLTensor *output = get_backing_tensor(node.output(0)); + ICLTensor *input = get_backing_tensor(node.input(0)); + ICLTensor *weights = get_backing_tensor(node.input(1)); + ICLTensor *biases = get_backing_tensor(node.input(2)); + ICLTensor *output = get_backing_tensor(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->info()->data_type())) + { + biases->info()->set_data_type(DataType::S32); + } + const PadStrideInfo conv_info = node.convolution_info(); const ConvolutionMethod conv_algorithm = node.convolution_method(); @@ -190,6 +196,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name << " Data Type: " << input->info()->data_type() + << " Input QuantInfo: " << input->info()->quantization_info() + << " Weights QuantInfo: " << weights->info()->quantization_info() << " Input shape: " << input->info()->tensor_shape() << " Weights shape: " << weights->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape() @@ -251,10 +259,16 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - ICLTensor *input = get_backing_tensor(node.input(0)); - ICLTensor *weights = get_backing_tensor(node.input(1)); - ICLTensor *biases = get_backing_tensor(node.input(2)); - ICLTensor *output = get_backing_tensor(node.output(0)); + ICLTensor *input = get_backing_tensor(node.input(0)); + ICLTensor *weights = get_backing_tensor(node.input(1)); + ICLTensor *biases = get_backing_tensor(node.input(2)); + ICLTensor *output = get_backing_tensor(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->info()->data_type())) + { + biases->info()->set_data_type(DataType::S32); + } + const PadStrideInfo conv_info = node.convolution_info(); const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method(); @@ -275,6 +289,8 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name << " Data Type: " << input->info()->data_type() + << " Input QuantInfo: " << input->info()->quantization_info() + << " Weights QuantInfo: " << weights->info()->quantization_info() << " Input shape: " << input->info()->tensor_shape() << " Weights shape: " << weights->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape() diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp index 8cd9994744..a55215f058 100644 --- a/src/graph/backends/GLES/GCDeviceBackend.cpp +++ b/src/graph/backends/GLES/GCDeviceBackend.cpp @@ -87,7 +87,7 @@ std::unique_ptr<ITensorHandle> GCDeviceBackend::create_tensor(const Tensor &tens ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::GC); // Create backend tensor handle - TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type); + TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info); auto backend_tensor_handle = support::cpp14::make_unique<GCTensorHandle>(info); return std::move(backend_tensor_handle); diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index 12e7c042d4..d3c5737e68 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -154,10 +154,16 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - IGCTensor *input = get_backing_tensor(node.input(0)); - IGCTensor *weights = get_backing_tensor(node.input(1)); - IGCTensor *biases = get_backing_tensor(node.input(2)); - IGCTensor *output = get_backing_tensor(node.output(0)); + IGCTensor *input = get_backing_tensor(node.input(0)); + IGCTensor *weights = get_backing_tensor(node.input(1)); + IGCTensor *biases = get_backing_tensor(node.input(2)); + IGCTensor *output = get_backing_tensor(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->info()->data_type())) + { + biases->info()->set_data_type(DataType::S32); + } + const PadStrideInfo conv_info = node.convolution_info(); const ConvolutionMethod conv_algorithm = node.convolution_method(); @@ -180,6 +186,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name << " Data Type: " << input->info()->data_type() + << " Input QuantInfo: " << input->info()->quantization_info() + << " Weights QuantInfo: " << weights->info()->quantization_info() << " Input shape: " << input->info()->tensor_shape() << " Weights shape: " << weights->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape() @@ -241,10 +249,16 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - IGCTensor *input = get_backing_tensor(node.input(0)); - IGCTensor *weights = get_backing_tensor(node.input(1)); - IGCTensor *biases = get_backing_tensor(node.input(2)); - IGCTensor *output = get_backing_tensor(node.output(0)); + IGCTensor *input = get_backing_tensor(node.input(0)); + IGCTensor *weights = get_backing_tensor(node.input(1)); + IGCTensor *biases = get_backing_tensor(node.input(2)); + IGCTensor *output = get_backing_tensor(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->info()->data_type())) + { + biases->info()->set_data_type(DataType::S32); + } + const PadStrideInfo conv_info = node.convolution_info(); const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method(); @@ -264,6 +278,8 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name << " Data Type: " << input->info()->data_type() + << " Input QuantInfo: " << input->info()->quantization_info() + << " Weights QuantInfo: " << weights->info()->quantization_info() << " Input shape: " << input->info()->tensor_shape() << " Weights shape: " << weights->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape() diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp index aaf05829bb..9123196540 100644 --- a/src/graph/backends/NEON/NEDeviceBackend.cpp +++ b/src/graph/backends/NEON/NEDeviceBackend.cpp @@ -93,7 +93,7 @@ std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tens ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::NEON); // Create backend tensor handle - TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type); + TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info); auto backend_tensor_handle = support::cpp14::make_unique<NETensorHandle>(info); return std::move(backend_tensor_handle); diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 228af9ca6f..906378c565 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -140,10 +140,16 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - ITensor *input = get_backing_tensor(node.input(0)); - ITensor *weights = get_backing_tensor(node.input(1)); - ITensor *biases = get_backing_tensor(node.input(2)); - ITensor *output = get_backing_tensor(node.output(0)); + ITensor *input = get_backing_tensor(node.input(0)); + ITensor *weights = get_backing_tensor(node.input(1)); + ITensor *biases = get_backing_tensor(node.input(2)); + ITensor *output = get_backing_tensor(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->info()->data_type())) + { + biases->info()->set_data_type(DataType::S32); + } + const PadStrideInfo conv_info = node.convolution_info(); const ConvolutionMethod conv_algorithm = node.convolution_method(); @@ -175,6 +181,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name << " Data Type: " << input->info()->data_type() + << " Input QuantInfo: " << input->info()->quantization_info() + << " Weights QuantInfo: " << weights->info()->quantization_info() << " Input shape: " << input->info()->tensor_shape() << " Weights shape: " << weights->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape() @@ -234,10 +242,16 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - ITensor *input = get_backing_tensor(node.input(0)); - ITensor *weights = get_backing_tensor(node.input(1)); - ITensor *biases = get_backing_tensor(node.input(2)); - ITensor *output = get_backing_tensor(node.output(0)); + ITensor *input = get_backing_tensor(node.input(0)); + ITensor *weights = get_backing_tensor(node.input(1)); + ITensor *biases = get_backing_tensor(node.input(2)); + ITensor *output = get_backing_tensor(node.output(0)); + + if(is_data_type_quantized_asymmetric(input->info()->data_type())) + { + biases->info()->set_data_type(DataType::S32); + } + const PadStrideInfo conv_info = node.convolution_info(); const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method(); @@ -258,6 +272,8 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name << " Data Type: " << input->info()->data_type() + << " Input QuantInfo: " << input->info()->quantization_info() + << " Weights QuantInfo: " << weights->info()->quantization_info() << " Input shape: " << input->info()->tensor_shape() << " Weights shape: " << weights->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape() diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp index 461728487f..eb0c6a1c1a 100644 --- a/src/graph/nodes/ConvolutionLayerNode.cpp +++ b/src/graph/nodes/ConvolutionLayerNode.cpp @@ -31,8 +31,8 @@ namespace arm_compute { namespace graph { -ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method) - : _info(std::move(info)), _method(method) +ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, QuantizationInfo out_quant_info) + : _info(std::move(info)), _method(method), _out_quant_info(out_quant_info) { _input_edges.resize(3, EmptyEdgeID); _outputs.resize(1, NullTensorID); @@ -90,6 +90,12 @@ TensorDescriptor ConvolutionLayerNode::configure_output(size_t idx) const TensorDescriptor output_info = src->desc(); TensorShape output_shape = compute_output_shape(src->desc().shape, weights->desc().shape, _info); output_info.shape = output_shape; + + if(!_out_quant_info.empty()) + { + output_info.quant_info = _out_quant_info; + } + return output_info; } diff --git a/src/graph/nodes/SoftmaxLayerNode.cpp b/src/graph/nodes/SoftmaxLayerNode.cpp index 4c21ac6ad0..b6241e6654 100644 --- a/src/graph/nodes/SoftmaxLayerNode.cpp +++ b/src/graph/nodes/SoftmaxLayerNode.cpp @@ -63,7 +63,10 @@ TensorDescriptor SoftmaxLayerNode::configure_output(size_t idx) const const Tensor *src = input(0); ARM_COMPUTE_ERROR_ON(src == nullptr); - return src->desc(); + TensorDescriptor out_desc = src->desc(); + out_desc.quant_info = QuantizationInfo(1.f / 256.f, 0); + + return out_desc; } Status SoftmaxLayerNode::validate() diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 5f747bc477..711b006ede 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -102,7 +102,10 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor matrix_b = &_tmp_b; _memory_group.manage(&_tmp_a); - _memory_group.manage(&_tmp_b); + if(!_reshape_b_only_on_first_run) + { + _memory_group.manage(&_tmp_b); + } // Configure interleave kernel _mtx_a_reshape_kernel.configure(a, &_tmp_a, mult_interleave4x4_height); @@ -119,7 +122,10 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor { TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32); _vector_sum_col.allocator()->init(info_vector_sum_col); - _memory_group.manage(&_vector_sum_col); + if(!_reshape_b_only_on_first_run) + { + _memory_group.manage(&_vector_sum_col); + } // Configure Matrix B reduction kernel _mtx_b_reduction_kernel.configure(b, &_vector_sum_col); diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 18e6e919c3..e0859be93e 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -107,7 +107,10 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe // Manage intermediate buffers _memory_group.manage(&_tmp_a); - _memory_group.manage(&_tmp_b); + if(!_reshape_b_only_on_first_run) + { + _memory_group.manage(&_tmp_b); + } int m = a->info()->dimension(1); int n = b->info()->dimension(0); diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 7f25c2e717..3c48d691ed 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -109,14 +109,6 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); } - // Checks performed when biases are present - if(append_bias) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); - ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3)); - ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); - } - if(transpose1xW) { TensorInfo weights_reshaped = weights->clone()->set_tensor_shape(get_reshaped_weights_shape(weights, append_bias)); @@ -344,7 +336,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _memory_group.manage(&_input_im2col_reshaped); // Create tensor (interleave) to prepare input tensor for GEMM - if(!_is_fully_connected_convolution && !run_optimised) + if(!_is_fully_connected_convolution && !run_optimised && _is_interleaved) { TensorShape shape_interleaved(shape_im2col); shape_interleaved.set(0, shape_interleaved.x() * 4); @@ -362,7 +354,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position()); info_gemm.set_quantization_info(output->info()->quantization_info()); _gemm_output.allocator()->init(info_gemm); - _memory_group.manage(&_gemm_output); + + // FIXME: enabling memory manager for _gemm_output gives incorrect results (maybe bound to the assembly kernel in GEMMLowp?) + // _memory_group.manage(&_gemm_output); // Configure kernels // Configure im2col @@ -491,7 +485,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI reshaped_weights->set_tensor_shape(get_reshaped_weights_shape_conv(weights, append_bias, is_fully_connected_convolution)); ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, biases, reshaped_weights.get(), !is_fully_connected_convolution /* 1xW transpose */)); } - else + else if(!is_quantized) { TensorShape reshaped_weights_shape; diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 7372c6ca57..cbec73fc31 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -43,19 +43,19 @@ using namespace arm_compute::misc::shape_calculator; NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(std::move(memory_manager)), _asm_glue_unsigned(), _asm_glue_signed(), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _workspace(), _a_offset(0), _b_offset(0), _run_vector_matrix_multiplication(false), - _dot_product_path(false) + _dot_product_path(false), _is_first_run(true), _reshape_b_only_on_first_run(false) { } void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, ITensor *output, const GEMMInfo &gemm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_UNUSED(gemm_info); ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), output->info(), gemm_info)); _a_offset = a->info()->quantization_info().offset; _b_offset = b->info()->quantization_info().offset; _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; + _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); #ifdef __aarch64__ switch(a->info()->data_type()) @@ -98,7 +98,10 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _tmp_a.allocator()->init(info_a); _tmp_b.allocator()->init(info_b); _memory_group.manage(&_tmp_a); - _memory_group.manage(&_tmp_b); + if(!_reshape_b_only_on_first_run) + { + _memory_group.manage(&_tmp_b); + } // Configure interleave kernel { @@ -129,7 +132,10 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32); _vector_sum_col.allocator()->init(info_vector_sum_col); - _memory_group.manage(&_vector_sum_col); + if(!_reshape_b_only_on_first_run) + { + _memory_group.manage(&_vector_sum_col); + } // Configure Matrix B reduction kernel _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a->info()->dimension(0), false); @@ -252,7 +258,7 @@ void NEGEMMLowpMatrixMultiplyCore::run() NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY); } - if(_mtx_b_reshape_kernel) + if(_mtx_b_reshape_kernel && (_is_first_run || !_reshape_b_only_on_first_run)) { NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); } @@ -278,7 +284,7 @@ void NEGEMMLowpMatrixMultiplyCore::run() } // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if(_a_offset != 0) + if(_a_offset != 0 && (_is_first_run || !_reshape_b_only_on_first_run)) { NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); } @@ -287,4 +293,6 @@ void NEGEMMLowpMatrixMultiplyCore::run() NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY); _memory_group.release(); + + _is_first_run = false; } |