From 16dddd2af57a71ca10d62a4412d014f859720d2c Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Wed, 27 May 2020 15:03:48 +0100 Subject: COMPMID-3381: Implement graph example for YoLo v3 output detector Add sub/exp/splitv support in graph api Signed-off-by: Sheri Zhang Change-Id: I4e08cc19a46655717068b12c93d67e619a595d9a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3309 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/graph/backends/CL/CLFunctionsFactory.cpp | 8 +++ src/graph/backends/CL/CLNodeValidator.cpp | 18 ++++++ src/graph/backends/NEON/NEFunctionFactory.cpp | 12 +++- src/graph/backends/NEON/NENodeValidator.cpp | 18 ++++++ src/graph/mutators/NodeFusionMutator.cpp | 6 ++ src/graph/mutators/SplitLayerSubTensorMutator.cpp | 6 +- src/graph/nodes/EltwiseLayerNode.cpp | 62 ++++++++++++++++++++ src/graph/nodes/SplitLayerNode.cpp | 71 ++++++++++++++++++----- 8 files changed, 181 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index 312e09a49a..cf494e9a67 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -65,6 +65,12 @@ struct CLEltwiseFunctions using Multiplication = CLPixelWiseMultiplication; }; +/** Collection of CL unary element-wise functions */ +struct CLUnaryEltwiseFunctions +{ + using Exp = CLExpLayer; +}; + /** Function and tensor types to be used inside a CL fused convolution/batch normalization layer */ struct CLFusedLayerTypes { @@ -252,6 +258,8 @@ std::unique_ptr CLFunctionFactory::create(INode *node, GraphContext & return detail::create_detection_post_process_layer(*polymorphic_downcast(node)); case NodeType::EltwiseLayer: return detail::create_eltwise_layer(*polymorphic_downcast(node)); + case NodeType::UnaryEltwiseLayer: + return detail::create_unary_eltwise_layer(*polymorphic_downcast(node)); case NodeType::FlattenLayer: 
return detail::create_flatten_layer(*polymorphic_downcast(node)); case NodeType::FullyConnectedLayer: diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp index ddb8e3d1ac..15b54aedee 100644 --- a/src/graph/backends/CL/CLNodeValidator.cpp +++ b/src/graph/backends/CL/CLNodeValidator.cpp @@ -38,6 +38,20 @@ namespace graph { namespace backends { +/** Collection of CL element-wise functions */ +struct CLEltwiseLayerFunctions +{ + using ArithmeticAddition = CLArithmeticAddition; + using ArithmeticSubtraction = CLArithmeticSubtraction; + using PixelWiseMultiplication = CLPixelWiseMultiplication; +}; + +/** Collection of CL unary element-wise functions */ +struct CLUnaryEltwiseLayerFunctions +{ + using ExpLayer = CLExpLayer; +}; + Status CLNodeValidator::validate(INode *node) { if(node == nullptr) @@ -91,6 +105,10 @@ Status CLNodeValidator::validate(INode *node) return detail::validate_upsample_layer(*polymorphic_downcast(node)); case NodeType::YOLOLayer: return detail::validate_yolo_layer(*polymorphic_downcast(node)); + case NodeType::EltwiseLayer: + return detail::validate_eltwise_Layer(*polymorphic_downcast(node)); + case NodeType::UnaryEltwiseLayer: + return detail::validate_unary_eltwise_layer(*polymorphic_downcast(node)); default: return Status{}; } diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 454215e7ec..0b3036cb4e 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -53,7 +53,7 @@ struct NETargetInfo Target NETargetInfo::TargetType = Target::NEON; -/** Collection of CL convolution functions */ +/** Collection of NEON convolution functions */ struct NEConvolutionLayerFunctions { using GenericConvolutionLayer = NEConvolutionLayer; @@ -62,7 +62,7 @@ struct NEConvolutionLayerFunctions using WinogradConvolutionLayer = NEWinogradConvolutionLayer; }; -/** Collection of CL element-wise functions */ 
+/** Collection of NEON element-wise functions */ struct NEEltwiseFunctions { using Addition = NEArithmeticAddition; @@ -70,6 +70,12 @@ struct NEEltwiseFunctions using Multiplication = NEPixelWiseMultiplication; }; +/** Collection of NEON unary element-wise functions */ +struct NEUnaryEltwiseFunctions +{ + using Exp = NEExpLayer; +}; + /** Function and tensor types to be used inside a NEON fused convolution/batch normalization layer */ struct NEFusedLayerTypes { @@ -143,6 +149,8 @@ std::unique_ptr NEFunctionFactory::create(INode *node, GraphContext & return detail::create_detection_post_process_layer(*polymorphic_downcast(node)); case NodeType::EltwiseLayer: return detail::create_eltwise_layer(*polymorphic_downcast(node)); + case NodeType::UnaryEltwiseLayer: + return detail::create_unary_eltwise_layer(*polymorphic_downcast(node)); case NodeType::FlattenLayer: return detail::create_flatten_layer(*polymorphic_downcast(node)); case NodeType::FullyConnectedLayer: diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp index 0a3107292b..d4af3133be 100644 --- a/src/graph/backends/NEON/NENodeValidator.cpp +++ b/src/graph/backends/NEON/NENodeValidator.cpp @@ -38,6 +38,20 @@ namespace graph { namespace backends { +/** Collection of NEON element-wise functions */ +struct NEEltwiseLayerFunctions +{ + using ArithmeticAddition = NEArithmeticAddition; + using ArithmeticSubtraction = NEArithmeticSubtraction; + using PixelWiseMultiplication = NEPixelWiseMultiplication; +}; + +/** Collection of NEON unary element-wise functions */ +struct NEUnaryEltwiseLayerFunctions +{ + using ExpLayer = NEExpLayer; +}; + Status NENodeValidator::validate(INode *node) { if(node == nullptr) @@ -91,6 +105,10 @@ Status NENodeValidator::validate(INode *node) return detail::validate_upsample_layer(*polymorphic_downcast(node)); case NodeType::YOLOLayer: return detail::validate_yolo_layer(*polymorphic_downcast(node)); + case NodeType::EltwiseLayer: + return 
detail::validate_eltwise_Layer(*polymorphic_downcast(node)); + case NodeType::UnaryEltwiseLayer: + return detail::validate_unary_eltwise_layer(*polymorphic_downcast(node)); default: return Status{}; } diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index ae53b8ff75..7528520cc7 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -226,6 +226,12 @@ void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set return; } + // EltwiseLayerNode can only be fused when data type is float + if(n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type)) + { + return; + } + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id() << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl); diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp index 3ba73071ed..76180856c3 100644 --- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp +++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited.
* * SPDX-License-Identifier: MIT * @@ -82,7 +82,7 @@ void SplitLayerSubTensorMutator::mutate(Graph &g) auto *split_node = arm_compute::utils::cast::polymorphic_downcast(node); - const unsigned int axis = split_node->axis(); + const int axis = split_node->axis(); const unsigned int num_splits = split_node->num_splits(); const bool extend_parent = (axis < 2); @@ -92,7 +92,7 @@ void SplitLayerSubTensorMutator::mutate(Graph &g) Tensor *output_tensor = node->output(i); const TensorShape output_shape = output_tensor->desc().shape; Coordinates coords; - std::tie(std::ignore, coords) = SplitLayerNode::compute_output_descriptor(input_tensor->desc(), num_splits, axis, i); + std::tie(std::ignore, coords) = split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i); backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target); std::unique_ptr handle = backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent); diff --git a/src/graph/nodes/EltwiseLayerNode.cpp b/src/graph/nodes/EltwiseLayerNode.cpp index 92d183e693..40dcef13fb 100644 --- a/src/graph/nodes/EltwiseLayerNode.cpp +++ b/src/graph/nodes/EltwiseLayerNode.cpp @@ -57,6 +57,11 @@ ActivationLayerInfo EltwiseLayerNode::fused_activation() const return descriptor.fused_activation; } +QuantizationInfo EltwiseLayerNode::output_quant_info() const +{ + return descriptor.out_quant_info; +} + void EltwiseLayerNode::set_fused_activation(ActivationLayerInfo fused_activation) { descriptor.fused_activation = fused_activation; @@ -100,5 +105,62 @@ void EltwiseLayerNode::accept(INodeVisitor &v) { v.visit(*this); } + +UnaryEltwiseLayerNode::UnaryEltwiseLayerNode(const descriptors::UnaryEltwiseLayerDescriptor &descriptor) + : descriptor(descriptor) +{ + _input_edges.resize(1, EmptyEdgeID); + _outputs.resize(1, NullTensorID); +} + +descriptors::UnaryEltwiseLayerDescriptor UnaryEltwiseLayerNode::eltwise_descriptor() const +{ + return 
descriptor; +} + +void UnaryEltwiseLayerNode::set_fused_activation(ActivationLayerInfo fused_activation) +{ + descriptor.fused_activation = fused_activation; +} + +bool UnaryEltwiseLayerNode::forward_descriptors() +{ + if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID)) + { + Tensor *dst = output(0); + ARM_COMPUTE_ERROR_ON(dst == nullptr); + dst->desc() = configure_output(0); + return true; + } + return false; +} + +TensorDescriptor UnaryEltwiseLayerNode::configure_output(size_t idx) const +{ + ARM_COMPUTE_UNUSED(idx); + + const Tensor *src = input(0); + ARM_COMPUTE_ERROR_ON(src == nullptr); + + auto output_info = src->desc(); + + if(!descriptor.out_quant_info.empty()) + { + output_info.set_quantization_info(descriptor.out_quant_info); + } + + return output_info; +} + +NodeType UnaryEltwiseLayerNode::type() const +{ + return NodeType::UnaryEltwiseLayer; +} + +void UnaryEltwiseLayerNode::accept(INodeVisitor &v) +{ + v.visit(*this); +} + } // namespace graph } // namespace arm_compute diff --git a/src/graph/nodes/SplitLayerNode.cpp b/src/graph/nodes/SplitLayerNode.cpp index 5d46c9dcc9..7bc69c4667 100644 --- a/src/graph/nodes/SplitLayerNode.cpp +++ b/src/graph/nodes/SplitLayerNode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/graph/nodes/SplitLayerNode.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/INodeVisitor.h" @@ -31,8 +32,8 @@ namespace arm_compute { namespace graph { -SplitLayerNode::SplitLayerNode(unsigned int num_splits, unsigned int axis) - : _num_splits(num_splits), _axis(axis) +SplitLayerNode::SplitLayerNode(unsigned int num_splits, int axis, std::vector size_splits) + : _num_splits(num_splits), _axis(axis), _size_splits(size_splits) { _input_edges.resize(1, EmptyEdgeID); _outputs.resize(num_splits, NullTensorID); @@ -49,15 +50,34 @@ unsigned int SplitLayerNode::axis() const } std::pair SplitLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor, - unsigned int num_splits, unsigned int axis, unsigned int idx) + unsigned int num_splits, int axis, unsigned int idx) { - const unsigned int split_size = input_descriptor.shape[axis] / num_splits; - + // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis). 
+ int num_dimension = static_cast(input_descriptor.shape.num_dimensions()); + int tmp_axis = wrap_around(axis, num_dimension); + Coordinates coords; TensorDescriptor output_descriptor = input_descriptor; - output_descriptor.shape.set(axis, split_size); - - Coordinates coords; - coords.set(axis, idx * split_size); + int split_size = input_descriptor.shape[tmp_axis] / num_splits; + if(_size_splits.empty()) + { + output_descriptor.shape.set(tmp_axis, split_size); + coords.set(tmp_axis, idx * split_size); + } + else + { + int split_size = _size_splits[idx]; + if(split_size == -1) + { + split_size = input_descriptor.shape[tmp_axis]; + for(unsigned int i = 0; i < _size_splits.size() - 1; ++i) + split_size -= _size_splits[i]; + } + output_descriptor.shape.set(tmp_axis, split_size); + int coord_value = 0; + for(unsigned int i = 0; i < idx; ++i) + coord_value += _size_splits[i]; + coords.set(tmp_axis, coord_value); + } return std::make_pair(output_descriptor, coords); } @@ -89,18 +109,39 @@ TensorDescriptor SplitLayerNode::configure_output(size_t idx) const const Tensor *src = input(0); ARM_COMPUTE_ERROR_ON(src == nullptr); - TensorDescriptor output_info; - std::tie(output_info, std::ignore) = compute_output_descriptor(src->desc(), _num_splits, _axis, idx); + TensorDescriptor input_descriptor = src->desc(); + TensorDescriptor output_descriptor = input_descriptor; - return output_info; + // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis). + int num_dimension = static_cast(src->desc().shape.num_dimensions()); + int tmp_axis = wrap_around(_axis, num_dimension); + + int split_size = (_size_splits.empty()) ? 
(input_descriptor.shape[tmp_axis] / _num_splits) : _size_splits[idx]; + if(split_size == -1) + { + split_size = input_descriptor.shape[tmp_axis]; + for(unsigned int i = 0; i < _size_splits.size() - 1; ++i) + split_size -= _size_splits[i]; + } + output_descriptor.shape.set(tmp_axis, split_size); + + return output_descriptor; } Status SplitLayerNode::validate() const { const Tensor *src = input(0); ARM_COMPUTE_RETURN_ERROR_ON(src == nullptr); - ARM_COMPUTE_RETURN_ERROR_ON(_axis >= src->desc().shape.num_dimensions()); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->desc().shape[_axis] % _num_splits, "Split should be exact"); + int num_dimension = static_cast(src->desc().shape.num_dimensions()); + ARM_COMPUTE_RETURN_ERROR_ON(_axis < (-num_dimension) || _axis >= num_dimension); + + // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis). + int tmp_axis = wrap_around(_axis, num_dimension); + + if(_size_splits.empty()) + { + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->desc().shape[tmp_axis] % _num_splits, "Split should be exact"); + } return Status{}; } -- cgit v1.2.1