From 5dea19e58a5521b05e95375c8618a37072697bc0 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 8 Nov 2019 12:13:48 +0000 Subject: COMPMID-2579: Fuse batch normalization with convolution and depthwise convolution at graph level on NEON Change-Id: Ib263a680bbd2dc1a4947102ee8d6da76b95f02bf Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/2252 Reviewed-by: Georgios Pinitas Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/graph/backends/FunctionHelpers.h | 32 ++++++++++++++++------ .../FusedConvolutionBatchNormalizationFunction.h | 4 +-- ...epthwiseConvolutionBatchNormalizationFunction.h | 4 +-- src/graph/backends/CL/CLFunctionsFactory.cpp | 5 ++-- src/graph/backends/GLES/GCFunctionsFactory.cpp | 1 + src/graph/backends/NEON/NEFunctionFactory.cpp | 4 +-- src/graph/mutators/NodeFusionMutator.cpp | 13 ++------- 7 files changed, 36 insertions(+), 27 deletions(-) diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h index ee257e3abf..02bfe9dc22 100644 --- a/arm_compute/graph/backends/FunctionHelpers.h +++ b/arm_compute/graph/backends/FunctionHelpers.h @@ -174,11 +174,12 @@ std::unique_ptr create_batch_normalization_layer(BatchNormalizationLa * @tparam TargetInfo Target-specific information * * @param[in] node Node to create the backend function for + * @param[in] ctx Graph context * * @return Backend batch normalization layer function */ template -std::unique_ptr create_fused_convolution_batch_normalization_layer(FusedConvolutionBatchNormalizationNode &node) +std::unique_ptr create_fused_convolution_batch_normalization_layer(FusedConvolutionBatchNormalizationNode &node, GraphContext &ctx) { validate_node(node, 7 /* expected inputs */, 1 /* expected outputs */); @@ -199,9 +200,16 @@ std::unique_ptr create_fused_convolution_batch_normalization_layer(Fu const ActivationLayerInfo fused_act = node.fused_activation(); const float epsilon = node.epsilon(); + // Create and configure function (we assume that functions have been validated before creation) + std::shared_ptr mm = get_memory_manager(ctx, TargetInfo::TargetType); + std::unique_ptr func; + std::string func_name; + + using FType = FusedConvolutionBatchNormalizationFunction; + // Create and configure function - auto func = support::cpp14::make_unique>(); - func->configure(input, weights, biases, output, mean, var, beta, gamma, epsilon, conv_info, num_groups, fast_math, fused_act); + std::tie(func, func_name) = create_named_memory_managed_function( + std::string("FusedConvolutionBatchNormalizationLayer"), mm, input, weights, biases, output, mean, var, beta, gamma, epsilon, conv_info, num_groups, fast_math, fused_act); // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " @@ -214,7 +222,7 @@ std::unique_ptr create_fused_convolution_batch_normalization_layer(Fu << " Output shape: " << output->info()->tensor_shape() << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "") << std::endl); - return std::move(func); + return func; } /** Create a backend fused depthwise convolution batch normalization layer function @@ -223,11 +231,12 @@ std::unique_ptr create_fused_convolution_batch_normalization_layer(Fu * @tparam TargetInfo Target-specific information * * @param[in] node Node to create the backend function for + * @param[in] ctx Graph context * * @return Backend fused depthwise convolution batch normalization layer function */ template -std::unique_ptr create_fused_depthwise_convolution_batch_normalization_layer(FusedDepthwiseConvolutionBatchNormalizationNode &node) +std::unique_ptr create_fused_depthwise_convolution_batch_normalization_layer(FusedDepthwiseConvolutionBatchNormalizationNode &node, GraphContext &ctx) { validate_node(node, 7 /* expected inputs */, 1 /* expected outputs */); @@ -247,9 +256,16 @@ std::unique_ptr create_fused_depthwise_convolution_batch_normalizatio const ActivationLayerInfo fused_act = node.fused_activation(); const float epsilon = node.epsilon(); + // Create and configure function (we assume that functions have been validated before creation) + std::shared_ptr mm = get_memory_manager(ctx, TargetInfo::TargetType); + std::unique_ptr func; + std::string func_name; + + using FType = FusedDepthwiseConvolutionBatchNormalizationFunction; + // Create and configure function - auto func = support::cpp14::make_unique>(); - func->configure(input, weights, biases, output, mean, var, beta, gamma, epsilon, conv_info, depth_multiplier, fused_act); + std::tie(func, func_name) = create_named_memory_managed_function( + std::string("FusedDepthwiseConvolutionBatchNormalizationLayer"), mm, input, weights, biases, output, mean, var, beta, gamma, epsilon, conv_info, depth_multiplier, fused_act); // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " @@ -262,7 +278,7 @@ std::unique_ptr create_fused_depthwise_convolution_batch_normalizatio << " Output shape: " << output->info()->tensor_shape() << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "") << std::endl); - return std::move(func); + return func; } /** Create a backend bounding box transform layer function diff --git a/arm_compute/graph/backends/FusedConvolutionBatchNormalizationFunction.h b/arm_compute/graph/backends/FusedConvolutionBatchNormalizationFunction.h index a6da76bb06..0af3abc547 100644 --- a/arm_compute/graph/backends/FusedConvolutionBatchNormalizationFunction.h +++ b/arm_compute/graph/backends/FusedConvolutionBatchNormalizationFunction.h @@ -42,8 +42,8 @@ public: using TensorType = typename TargetInfo::TensorType; using TensorConcreteType = typename TargetInfo::TensorConcreteType; - FusedConvolutionBatchNormalizationFunction() - : _conv_layer(), _fused_batch_norm_layer(), _fused_bias(), _is_prepared(false) + FusedConvolutionBatchNormalizationFunction(std::shared_ptr memory_manager = nullptr) + : _conv_layer(memory_manager), _fused_batch_norm_layer(), _fused_bias(), _is_prepared(false) { } diff --git a/arm_compute/graph/backends/FusedDepthwiseConvolutionBatchNormalizationFunction.h b/arm_compute/graph/backends/FusedDepthwiseConvolutionBatchNormalizationFunction.h index 6f70d3c3a0..14474f4ee5 100644 --- a/arm_compute/graph/backends/FusedDepthwiseConvolutionBatchNormalizationFunction.h +++ b/arm_compute/graph/backends/FusedDepthwiseConvolutionBatchNormalizationFunction.h @@ -42,8 +42,8 @@ public: using TensorType = typename TargetInfo::TensorType; using TensorConcreteType = typename TargetInfo::TensorConcreteType; - FusedDepthwiseConvolutionBatchNormalizationFunction() - : _depth_conv_layer(), _fused_batch_norm_layer(), _fused_bias(), _is_prepared(false) + FusedDepthwiseConvolutionBatchNormalizationFunction(std::shared_ptr memory_manager = nullptr) + : _depth_conv_layer(memory_manager), _fused_batch_norm_layer(), _fused_bias(), _is_prepared(false) { } diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index d53b634bb1..ca6c837ab8 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" @@ -254,9 +255,9 @@ std::unique_ptr CLFunctionFactory::create(INode *node, GraphContext & case NodeType::FullyConnectedLayer: return detail::create_fully_connected_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedConvolutionBatchNormalizationLayer: - return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer: - return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::GenerateProposalsLayer: return detail::create_generate_proposals_layer(*polymorphic_downcast(node), ctx); case NodeType::NormalizationLayer: diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index 13543dbf15..b9562c70cb 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 12f44e303e..e0fd32f67d 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -218,9 +218,9 @@ std::unique_ptr NEFunctionFactory::create(INode *node, GraphContext & case NodeType::FullyConnectedLayer: return detail::create_fully_connected_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedConvolutionBatchNormalizationLayer: - return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer: - return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::NormalizationLayer: return detail::create_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::PermuteLayer: diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index 61d9479fca..abd6436d74 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -309,21 +309,12 @@ void NodeFusionMutator::mutate(Graph &g) return (output_qasymm8 && same_qinfo) || !output_qasymm8; }; - Target target = g.nodes()[0].get()->output(0)->desc().target; - // Fusion mutations detail::fuse_layer(g, empty_prec, detail::fuse_node_with_activation, supported_fused_activations); detail::fuse_layer(g, empty_prec, detail::fuse_node_with_activation, supported_fused_activations); detail::fuse_layer(g, qs8_prec, detail::fuse_node_with_activation, supported_fused_activations); - - // Currently fuse batch normalization brings performance uplift only on OpenCL with FP32 data type - // TODO (COMPMID-2524): Fuse batch normalization with convolution and depthwise convolution at graph level for NEON - FP32 - if(target == Target::CL) - { - //Depthwise Convolution and Batch Normalization Fusion active only for CL - detail::fuse_layer(g, empty_prec, detail::fuse_convolution_with_batch_normalization); - detail::fuse_layer(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); - } + detail::fuse_layer(g, empty_prec, detail::fuse_convolution_with_batch_normalization); + detail::fuse_layer(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); } } // namespace graph } // namespace arm_compute -- cgit v1.2.1