From 5dea19e58a5521b05e95375c8618a37072697bc0 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 8 Nov 2019 12:13:48 +0000 Subject: COMPMID-2579: Fuse batch normalization with convolution and depthwise convolution at graph level on NEON Change-Id: Ib263a680bbd2dc1a4947102ee8d6da76b95f02bf Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/2252 Reviewed-by: Georgios Pinitas Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/graph/backends/CL/CLFunctionsFactory.cpp | 5 +++-- src/graph/backends/GLES/GCFunctionsFactory.cpp | 1 + src/graph/backends/NEON/NEFunctionFactory.cpp | 4 ++-- src/graph/mutators/NodeFusionMutator.cpp | 13 ++----------- 4 files changed, 8 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index d53b634bb1..ca6c837ab8 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CPP/CPPFunctions.h" @@ -254,9 +255,9 @@ std::unique_ptr CLFunctionFactory::create(INode *node, GraphContext & case NodeType::FullyConnectedLayer: return detail::create_fully_connected_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedConvolutionBatchNormalizationLayer: - return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer: - return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::GenerateProposalsLayer: return detail::create_generate_proposals_layer(*polymorphic_downcast(node), ctx); case NodeType::NormalizationLayer: diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index 13543dbf15..b9562c70cb 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/backends/FunctionHelpers.h" #include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 12f44e303e..e0fd32f67d 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -218,9 +218,9 @@ std::unique_ptr NEFunctionFactory::create(INode *node, GraphContext & case NodeType::FullyConnectedLayer: return detail::create_fully_connected_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedConvolutionBatchNormalizationLayer: - return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer: - return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node)); + return detail::create_fused_depthwise_convolution_batch_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::NormalizationLayer: return detail::create_normalization_layer(*polymorphic_downcast(node), ctx); case NodeType::PermuteLayer: diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index 61d9479fca..abd6436d74 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -309,21 +309,12 @@ void NodeFusionMutator::mutate(Graph &g) return (output_qasymm8 && same_qinfo) || !output_qasymm8; }; - Target target = g.nodes()[0].get()->output(0)->desc().target; - // Fusion mutations detail::fuse_layer(g, empty_prec, detail::fuse_node_with_activation, supported_fused_activations); detail::fuse_layer(g, empty_prec, detail::fuse_node_with_activation, supported_fused_activations); detail::fuse_layer(g, qs8_prec, detail::fuse_node_with_activation, supported_fused_activations); - - // Currently fuse batch normalization brings performance uplift only on OpenCL with FP32 data type - // TODO (COMPMID-2524): Fuse batch normalization with convolution and depthwise convolution at graph level for NEON - FP32 - if(target == Target::CL) - { - //Depthwise Convolution and Batch Normalization Fusion active only for CL - detail::fuse_layer(g, empty_prec, detail::fuse_convolution_with_batch_normalization); - detail::fuse_layer(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); - } + detail::fuse_layer(g, empty_prec, detail::fuse_convolution_with_batch_normalization); + detail::fuse_layer(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); } } // namespace graph } // namespace arm_compute -- cgit v1.2.1