From 60e98253f1e3df1723e7b8f4c996b544aa7c7205 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 22 Oct 2018 16:17:20 +0100
Subject: COMPMID-1451: Fuse activation in DepthwiseConvolution.

Change-Id: Id964d9068e18aaa13ab8adcbf7a9375b034ea6c3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154651
Tested-by: bsgcomp
Reviewed-by: Gian Marco Iodice
---
 arm_compute/graph/backends/FunctionHelpers.h       | 11 +++++++----
 .../graph/nodes/DepthwiseConvolutionLayerNode.h    | 14 ++++++++++++++
 .../CL/functions/CLDepthwiseConvolutionLayer.h     | 11 +++++++++--
 .../functions/GCDepthwiseConvolutionLayer.h        |  8 +++++++-
 .../NEON/functions/NEDepthwiseConvolutionLayer.h   | 21 +++++++++++++++++----
 5 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index a1cadcbf4c..1968ec3923 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -397,8 +397,10 @@ std::unique_ptr create_depthwise_convolution_layer(DepthwiseConvoluti
         biases->info()->set_data_type(DataType::S32);
     }
 
-    const PadStrideInfo conv_info = node.convolution_info();
-    const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
+    const PadStrideInfo              conv_info        = node.convolution_info();
+    const DepthwiseConvolutionMethod dwc_algorithm    = node.depthwise_convolution_method();
+    const unsigned int               depth_multiplier = 1;
+    const ActivationLayerInfo        fused_act        = node.fused_activation();
 
     // Create and configure function (we assume that functions have been validated before creation)
     std::unique_ptr func;
@@ -407,13 +409,13 @@ std::unique_ptr create_depthwise_convolution_layer(DepthwiseConvoluti
     {
         std::tie(func, func_name) = create_named_function(
                                         std::string("DepthwiseConvolutionLayer3x3"),
-                                        input, weights, biases, output, conv_info);
+                                        input, weights, biases, output, conv_info, depth_multiplier, fused_act);
     }
     else
     {
         std::tie(func, func_name) = create_named_function(
                                         std::string("DepthwiseConvolutionLayer"),
-                                        input, weights, biases, output, conv_info);
+                                        input, weights, biases, output, conv_info, depth_multiplier, fused_act);
     }
 
     // Log info
@@ -431,6 +433,7 @@ std::unique_ptr create_depthwise_convolution_layer(DepthwiseConvoluti
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
+                               << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
                                << std::endl);
     return func;
 }
diff --git a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
index 1a173c5421..7fa44b798f 100644
--- a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
@@ -58,6 +58,16 @@ public:
      * @return Convolution information
      */
     PadStrideInfo convolution_info() const;
+    /** Returns fused activation
+     *
+     * @return Fused activation
+     */
+    ActivationLayerInfo fused_activation() const;
+    /** Sets fused activation
+     *
+     * @param[in] fused_activation Fused activation to set
+     */
+    void set_fused_activation(ActivationLayerInfo fused_activation);
     /** Computes depthwise convolution output descriptor
      *
      * @param[in] input_descriptor Input descriptor
@@ -76,9 +86,13 @@ public:
     TensorDescriptor configure_output(size_t idx) const override;
     void accept(INodeVisitor &v) override;
 
+public:
+    static constexpr NodeType node_type = NodeType::DepthwiseConvolutionLayer;
+
 private:
     PadStrideInfo _info;
     DepthwiseConvolutionMethod _method;
+    ActivationLayerInfo _fused_activation;
 };
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 0547a6a6a8..96a0d236f5 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -35,6 +35,7 @@
 #include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
 namespace arm_compute
 {
@@ -121,8 +122,10 @@ public:
      * @param[out] output Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
      *
@@ -133,10 +136,12 @@ public:
      * @param[in] output Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -148,6 +153,7 @@ private:
     CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
     CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
     CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+    CLActivationLayer _activationlayer_function;
     CLFillBorderKernel _v2mm_input_fill_border;
     CLFillBorderKernel _v2mm_weights_fill_border;
     CLTensor _input_reshaped;
@@ -156,6 +162,7 @@ private:
     CLTensor _output_reshaped;
     bool _is_prepared;
     bool _is_quantized;
+    bool _is_activationlayer_enabled;
     const ICLTensor *_original_weights;
 };
 }
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h
index c99485634c..5eccc4d9e8 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h
@@ -28,6 +28,7 @@
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
 namespace arm_compute
 {
@@ -54,8 +55,10 @@ public:
      * @param[out] output Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overridden:
     void run() override final;
@@ -64,6 +67,9 @@ private:
     std::unique_ptr _kernel;
     GCFillBorderKernel _border_handler;
     GCTensorShiftKernel _shift_handler;
+    GCActivationLayer _activationlayer_function;
+
+    bool _is_activationlayer_enabled;
 };
 }
 #endif /*__ARM_COMPUTE_GCDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index b7398f628a..288d5136d2 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -35,6 +35,7 @@
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -62,8 +63,10 @@ public:
      * @param[out] output Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
      *
@@ -74,10 +77,12 @@ public:
      * @param[in] output Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -89,6 +94,7 @@ private:
     NEPermute _permute_input;
     NEPermute _permute_weights;
     NEPermute _permute_output;
+    NEActivationLayer _activationlayer_function;
     Tensor _accumulator;
     Tensor _permuted_input;
     Tensor _permuted_weights;
@@ -100,6 +106,7 @@ private:
     bool _is_nchw;
     bool _is_first_run;
    bool _permute;
+    bool _is_activationlayer_enabled;
 };

 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -132,8 +139,10 @@ public:
      *                              Data type supported: Same as @p input, S32 when input is QASYMM8.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
      *
@@ -144,10 +153,12 @@ public:
      *                              Data type supported: Same as @p input, S32 when input is QASYMM8.
      * @param[in] conv_info Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -164,6 +175,7 @@ private:
     NEPermute _permute_input;
     NEPermute _permute_weights;
     NEPermute _permute_output;
+    NEActivationLayer _activationlayer_function;
     Tensor _input_reshaped;
     Tensor _weights_reshaped;
     Tensor _v2mm_output;
@@ -174,6 +186,7 @@ private:
     bool _is_prepared;
     bool _is_quantized;
     bool _is_nhwc;
+    bool _is_activationlayer_enabled;
     const ITensor *_original_weights;
 };
 }
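Usage note (not part of the patch): the sketch below shows how a caller might exercise the new optional act_info argument on the CL backend, so that the activation is fused into the depthwise convolution instead of being run as a separate layer. It is a minimal, hypothetical example; the tensor setup, the stride-1/pad-1 convolution info and the choice of a plain ReLU are illustrative assumptions, not taken from the commit.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"

    using namespace arm_compute;

    // Assumes the CL runtime has been initialised (e.g. via CLScheduler::get().default_init())
    // and that src/weights/biases/dst are already configured and allocated.
    void depthwise_with_fused_relu(CLTensor &src, CLTensor &weights, CLTensor &biases, CLTensor &dst)
    {
        const PadStrideInfo       conv_info(1, 1, 1, 1); // stride 1, pad 1
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

        CLDepthwiseConvolutionLayer dwc;
        // depth_multiplier stays at 1; act_info is the new trailing argument introduced by this patch.
        dwc.configure(&src, &weights, &biases, &dst, conv_info, 1, act_info);
        dwc.run();
    }

The static validate() overloads accept the same trailing arguments, and the NEON functions and the GLES configure() mirror this signature, so the same pattern applies on those backends.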