From f3622becf1f0d6bf5147ebb7d6d0f14d5252860a Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 29 Jul 2019 14:27:16 +0100 Subject: COMPMID-1979: Fuse Activation Function in CLGEMM - part 4 Fused activation function in CLGEMM Change-Id: I644fdf09349325c0b3a2cd5fef2a3ea2c974149d Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/1640 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- arm_compute/core/Types.h | 19 +++++++++++--- arm_compute/runtime/CL/functions/CLGEMM.h | 2 -- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 29 ++++++++++------------ 3 files changed, 29 insertions(+), 21 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index b4d94eced4..2c17f273a5 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1775,7 +1775,8 @@ public: _gemmlowp_output_stage(), _fp_mixed_precision(false), _broadcast_bias(false), - _pretranpose_B(true) + _pretranpose_B(true), + _activation_info() { } /** Constructor @@ -1791,9 +1792,11 @@ public: * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. + * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication */ GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false, - GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false) noexcept + GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false, + const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept : _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), @@ -1803,7 +1806,8 @@ public: _gemmlowp_output_stage(gemmlowp_output_stage), _fp_mixed_precision(fp_mixed_precision), _broadcast_bias(broadcast_bias), - _pretranpose_B(reshape_b_only_on_first_run) + _pretranpose_B(reshape_b_only_on_first_run), + _activation_info(activation_info) { } /** Flag which specifies if the matrix A has been reshaped @@ -1896,6 +1900,14 @@ public: { _pretranpose_B = flag; } + /** Activation layer to apply after the matrix multiplication + * + * @return ActivationLayerInfo object + */ + ActivationLayerInfo activation_info() const + { + return _activation_info; + } private: bool _is_a_reshaped; @@ -1908,6 +1920,7 @@ private: bool _fp_mixed_precision; bool _broadcast_bias; bool _pretranpose_B; + ActivationLayerInfo _activation_info; }; /** Winograd information */ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index 8c462fa4cb..e2a92a8a37 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -127,7 +127,6 @@ private: CLMemoryGroup _memory_group; CLGEMMMatrixMultiplyKernel _mm_kernel; - CLGEMMMatrixAdditionKernel _ma_kernel; CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel; CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel; CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel; @@ -135,7 +134,6 @@ private: CLTensor _tmp_a; CLTensor _tmp_b; const ICLTensor *_original_b; - bool _run_addition; bool _reshape_b_only_on_first_run; bool _is_prepared; GEMMType _gemm_type; diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index e9a3f9bf2b..027727c7f7 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -163,8 +163,10 @@ private: * except for input of QASYMM8 type where output should be of S32 type. * @param[in] gemmlowp_output_stage GEMMLowp output stage info * @param[in] gemm_3d_depth Depth of GEMM 3D + * @param[in] act_info Activation to apply after the matrix multiplication */ - void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth = 1); + void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, + const ActivationLayerInfo &act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines * * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32. @@ -176,22 +178,21 @@ private: * @param[in] gemmlowp_output_stage GEMMLowp output stage info * @param[in] gemm_3d_depth Depth of GEMM 3D * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. - * @param[in] run_addition Flag which specifies if @ref CLGEMMMatrixMatrixMultiplyAddition to be run. + * @param[in] act_info Activation to apply after the matrix multiplication * * @return a status */ static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, bool skip_im2col, bool run_addition); + int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info); private: - CLMemoryGroup _memory_group; - CLConvolutionLayerReshapeWeights _reshape_weights; - CLIm2ColKernel _im2col_kernel; - CLGEMM _mm_gemm; - CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - CLCol2ImKernel _col2im_kernel; - CLActivationLayer _activationlayer_function; - CLSaturatedArithmeticOperationKernel _add_bias_kernel; + CLMemoryGroup _memory_group; + CLConvolutionLayerReshapeWeights _reshape_weights; + CLIm2ColKernel _im2col_kernel; + CLGEMM _mm_gemm; + CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; + CLCol2ImKernel _col2im_kernel; + CLActivationLayer _activationlayer_function; const ICLTensor *_original_weights; @@ -199,15 +200,11 @@ private: CLTensor _weights_reshaped; CLTensor _gemm_output; - DataLayout _data_layout; - - bool _append_bias; bool _skip_im2col; bool _skip_col2im; bool _is_quantized; - bool _is_activationlayer_enabled; + bool _fuse_activation; bool _is_prepared; - bool _run_addition; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */ -- cgit v1.2.1