From f3622becf1f0d6bf5147ebb7d6d0f14d5252860a Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Mon, 29 Jul 2019 14:27:16 +0100
Subject: COMPMID-1979: Fuse Activation Function in CLGEMM - part 4

Fused activation function in CLGEMM

Change-Id: I644fdf09349325c0b3a2cd5fef2a3ea2c974149d
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/1640
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
---
 arm_compute/runtime/CL/functions/CLGEMM.h         |  2 --
 .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 29 ++++++++++------------
 2 files changed, 13 insertions(+), 18 deletions(-)

(limited to 'arm_compute/runtime')

diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 8c462fa4cb..e2a92a8a37 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -127,7 +127,6 @@ private:
     CLMemoryGroup                      _memory_group;
     CLGEMMMatrixMultiplyKernel         _mm_kernel;
-    CLGEMMMatrixAdditionKernel         _ma_kernel;
     CLGEMMReshapeLHSMatrixKernel       _reshape_lhs_kernel;
     CLGEMMReshapeRHSMatrixKernel       _reshape_rhs_kernel;
     CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
@@ -135,7 +134,6 @@ private:
     CLTensor                           _tmp_a;
     CLTensor                           _tmp_b;
     const ICLTensor                   *_original_b;
-    bool                               _run_addition;
     bool                               _reshape_b_only_on_first_run;
     bool                               _is_prepared;
     GEMMType                           _gemm_type;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index e9a3f9bf2b..027727c7f7 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -163,8 +163,10 @@ private:
      *                                  except for input of QASYMM8 type where output should be of S32 type.
      * @param[in] gemmlowp_output_stage GEMMLowp output stage info
      * @param[in] gemm_3d_depth         Depth of GEMM 3D
+     * @param[in] act_info              Activation to apply after the matrix multiplication
      */
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth = 1);
+    void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth,
+                      const ActivationLayerInfo &act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
      *
      * @param[in] input                 Input tensor. Data types supported: QASYMM8/F16/F32.
@@ -176,22 +178,21 @@ private:
      * @param[in] gemmlowp_output_stage GEMMLowp output stage info
      * @param[in] gemm_3d_depth         Depth of GEMM 3D
      * @param[in] skip_im2col           Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout.
-     * @param[in] run_addition          Flag which specifies if @ref CLGEMMMatrixMatrixMultiplyAddition to be run.
+     * @param[in] act_info              Activation to apply after the matrix multiplication
      *
      * @return a status
      */
     static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
-                              int gemm_3d_depth, bool skip_im2col, bool run_addition);
+                              int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info);
 
 private:
-    CLMemoryGroup                        _memory_group;
-    CLConvolutionLayerReshapeWeights     _reshape_weights;
-    CLIm2ColKernel                       _im2col_kernel;
-    CLGEMM                               _mm_gemm;
-    CLGEMMLowpMatrixMultiplyCore         _mm_gemmlowp;
-    CLCol2ImKernel                       _col2im_kernel;
-    CLActivationLayer                    _activationlayer_function;
-    CLSaturatedArithmeticOperationKernel _add_bias_kernel;
+    CLMemoryGroup                    _memory_group;
+    CLConvolutionLayerReshapeWeights _reshape_weights;
+    CLIm2ColKernel                   _im2col_kernel;
+    CLGEMM                           _mm_gemm;
+    CLGEMMLowpMatrixMultiplyCore     _mm_gemmlowp;
+    CLCol2ImKernel                   _col2im_kernel;
+    CLActivationLayer                _activationlayer_function;
 
     const ICLTensor *_original_weights;
 
@@ -199,15 +200,11 @@ private:
     CLTensor _weights_reshaped;
     CLTensor _gemm_output;
 
-    DataLayout _data_layout;
-
-    bool _append_bias;
     bool _skip_im2col;
     bool _skip_col2im;
     bool _is_quantized;
-    bool _is_activationlayer_enabled;
+    bool _fuse_activation;
     bool _is_prepared;
-    bool _run_addition;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */
-- 
cgit v1.2.1
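Note on the change: the patch threads an ActivationLayerInfo down to the GEMM configuration and replaces the _is_activationlayer_enabled flag with _fuse_activation, so the activation can be applied as part of the GEMM itself instead of by a separate CLActivationLayer pass when fusion is possible. The sketch below is plain standalone C++, not Arm Compute Library code; the names ActivationKind, apply_activation and gemm_fused are invented for illustration. It only shows what "fusing an activation into a GEMM" means: the activation is applied to each output element as it is written out, so no second read/write pass over the output tensor is needed.

```cpp
// Illustrative sketch only: demonstrates the idea of a fused GEMM + activation.
// None of these names belong to the Arm Compute Library API.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

enum class ActivationKind { None, ReLU, BoundedReLU };

// Apply the chosen activation to a single value.
inline float apply_activation(float x, ActivationKind act, float upper = 6.0f)
{
    switch (act)
    {
        case ActivationKind::ReLU:        return std::max(0.0f, x);
        case ActivationKind::BoundedReLU: return std::min(upper, std::max(0.0f, x));
        default:                          return x;
    }
}

// C[m x n] = act(A[m x k] * B[k x n]); the activation is applied while each
// output element is produced ("fused"), rather than in a second pass over C.
void gemm_fused(const std::vector<float> &a, const std::vector<float> &b, std::vector<float> &c,
                std::size_t m, std::size_t n, std::size_t k, ActivationKind act)
{
    for (std::size_t i = 0; i < m; ++i)
    {
        for (std::size_t j = 0; j < n; ++j)
        {
            float acc = 0.0f;
            for (std::size_t p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            c[i * n + j] = apply_activation(acc, act); // fused: no extra kernel launch
        }
    }
}

int main()
{
    const std::size_t m = 2, n = 2, k = 2;
    const std::vector<float> a = {1.f, -2.f, 3.f, 4.f}; // 2x2, row-major
    const std::vector<float> b = {1.f, 0.f, 0.f, 1.f};  // identity
    std::vector<float>       c(m * n);

    gemm_fused(a, b, c, m, n, k, ActivationKind::ReLU);

    for (float v : c)
    {
        std::cout << v << ' '; // prints "1 0 3 4": the negative element is clamped by the fused ReLU
    }
    std::cout << '\n';
}
```

On a GPU backend such as OpenCL, the equivalent saving is one fewer kernel launch and one fewer full read/write of the GEMM output, which is presumably why the layer only falls back to the retained CLActivationLayer member when the requested activation cannot be fused.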