From ebc3a90721fe4a41b8e141466894d4d7185c01b7 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 16 Nov 2018 16:04:25 +0000 Subject: COMPMID-1706: Fuse the bias addition within CLGEMM Change-Id: I378f2023f4fa010f195f76716ac07aa86279bfae Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/280 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice --- arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h | 4 ++-- arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arm_compute/runtime/CL') diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index d6d88cec55..e800dd7cbb 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -136,7 +136,7 @@ private: CLGEMM _mm_gemm; CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; - CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to add bias in CLFullyConnectedLayer CLTensor _flatten_output; CLTensor _gemmlowp_output; CLTensor _converted_weights_output; diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index d7694a8328..b304576f33 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -163,7 +163,7 @@ private: * @param[in, out] output Output tensor. Data types supported: Same as @p input, * except for input of QASYMM8 type where output should be of S32 type. * @param[in] gemmlowp_output_stage GEMMLowp output stage info - * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) + * @param[in] gemm_3d_depth Depth of GEMM 3D */ void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines @@ -175,13 +175,14 @@ private: * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. * @param[in] gemmlowp_output_stage GEMMLowp output stage info - * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) - * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false) + * @param[in] gemm_3d_depth Depth of GEMM 3D + * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. + * @param[in] run_addition Flag which specifies if @ref CLGEMMMatrixMatrixMultiplyAddition to be run. * * @return a status */ static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth = 1, bool skip_im2col = false); + int gemm_3d_depth, bool skip_im2col, bool run_addition); private: CLMemoryGroup _memory_group; @@ -207,6 +208,7 @@ private: bool _is_quantized; bool _is_activationlayer_enabled; bool _is_prepared; + bool _run_addition; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */ -- cgit v1.2.1