From 68a3f56627b04acdefebe67d645727dd83889766 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 26 Jul 2018 11:44:03 +0100 Subject: COMPMID-1276 - Allow GEMM to work with 3D input tensor Skipped im2col in CLGEMMConvolutionLayer for 1x1 convolutions with NHWC data layout Change-Id: I894e6b952ed8605e8f3ffc0ffc25c24730d4664c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141909 Tested-by: Jenkins Reviewed-by: Anthony Barbier Reviewed-by: Georgios Pinitas --- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h') diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 09daa5f568..7c272a348b 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -26,8 +26,8 @@ #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" @@ -82,19 +82,13 @@ private: }; /** Basic function to compute the convolution layer. 
This function calls the following OpenCL kernels/functions: - * - * Note: weights already reshaped for quantized asymmetric is not supported * * -# @ref CLIm2ColKernel - * -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric) - * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) - * -# @ref CLCol2ImKernel - * - * if the weights are already reshaped: - * -# @ref CLGEMMInterleave4x4Kernel - * -# @ref CLGEMMMatrixMultiplyKernel - * else - * -# @ref CLGEMM + * -# @ref CLGEMM (if the data type is FP32 or FP16) + * -# @ref CLGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8) + * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8) + * -# @ref CLArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout) + * -# @ref CLCol2ImKernel (if NCHW data layout) */ class CLGEMMConvolutionLayer : public IFunction { @@ -172,10 +166,11 @@ private: * @param[in] output Output tensor. Data types supported: Same as @p input, * except for input of QASYMM8 type where output should be of S32 type. * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) + * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. 
(Defaults to false) * * @return a status */ - static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int gemm_3d_depth = 1); + static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int gemm_3d_depth = 1, bool skip_im2col = false); private: CLMemoryGroup _memory_group; @@ -186,6 +181,7 @@ private: CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; CLCol2ImKernel _col2im_kernel; CLActivationLayer _activationlayer_function; + CLArithmeticAdditionKernel _add_bias_kernel; const ICLTensor *_original_weights; @@ -196,6 +192,7 @@ private: DataLayout _data_layout; + bool _append_bias; bool _skip_im2col; bool _is_quantized; bool _is_activationlayer_enabled; -- cgit v1.2.1