From 20d7848b1a0447dced362b3df57e9d30aebac5d4 Mon Sep 17 00:00:00 2001
From: Gian Marco
Date: Thu, 11 Jan 2018 15:10:58 +0000
Subject: COMPMID-816 - Enabled CLConvolutionLayer to use CLGEMM function
 instead of CLGEMMMatrixMultiplyKernel kernel.

Change-Id: If035fa3d1fb3ff4012442bcd908c370d21aa6657
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115990
Tested-by: Jenkins
Reviewed-by: Pablo Tello
Reviewed-by: Anthony Barbier
---
 .../runtime/CL/functions/CLConvolutionLayer.h | 31 +++++++++++++---------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'arm_compute/runtime/CL/functions/CLConvolutionLayer.h')

diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 3fe6604db9..f6672cef1d 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,6 +36,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/IMemoryManager.h"
@@ -76,15 +77,20 @@ private:
     bool _transpose1xW;
 };
 
-/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels:
+/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
+ *
+ * Note: weights already reshaped for quantized asymmetric is not supported
  *
- * -# @ref CLWeightsReshapeKernel (executed only once for each configuration)
- * -# @ref CLGEMMTranspose1xWKernel (executed only once for each configuration)
  * -# @ref CLIm2ColKernel
- * -# @ref CLGEMMInterleave4x4Kernel
- * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
  * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale (if quantized asymmetric)
  * -# @ref CLCol2ImKernel
+ *
+ * if the weights are already reshaped:
+ *    -# @ref CLGEMMInterleave4x4Kernel
+ *    -# @ref CLGEMMMatrixMultiplyKernel
+ * else
+ *    -# @ref CLGEMM
  */
 class CLConvolutionLayer : public IFunction
 {
@@ -119,20 +125,21 @@ private:
      *                                  except for input of QASYMM8 type where output should be of S32 type.
      * @param is_interleaved_transposed Flag that signals if matrix is interleaved transposed
      */
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed = true);
+    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed, bool are_weights_reshaped);
 
 private:
     CLMemoryGroup                                       _memory_group;
     CLConvolutionLayerReshapeWeights                    _reshape_weights;
-    CLIm2ColKernel                                      _input_im2col_kernel;
-    CLGEMMInterleave4x4Kernel                           _input_interleave_kernel;
+    CLIm2ColKernel                                      _im2col_kernel;
+    CLGEMMInterleave4x4Kernel                           _interleave_kernel;
     CLGEMMMatrixMultiplyKernel                          _mm_kernel;
+    CLGEMM                                              _mm_gemm;
     CLGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
     CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
-    CLCol2ImKernel                                      _output_col2im_kernel;
+    CLCol2ImKernel                                      _col2im_kernel;
 
-    CLTensor                                            _input_im2col_reshaped;
-    CLTensor                                            _input_interleaved_reshaped;
+    CLTensor                                            _im2col_output;
+    CLTensor                                            _interleave_output;
     CLTensor                                            _weights_reshaped;
     CLTensor                                            _weights_transposed;
     CLTensor                                            _gemm_output;
--
cgit v1.2.1
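
For context, a minimal usage sketch of the public function this patch touches. The change is internal (the float path is now routed through CLGEMM instead of calling CLGEMMMatrixMultiplyKernel directly), so callers of CLConvolutionLayer::configure() are unaffected. The tensor shapes, data type and padding below are illustrative assumptions, not taken from the patch:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // Initialise the default OpenCL context/queue used by the CL runtime functions.
    CLScheduler::get().default_init();

    // Illustrative shapes (assumption): 224x224x3 input, 3x3 kernel, 64 output feature maps,
    // stride 1 and no padding, all in F32.
    CLTensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 64U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(222U, 222U, 64U), 1, DataType::F32));

    // Configure the convolution. Internally this builds the im2col -> GEMM -> col2im pipeline
    // described in the doxygen comment above (CLGEMM for float, CLGEMMLowpMatrixMultiplyCore
    // for quantized asymmetric inputs).
    CLConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0));

    // Allocate the backing OpenCL buffers, then execute and wait for completion.
    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    conv.run();
    CLScheduler::get().sync();
    return 0;
}
```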