From edfa9f463bed084f8b0953557202b2a1e56da817 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Tue, 15 Aug 2017 11:45:22 +0100
Subject: COMPMID-477 - Optimized batched case in CLConvolutionLayer

Change-Id: I4ef18f49f1da0cb816aaa0762466b940792c15ed
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/84162
Tested-by: Kaizen
Reviewed-by: Anthony Barbier
---
 src/runtime/CL/functions/CLConvolutionLayer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/runtime/CL/functions/CLConvolutionLayer.cpp')

diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index b1b83985d0..0bbec94e78 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -197,9 +197,12 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
     // Configure kernels
     _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
+
+    // Configure matrix multiply
     if(_is_fully_connected_convolution)
     {
-        _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f);
+        // The matrix A and Matrix B have not been reshaped
+        _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f, false);
     }
     else
     {
-- 
cgit v1.2.1