From fcd52fbc578a2f5e6a1df4c823284621cc55645a Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Tue, 28 Nov 2017 10:31:43 +0000 Subject: COMPMID-661: Vectorize im2col and add lws heuristics for convolution kernels #46 Change-Id: Idaab987384d6a12a114f609abd50446fd94536b2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110879 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- src/runtime/CL/functions/CLConvolutionLayer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/runtime/CL/functions/CLConvolutionLayer.cpp') diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index a3be6f4144..8d45416b30 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -202,6 +202,8 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig _memory_group.manage(&_gemm_output); // Configure kernels + + _input_im2col_kernel.set_target(CLScheduler::get().target()); _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); // Configure matrix multiply @@ -217,6 +219,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig _input_interleaved_reshaped.allocator()->allocate(); } _input_im2col_reshaped.allocator()->allocate(); + _output_col2im_kernel.set_target(CLScheduler::get().target()); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); _gemm_output.allocator()->allocate(); -- cgit v1.2.1