about | summary | refs | log | tree | commit | diff
path: root/src/runtime/CL/functions/CLConvolutionLayer.cpp
diff options
context:
space:
mode:
author: Anthony Barbier <anthony.barbier@arm.com> 2017-11-28 10:31:43 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:41:58 +0000
commit: fcd52fbc578a2f5e6a1df4c823284621cc55645a (patch)
tree: b6e7430b2e69fa26fa2405723f827a7e7dc73447 /src/runtime/CL/functions/CLConvolutionLayer.cpp
parent: 666635c68ebbb182d1db4a85f33ed5325d472a65 (diff)
download: ComputeLibrary-fcd52fbc578a2f5e6a1df4c823284621cc55645a.tar.gz
COMPMID-661: Vectorize im2col and add lws heuristics for convolution kernels #46
Change-Id: Idaab987384d6a12a114f609abd50446fd94536b2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110879
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLConvolutionLayer.cpp')
-rw-r--r-- src/runtime/CL/functions/CLConvolutionLayer.cpp | 3
1 file changed, 3 insertions, 0 deletions
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index a3be6f4144..8d45416b30 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -202,6 +202,8 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
_memory_group.manage(&_gemm_output);
// Configure kernels
+
+ _input_im2col_kernel.set_target(CLScheduler::get().target());
_input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
// Configure matrix multiply
@@ -217,6 +219,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
_input_interleaved_reshaped.allocator()->allocate();
}
_input_im2col_reshaped.allocator()->allocate();
+ _output_col2im_kernel.set_target(CLScheduler::get().target());
_output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
_gemm_output.allocator()->allocate();