From a2ea75360b1193318dc8441bbd9120eb747041ae Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Tue, 28 Nov 2017 10:33:22 +0000 Subject: COMPMID-661 Add Bifrost lws heuristics for several depthwise_convolution kernels #49 Change-Id: Ibfa1c1cc9fc8501b22a18ecd519758f4aeb301eb Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110880 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier Reviewed-by: Gian Marco Iodice Reviewed-by: Georgios Pinitas --- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp') diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index c23941426e..ad9ac0ecd6 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -73,6 +73,14 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu } _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts)); + // Configure the local work size for Bifrost with a value obtained + // via exhaustive autotuning for the MobileNets tensor shapes. 
+ const GPUTarget gpu_target = get_arch_from_target(get_target()); + if(gpu_target == GPUTarget::BIFROST) + { + _lws_hint = cl::NDRange(1, 2, 1); + } + // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); // The CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped @@ -105,7 +113,7 @@ void CLDepthwiseIm2ColKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, _lws_hint); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); } -- cgit v1.2.1