diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-11-28 10:33:22 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:41:36 +0000 |
commit | a2ea75360b1193318dc8441bbd9120eb747041ae (patch) | |
tree | 15d1b8f062be484bd2c5649a089d0711c7b121ca /src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | |
parent | d912fd8eaaa56aac90f2b0b118c76f24ba8efa02 (diff) | |
download | ComputeLibrary-a2ea75360b1193318dc8441bbd9120eb747041ae.tar.gz |
COMPMID-661 Add Bifrost lws heuristics for several depthwise_convolution kernels #49
Change-Id: Ibfa1c1cc9fc8501b22a18ecd519758f4aeb301eb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110880
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 10 |
1 file changed, 9 insertions, 1 deletion
```diff
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index c23941426e..ad9ac0ecd6 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -73,6 +73,14 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
     }
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts));
 
+    // Configure the local work size for Bifrost with a value obtained
+    // via exhaustive autotuning for the MobileNets tensor shapes.
+    const GPUTarget gpu_target = get_arch_from_target(get_target());
+    if(gpu_target == GPUTarget::BIFROST)
+    {
+        _lws_hint = cl::NDRange(1, 2, 1);
+    }
+
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps());
     // The CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped
@@ -105,7 +113,7 @@ void CLDepthwiseIm2ColKernel::run(const Window &window, cl::CommandQueue &queue)
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice_in);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice);
+        enqueue(queue, *this, slice, _lws_hint);
     }
     while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
 }
```