diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-11-28 10:33:22 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:41:36 +0000 |
commit | a2ea75360b1193318dc8441bbd9120eb747041ae (patch) | |
tree | 15d1b8f062be484bd2c5649a089d0711c7b121ca /src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | |
parent | d912fd8eaaa56aac90f2b0b118c76f24ba8efa02 (diff) | |
download | ComputeLibrary-a2ea75360b1193318dc8441bbd9120eb747041ae.tar.gz |
COMPMID-661 Add Bifrost lws heuristics for several depthwise_convolution kernels #49
Change-Id: Ibfa1c1cc9fc8501b22a18ecd519758f4aeb301eb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110880
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 10 |
1 file changed, 9 insertions, 1 deletion
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 70af5d63cf..951bc144aa 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -63,6 +63,14 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
 
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_mv", build_opts));
 
+    // Configure the local work size for Bifrost with a value obtained
+    // via exhaustive autotuning for the MobileNets tensor shapes.
+    const GPUTarget gpu_target = get_arch_from_target(get_target());
+    if(gpu_target == GPUTarget::BIFROST)
+    {
+        _lws_hint = cl::NDRange(1, 1, 1);
+    }
+
     // Configure kernel window
     const unsigned int num_elems_read_per_iteration = 4;
 
@@ -119,7 +127,7 @@ void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueu
         unsigned int idx_2 = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
         add_3D_tensor_argument(idx_0, _input0, slice_in);
         add_1D_tensor_argument(idx_2, _output, slice_out);
-        enqueue(queue, *this, slice_in);
+        enqueue(queue, *this, slice_in, _lws_hint);
     }
     while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
 }