aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2017-11-28 10:31:43 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:41:58 +0000
commitfcd52fbc578a2f5e6a1df4c823284621cc55645a (patch)
treeb6e7430b2e69fa26fa2405723f827a7e7dc73447 /src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
parent666635c68ebbb182d1db4a85f33ed5325d472a65 (diff)
downloadComputeLibrary-fcd52fbc578a2f5e6a1df4c823284621cc55645a.tar.gz
COMPMID-661: Vectorize im2col and add lws heuristics for convolution kernels #46
Change-Id: Idaab987384d6a12a114f609abd50446fd94536b2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110879 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index d39dcdb336..16706dd748 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -68,7 +68,19 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
GPUTarget arch_target = get_arch_from_target(get_target());
// Configure LWS hint
- _lws_hint = (output->info()->dimension(1) == 196) ? cl::NDRange(1, 7) : cl::NDRange(8, 8);
+ if(arch_target == GPUTarget::BIFROST && input1->info()->dimension(1) == 24)
+ {
+ // LWS optimized for the 11x11 AlexNet convolution on Bifrost.
+ _lws_hint = cl::NDRange(2, 2);
+ }
+ else if(output->info()->dimension(1) == 196)
+ {
+ _lws_hint = cl::NDRange(1, 7);
+ }
+ else
+ {
+ _lws_hint = cl::NDRange(8, 8);
+ }
// Create build options
CLBuildOptions build_opts;