From fcd52fbc578a2f5e6a1df4c823284621cc55645a Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Tue, 28 Nov 2017 10:31:43 +0000 Subject: COMPMID-661: Vectorize im2col and add lws heuristics for convolution kernels #46 Change-Id: Idaab987384d6a12a114f609abd50446fd94536b2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110879 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index d39dcdb336..16706dd748 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -68,7 +68,19 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen GPUTarget arch_target = get_arch_from_target(get_target()); // Configure LWS hint - _lws_hint = (output->info()->dimension(1) == 196) ? cl::NDRange(1, 7) : cl::NDRange(8, 8); + if(arch_target == GPUTarget::BIFROST && input1->info()->dimension(1) == 24) + { + // LWS optimized for the 11x11 AlexNet convolution on Bifrost. + _lws_hint = cl::NDRange(2, 2); + } + else if(output->info()->dimension(1) == 196) + { + _lws_hint = cl::NDRange(1, 7); + } + else + { + _lws_hint = cl::NDRange(8, 8); + } // Create build options CLBuildOptions build_opts; -- cgit v1.2.1