From 57f249b08fd65af761d5c8bfe62de117d67a14c7 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 6 Dec 2017 17:21:12 +0000 Subject: COMPMID-661: Add Bifrost lws heuristics dwc with pad=1 [new_dev] #52 Change-Id: I8eee02a2f092622d504e9f38cf1b3461cf4a2553 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112208 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- .../CLDepthwiseConvolutionLayer3x3Kernel.cpp | 36 ++++++++++++++++------ 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp index 003f1f8330..ddc3a2dd25 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp @@ -145,18 +145,36 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con const GPUTarget gpu_target = get_arch_from_target(get_target()); if(gpu_target == GPUTarget::BIFROST) { - const size_t width = input->info()->dimension(0); - if(width >= 56) // 56 or 112 + // Assume uniform padding and striding. + const size_t pad = _conv_pad_left; + const size_t stride = _conv_stride_x; + const size_t width = input->info()->dimension(0); + if(pad == 1) { - _lws_hint = cl::NDRange(8, 5, 2); + const size_t width_by_stride = width / stride; + if(width_by_stride == 28) // 56/2 or 28/1 + { + _lws_hint = cl::NDRange(7, 4, 3); + } + else if(width_by_stride == 14) // 28/2 or 14/1 + { + _lws_hint = cl::NDRange(7, 7, 4); + } } - else if(width >= 14) // 14 or 28 + else if(pad == 0) { - _lws_hint = cl::NDRange(1, 5, 2); - } - else // 7 - { - _lws_hint = cl::NDRange(1, 1, 2); + if(width >= 56) // 56 or 112 + { + _lws_hint = cl::NDRange(8, 5, 2); + } + else if(width >= 14) // 14 or 28 + { + _lws_hint = cl::NDRange(1, 5, 2); + } + else // 7 + { + _lws_hint = cl::NDRange(1, 1, 2); + } } } -- cgit v1.2.1