From 7657224de2b697a8a92cccf26d98e53ccd7c1a03 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Wed, 4 Apr 2018 17:44:26 +0100
Subject: COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution

Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657
Tested-by: Jenkins
Reviewed-by: Michele DiGiorgio
Reviewed-by: Georgios Pinitas
---
 src/core/CL/cl_kernels/depthwise_convolution_quantized.cl | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/core/CL/cl_kernels/depthwise_convolution_quantized.cl')

diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index a0c0a8b1fb..ccb3a1ffe2 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -126,6 +126,8 @@ __kernel void depthwise_convolution_3x3_quantized_nchw(
     int bias_value = *((__global int *)(vector_offset(&biases, get_global_id(2))));
 #endif //defined(HAS_BIAS)
 
+    src.ptr -= (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z;
+
     uchar3 w0 = vload3(0, weights.ptr + 0 * weights_stride_y);
     uchar3 w1 = vload3(0, weights.ptr + 1 * weights_stride_y);
     uchar3 w2 = vload3(0, weights.ptr + 2 * weights_stride_y);
-- 
cgit v1.2.1