From 7657224de2b697a8a92cccf26d98e53ccd7c1a03 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Wed, 4 Apr 2018 17:44:26 +0100
Subject: COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution

Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657
Tested-by: Jenkins
Reviewed-by: Michele DiGiorgio
Reviewed-by: Georgios Pinitas
---
 src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/core/GLES_COMPUTE/cs_shaders')

diff --git a/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs
index adfc126c95..134cc1060f 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -108,6 +108,8 @@ void main()
     uint z_index = gl_GlobalInvocationID.z;
     TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_attrs.stride_z);
 
+    src_iter.current_offset_in_bytes -= int((z_index - z_index / uint(DEPTH_MULTIPLIER)) * src_attrs.step_z);
+
     vec4 w[3];
     w[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
     w[1] = LOAD_UNPACK4_HALF(weights_ptr, TENSOR3D_OFFSET(weights_iter, 0, 1, 0));
@@ -263,6 +265,8 @@ void main()
     uint z_index = gl_GlobalInvocationID.z;
     TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_attrs.stride_z);
 
+    src_iter.current_offset_in_bytes -= int((z_index - z_index / uint(DEPTH_MULTIPLIER)) * src_attrs.step_z);
+
     vec4 w[3];
     w[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
     w[1] = LOAD_UNPACK4_HALF(weights_ptr, TENSOR3D_OFFSET(weights_iter, 0, 1, 0));
-- 
cgit v1.2.1