diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2018-08-07 11:53:30 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | c6aa49b6709edada24b1ab3bc1308e0974f9e057 (patch) | |
tree | 0509684f9f402a33d0494c1fa1f34c18f956fcf1 /src/core/CL/cl_kernels/convolution_layer.cl | |
parent | be4100605230868b5cc50cabee395613dbfb62cd (diff) | |
download | ComputeLibrary-c6aa49b6709edada24b1ab3bc1308e0974f9e057.tar.gz |
COMPMID-1344 Add grouping support to CLWeightsReshapeKernel
Change-Id: Idde333308db71087ec234b3fd1eb4e36a44db46c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143049
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/convolution_layer.cl')
-rw-r--r-- | src/core/CL/cl_kernels/convolution_layer.cl | 44 |
1 files changed, 30 insertions, 14 deletions
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl index 2b83e5adf1..9335b047fe 100644 --- a/src/core/CL/cl_kernels/convolution_layer.cl +++ b/src/core/CL/cl_kernels/convolution_layer.cl @@ -23,10 +23,11 @@ */ #include "helpers.h" -#if defined(DATA_TYPE) +#if defined(DATA_TYPE) && defined(NUM_GROUPS) /** This kernel reshapes the tensor's low three dimensions to single column * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short + * @note The number of groups should be given as a preprocessor argument using -DNUM_GROUPS=number. e.g. -DNUM_GROUPS=2 * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -50,6 +51,7 @@ * @param[in] height The height of the input tensor * @param[in] depth The depth of the input tensor * @param[in] total_filters Total number of filters. 4th dimension of the weights matrix + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) */ __kernel void reshape_to_columns_nchw( TENSOR3D_DECLARATION(src), @@ -57,7 +59,7 @@ __kernel void reshape_to_columns_nchw( #ifdef HAS_BIAS VECTOR_DECLARATION(bias), #endif /* HAS_BIAS */ - uint width, uint height, uint depth, uint total_filters) + uint width, uint height, uint depth, uint total_filters, uint dst_stride_z) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); bool is_last_thread = (get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1)); @@ -71,25 +73,39 @@ __kernel void reshape_to_columns_nchw( if(is_last_thread) { - for(uint i = 0; i < total_filters; ++i) + for(uint g = 0; g < NUM_GROUPS; ++g) { - *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr); + __global uchar *curr_group_dst = tmp_dst_ptr; + + for(uint i = 0; i < total_filters / NUM_GROUPS; ++i) + { + *((__global DATA_TYPE *)curr_group_dst) = *((__global DATA_TYPE *)tmp_src_ptr); #ifdef HAS_BIAS - *((__global DATA_TYPE *)(tmp_dst_ptr + dst_stride_y)) = *((__global DATA_TYPE *)(tmp_bias_ptr)); - tmp_bias_ptr += bias_stride_x; + *((__global DATA_TYPE *)(curr_group_dst + dst_stride_y)) = *((__global DATA_TYPE *)(tmp_bias_ptr)); + tmp_bias_ptr += bias_stride_x; #endif /* HAS_BIAS */ - tmp_src_ptr += depth * src_stride_z; - tmp_dst_ptr += dst_stride_x; + tmp_src_ptr += depth * src_stride_z; + curr_group_dst += dst_stride_x; + } + + tmp_dst_ptr += dst_stride_z; } } else { - for(uint i = 0; i < total_filters; ++i) + for(uint g = 0; g < NUM_GROUPS; ++g) { - *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr); - tmp_src_ptr += depth * src_stride_z; - tmp_dst_ptr += dst_stride_x; + __global uchar *curr_group_dst = tmp_dst_ptr; + + for(uint i = 0; i < total_filters / NUM_GROUPS; ++i) + { + *((__global DATA_TYPE *)curr_group_dst) = *((__global DATA_TYPE *)tmp_src_ptr); + tmp_src_ptr += depth * src_stride_z; + curr_group_dst += dst_stride_x; + } + + tmp_dst_ptr += dst_stride_z; } } } @@ -127,7 +143,7 @@ __kernel void reshape_to_columns_nhwc( #ifdef HAS_BIAS VECTOR_DECLARATION(bias), #endif /* HAS_BIAS */ - uint depth, uint width, uint height, uint total_filters) + uint depth, uint width, uint height, uint total_filters, uint dst_stride_z) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); bool is_last_thread = (get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1)); @@ -163,4 +179,4 @@ __kernel void reshape_to_columns_nhwc( } } } -#endif // defined(DATA_TYPE)
\ No newline at end of file +#endif // defined(DATA_TYPE) && defined(NUM_GROUPS)
\ No newline at end of file |