diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-07-11 16:54:04 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:15:39 +0100 |
commit | 7ff47a313d62011923c5307cc52510b4ba41a631 (patch) | |
tree | 1ac596f79202d51bfe2c8217c9a2e9f12b68337e /src/core/CL/cl_kernels/fill_border.cl | |
parent | da37e2fa661f85090bd4a3b93c07178df268bd8c (diff) | |
download | ComputeLibrary-7ff47a313d62011923c5307cc52510b4ba41a631.tar.gz |
COMPMID-443: Use 3D tensors for fill_border_image
2x performance improvement on some GoogLeNet Pooling tests
Change-Id: If75336aa6308731a06462a73cd9209d24574509e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80342
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/fill_border.cl')
-rw-r--r-- | src/core/CL/cl_kernels/fill_border.cl | 12 |
1 files changed, 8 insertions, 4 deletions
```diff
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index df635869b1..5fbe3ea070 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -36,18 +36,20 @@
  * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
  * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] buf_stride_z Stride between images if batching images (in bytes)
+ * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes)
  * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image
  * @param[in] width Width of the valid region of the image
  * @param[in] height Height of the valid region of the image
  * @param[in] start_pos XY coordinate indicating the start point of the valid region
  */
 __kernel void fill_image_borders_replicate(
-    IMAGE_DECLARATION(buf),
+    TENSOR3D_DECLARATION(buf),
     uint width,
     uint height,
     int2 start_pos)
 {
-    Image buf = CONVERT_TO_IMAGE_STRUCT_NO_STEP(buf);
+    Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf);
 
     // Update pointer to point to the starting point of the valid region
     buf.ptr += start_pos.y * buf.stride_y + start_pos.x * buf.stride_x;
@@ -109,6 +111,8 @@ __kernel void fill_image_borders_replicate(
  * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
  * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] buf_stride_z Stride between images if batching images (in bytes)
+ * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes)
  * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image
  * @param[in] width Width of the valid region of the image
  * @param[in] height Height of the valid region of the image
@@ -116,13 +120,13 @@ __kernel void fill_image_borders_replicate(
  * @param[in] constant_value Constant value to use to fill the edges
  */
 __kernel void fill_image_borders_constant(
-    IMAGE_DECLARATION(buf),
+    TENSOR3D_DECLARATION(buf),
     uint width,
     uint height,
     int2 start_pos,
     DATA_TYPE constant_value)
 {
-    Image buf = CONVERT_TO_IMAGE_STRUCT_NO_STEP(buf);
+    Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf);
 
     // Update pointer to point to the starting point of the valid region
     buf.ptr += start_pos.y * buf.stride_y + start_pos.x * buf.stride_x;
```