about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/fill_border.cl
diff options
context:
space:
mode:
author: Anthony Barbier <anthony.barbier@arm.com> 2017-07-11 16:54:04 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:15:39 +0100
commit: 7ff47a313d62011923c5307cc52510b4ba41a631 (patch)
tree: 1ac596f79202d51bfe2c8217c9a2e9f12b68337e /src/core/CL/cl_kernels/fill_border.cl
parent: da37e2fa661f85090bd4a3b93c07178df268bd8c (diff)
download: ComputeLibrary-7ff47a313d62011923c5307cc52510b4ba41a631.tar.gz
COMPMID-443: Use 3D tensors for fill_border_image
2x performance improvement on some GoogLeNet Pooling tests

Change-Id: If75336aa6308731a06462a73cd9209d24574509e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80342
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/fill_border.cl')
-rw-r--r-- src/core/CL/cl_kernels/fill_border.cl 12
1 file changed, 8 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index df635869b1..5fbe3ea070 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -36,18 +36,20 @@
* @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
* @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] buf_stride_z Stride between images if batching images (in bytes)
+ * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image
* @param[in] width Width of the valid region of the image
* @param[in] height Height of the valid region of the image
* @param[in] start_pos XY coordinate indicating the start point of the valid region
*/
__kernel void fill_image_borders_replicate(
- IMAGE_DECLARATION(buf),
+ TENSOR3D_DECLARATION(buf),
uint width,
uint height,
int2 start_pos)
{
- Image buf = CONVERT_TO_IMAGE_STRUCT_NO_STEP(buf);
+ Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf);
// Update pointer to point to the starting point of the valid region
buf.ptr += start_pos.y * buf.stride_y + start_pos.x * buf.stride_x;
@@ -109,6 +111,8 @@ __kernel void fill_image_borders_replicate(
* @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
* @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] buf_stride_z Stride between images if batching images (in bytes)
+ * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image
* @param[in] width Width of the valid region of the image
* @param[in] height Height of the valid region of the image
@@ -116,13 +120,13 @@ __kernel void fill_image_borders_replicate(
* @param[in] constant_value Constant value to use to fill the edges
*/
__kernel void fill_image_borders_constant(
- IMAGE_DECLARATION(buf),
+ TENSOR3D_DECLARATION(buf),
uint width,
uint height,
int2 start_pos,
DATA_TYPE constant_value)
{
- Image buf = CONVERT_TO_IMAGE_STRUCT_NO_STEP(buf);
+ Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf);
// Update pointer to point to the starting point of the valid region
buf.ptr += start_pos.y * buf.stride_y + start_pos.x * buf.stride_x;