aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/helpers.h
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2017-07-11 16:54:04 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:15:39 +0100
commit7ff47a313d62011923c5307cc52510b4ba41a631 (patch)
tree1ac596f79202d51bfe2c8217c9a2e9f12b68337e /src/core/CL/cl_kernels/helpers.h
parentda37e2fa661f85090bd4a3b93c07178df268bd8c (diff)
downloadComputeLibrary-7ff47a313d62011923c5307cc52510b4ba41a631.tar.gz
COMPMID-443: Use 3D tensors for fill_border_image
2x performance improvement on some GoogLeNet Pooling tests Change-Id: If75336aa6308731a06462a73cd9209d24574509e Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80342 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Steven Niu <steven.niu@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/helpers.h')
-rw-r--r--src/core/CL/cl_kernels/helpers.h29
1 files changed, 29 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h
index cf3cb78e04..29a43f769b 100644
--- a/src/core/CL/cl_kernels/helpers.h
+++ b/src/core/CL/cl_kernels/helpers.h
@@ -78,6 +78,9 @@
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
+#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
+ update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
+
#define CONVERT_TO_TENSOR3D_STRUCT(name) \
update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
name##_stride_z, name##_step_z)
@@ -157,6 +160,32 @@ Image inline update_image_workitem_ptr(__global uchar *ptr, uint offset_first_el
return img;
}
+/** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
+ *
+ * @param[in] ptr Pointer to the starting postion of the buffer
+ * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[in] stride_x Stride of the image in X dimension (in bytes)
+ * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] stride_y Stride of the image in Y dimension (in bytes)
+ * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] stride_z Stride of the image in Z dimension (in bytes)
+ * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
+ *
+ * @return A 3D tensor object
+ */
+Image inline update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
+{
+ Image img =
+ {
+ .ptr = ptr,
+ .offset_first_element_in_bytes = offset_first_element_in_bytes,
+ .stride_x = stride_x,
+ .stride_y = stride_y
+ };
+ img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
+ return img;
+}
+
/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
*
* @param[in] ptr Pointer to the starting postion of the buffer