aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/tile_helpers.h
diff options
context:
space:
mode:
authorAdnan AlSinan <adnan.alsinan@arm.com>2021-12-10 12:34:02 +0000
committerSiCong Li <sicong.li@arm.com>2021-12-23 17:01:14 +0000
commit3e155a52f19db046f83e30c25182460b00d108c7 (patch)
treef3ec747af21c8ee7b95d6dc88bf6ea4b4aa44659 /src/core/CL/cl_kernels/tile_helpers.h
parentf76a502a73ca628e2a2556abeaa60ed17bb68d97 (diff)
downloadComputeLibrary-3e155a52f19db046f83e30c25182460b00d108c7.tar.gz
Rework gemm_reshape_lhs_ with new macros
Resolves COMPMID-4892 Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com> Change-Id: I52f23ca293506fc693ae829daccc6e889a050752 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6833 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h36
1 files changed, 36 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 30c37da1d0..5706248e98 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -496,6 +496,42 @@
}) \
})
+/** Load a tile from global memory (tensor) using an indirect Y index tile and conditionally use a different length for the load
+ *
+ * @note If WIDTH1_CONDITION is true, the load will use the WIDTH1 length for the store
+ * @note The vectors are stored in reverse order so the invalid rows are overwritten by the valid ones
+ *
+ * @param[in] DATA_TYPE Data type
+ * @param[in] HEIGHT Number of dst rows
+ * @param[in] WIDTH0 Store width to use if WIDTH1_CONDITION = false
+ * @param[in] WIDTH1 Store width to use if WIDTH1_CONDITION = true
+ * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image).
+ * In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
+ * @param[in] TENSOR Tensor basename
+ * @param[in] X Starting X position
+ * @param[in] STRIDE_Y Stride Y (in bytes) used to load each row.
+ * @param[in] WIDTH1_CONDITION Condition to select the WIDTH1 store
+ * @param[out] dst Output tile
+ * @param[out] indirect_y Indirect Y index tile
+ */
+#define T_LOAD_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, dst, indirect_y) \
+ ({ \
+ if(WIDTH1_CONDITION) \
+ { \
+ LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
+ { \
+ VLOAD_PARTIAL(WIDTH0, WIDTH1) \
+ (dst[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
+ }) \
+ } \
+ else \
+ { \
+ LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \
+ { \
+ dst[HEIGHT - 1 - _i].v = V_LOAD(DATA_TYPE, WIDTH0, TENSOR_TYPE, TENSOR, X, (indirect_y[HEIGHT - 1 - _i].v), STRIDE_Y); \
+ }) \
+ } \
+ })
/** Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout
*
* @param[in] DATA_TYPE Data type