diff options
author | Adnan AlSinan <adnan.alsinan@arm.com> | 2021-12-10 12:34:02 +0000 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2021-12-23 17:01:14 +0000 |
commit | 3e155a52f19db046f83e30c25182460b00d108c7 (patch) | |
tree | f3ec747af21c8ee7b95d6dc88bf6ea4b4aa44659 /src/core/CL/cl_kernels/tile_helpers.h | |
parent | f76a502a73ca628e2a2556abeaa60ed17bb68d97 (diff) | |
download | ComputeLibrary-3e155a52f19db046f83e30c25182460b00d108c7.tar.gz |
Rework gemm_reshape_lhs_ with new macros
Resolves COMPMID-4892
Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: I52f23ca293506fc693ae829daccc6e889a050752
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6833
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r-- | src/core/CL/cl_kernels/tile_helpers.h | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index 30c37da1d0..5706248e98 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -496,6 +496,42 @@ }) \ }) +/** Load a tile from global memory (tensor) using an indirect Y index tile and conditionally use a different length for the load + * + * @note If WIDTH1_CONDITION is true, each row is loaded with VLOAD_PARTIAL(WIDTH0, WIDTH1); otherwise each row is loaded with V_LOAD using WIDTH0 + * @note The rows are loaded in reverse order, from row HEIGHT - 1 down to row 0 + * + * @param[in] DATA_TYPE Data type + * @param[in] HEIGHT Number of dst rows + * @param[in] WIDTH0 Load width to use if WIDTH1_CONDITION = false + * @param[in] WIDTH1 Load width to use if WIDTH1_CONDITION = true + * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). + * In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16). TENSOR_TYPE is only used when WIDTH1_CONDITION = false + * @param[in] TENSOR Tensor basename + * @param[in] X Starting X position + * @param[in] STRIDE_Y Stride Y (in bytes) used to load each row. 
+ * @param[in] WIDTH1_CONDITION Condition to select the WIDTH1 load + * @param[out] dst Output tile + * @param[in] indirect_y Indirect Y index tile (only read by this macro) + */ +#define T_LOAD_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, dst, indirect_y) \ + ({ \ + if(WIDTH1_CONDITION) \ + { \ + LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \ + { \ + VLOAD_PARTIAL(WIDTH0, WIDTH1) \ + (dst[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \ + }) \ + } \ + else \ + { \ + LOOP_UNROLLING(int, _i, 0, 1, HEIGHT, \ + { \ + dst[HEIGHT - 1 - _i].v = V_LOAD(DATA_TYPE, WIDTH0, TENSOR_TYPE, TENSOR, X, (indirect_y[HEIGHT - 1 - _i].v), STRIDE_Y); \ + }) \ + } \ + }) /** Load a tile from global memory (tensor) when the tensor is stored using a NHWC layout * * @param[in] DATA_TYPE Data type |