diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-01-15 09:58:09 +0000 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2021-01-15 12:02:41 +0000 |
commit | d05d56da900c5c1226e94a2941fb7ca51a6f4d45 (patch) | |
tree | 38321b0f60bd48e1db8dade883267828fcb0f08a | |
parent | 50e98bbdfbdbfe3db5e2e6bc50b6a0c5add4763c (diff) | |
download | ComputeLibrary-d05d56da900c5c1226e94a2941fb7ca51a6f4d45.tar.gz |
[Nightly Failure] Fix DeconvolutionLayer OpenCL kernel compilation
- Add case for VEC_SIZE_Y == 3 in the TRANSPOSED_U macro
Resolves: COMPMID-4094
Change-Id: I31870e589e66d895f9bf65c87aa04f32038365c0
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4864
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | src/core/CL/cl_kernels/transpose.cl | 14 |
1 file changed, 13 insertions, 1 deletion
```diff
diff --git a/src/core/CL/cl_kernels/transpose.cl b/src/core/CL/cl_kernels/transpose.cl
index 832572bf0e..82db2908b5 100644
--- a/src/core/CL/cl_kernels/transpose.cl
+++ b/src/core/CL/cl_kernels/transpose.cl
@@ -40,6 +40,11 @@
 { \
 u0, u1 \
 }
+#elif VEC_SIZE_Y == 3
+#define TRANSPOSED_U(val) \
+ { \
+ u0, u1, u2 \
+ }
 #elif VEC_SIZE_Y == 4
 #define TRANSPOSED_U(val) \
 { \
@@ -68,6 +73,11 @@
 { \
 u0.val, u1.val \
 }
+#elif VEC_SIZE_Y == 3
+#define TRANSPOSED_U(val) \
+ { \
+ u0.val, u1.val, u2.val \
+ }
 #elif VEC_SIZE_Y == 4
 #define TRANSPOSED_U(val) \
 { \
@@ -142,9 +152,11 @@ __kernel void transpose(IMAGE_DECLARATION(src),
 #if VEC_SIZE_Y > 2
 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
 u2 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_y));
+#endif /* VEC_SIZE_Y > 2 */
+#if VEC_SIZE_Y > 3
 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
 u3 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_y));
-#endif /* VEC_SIZE_Y > 2 */
+#endif /* VEC_SIZE_Y > 3 */
 #if VEC_SIZE_Y > 4
 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
 u4 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_y));
```