aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2021-01-15 09:58:09 +0000
committer Giorgio Arena <giorgio.arena@arm.com> 2021-01-15 12:02:41 +0000
commit d05d56da900c5c1226e94a2941fb7ca51a6f4d45 (patch)
tree 38321b0f60bd48e1db8dade883267828fcb0f08a
parent 50e98bbdfbdbfe3db5e2e6bc50b6a0c5add4763c (diff)
download ComputeLibrary-d05d56da900c5c1226e94a2941fb7ca51a6f4d45.tar.gz
[Nightly Failure] Fix DeconvolutionLayer OpenCL kernel compilation
- Add case for VEC_SIZE == 3 in the TRANSPOSED_U macro

Resolves: COMPMID-4094
Change-Id: I31870e589e66d895f9bf65c87aa04f32038365c0
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4864
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/transpose.cl 14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/transpose.cl b/src/core/CL/cl_kernels/transpose.cl
index 832572bf0e..82db2908b5 100644
--- a/src/core/CL/cl_kernels/transpose.cl
+++ b/src/core/CL/cl_kernels/transpose.cl
@@ -40,6 +40,11 @@
{ \
u0, u1 \
}
+#elif VEC_SIZE_Y == 3
+#define TRANSPOSED_U(val) \
+ { \
+ u0, u1, u2 \
+ }
#elif VEC_SIZE_Y == 4
#define TRANSPOSED_U(val) \
{ \
@@ -68,6 +73,11 @@
{ \
u0.val, u1.val \
}
+#elif VEC_SIZE_Y == 3
+#define TRANSPOSED_U(val) \
+ { \
+ u0.val, u1.val, u2.val \
+ }
#elif VEC_SIZE_Y == 4
#define TRANSPOSED_U(val) \
{ \
@@ -142,9 +152,11 @@ __kernel void transpose(IMAGE_DECLARATION(src),
#if VEC_SIZE_Y > 2
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
u2 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_y));
+#endif /* VEC_SIZE_Y > 2 */
+#if VEC_SIZE_Y > 3
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
u3 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_y));
-#endif /* VEC_SIZE_Y > 2 */
+#endif /* VEC_SIZE_Y > 3 */
#if VEC_SIZE_Y > 4
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
u4 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_y));