From c6b7913b894f8191878c7b73626fbeb65850131c Mon Sep 17 00:00:00 2001
From: Adnan AlSinan
Date: Thu, 21 Jul 2022 11:06:05 +0100
Subject: Fix direct convolution cases that were failing on Odroid

- Affects OpenCL backend.
- Resolves COMPMID-5416

Signed-off-by: Adnan AlSinan
Change-Id: I8953f9ac5c1ec9edf99399a651a544df4276ccf1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7951
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Benchmark: Arm Jenkins
---
 Android.bp                                        |  1 -
 src/core/CL/cl_kernels/nhwc/direct_convolution.cl | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Android.bp b/Android.bp
index 9d056f99b6..ad28cc35b3 100644
--- a/Android.bp
+++ b/Android.bp
@@ -159,7 +159,6 @@ arm_compute_library_defaults {
         "-DARM_COMPUTE_ENABLE_NEON",
         "-Wno-unused-parameter",
         "-DNO_DOT_IN_TOOLCHAIN",
-        "-no-integrated-as",
         "-Wno-implicit-fallthrough"
     ],
     rtti: true,
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index d34e24b436..e602fbb525 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -169,11 +169,17 @@ __kernel void direct_convolution_nhwc(
             TILE(SRC_DATA_TYPE, M0, K0, a);
             TILE(WEI_DATA_TYPE, N0, K0, b);
 
+            // Initialize tiles
             LOOP_UNROLLING(int, i, 0, 1, M0,
             {
                 a[i].v = ZERO_VALUE;
             })
 
+            LOOP_UNROLLING(int, i, 0, 1, N0,
+            {
+                b[i].v = ZERO_VALUE;
+            })
+
             // Load tile from the src tensor
             T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, K0, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 
@@ -199,11 +205,17 @@ __kernel void direct_convolution_nhwc(
             TILE(SRC_DATA_TYPE, M0, 1, a);
             TILE(WEI_DATA_TYPE, N0, 1, b);
 
+            // Initialize tiles
             LOOP_UNROLLING(int, i, 0, 1, M0,
             {
                 a[i].v = ZERO_VALUE;
             })
 
+            LOOP_UNROLLING(int, i, 0, 1, N0,
+            {
+                b[i].v = ZERO_VALUE;
+            })
+
             // Load tile from the src tensor
             T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 
-- 
cgit v1.2.1