about summary refs log tree commit diff
diff options
context:
space:
mode:
author Adnan AlSinan <adnan.alsinan@arm.com> 2022-07-21 11:06:05 +0100
committer Adnan AlSinan <adnan.alsinan@arm.com> 2022-07-21 13:48:22 +0000
commit c6b7913b894f8191878c7b73626fbeb65850131c (patch)
tree c855890738fc7752a64fd20225aefa80e3f9cfef
parent 5967ee26113af9652a215a1a875f8617f8f35f34 (diff)
download ComputeLibrary-c6b7913b894f8191878c7b73626fbeb65850131c.tar.gz
Fix direct convolution cases that were failing on Odroid
- Affects OpenCL backend.
- Resolves COMPMID-5416

Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: I8953f9ac5c1ec9edf99399a651a544df4276ccf1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7951
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- Android.bp 1
-rw-r--r-- src/core/CL/cl_kernels/nhwc/direct_convolution.cl 12
2 files changed, 12 insertions, 1 deletion
diff --git a/Android.bp b/Android.bp
index 9d056f99b6..ad28cc35b3 100644
--- a/Android.bp
+++ b/Android.bp
@@ -159,7 +159,6 @@ arm_compute_library_defaults {
"-DARM_COMPUTE_ENABLE_NEON",
"-Wno-unused-parameter",
"-DNO_DOT_IN_TOOLCHAIN",
- "-no-integrated-as",
"-Wno-implicit-fallthrough"
],
rtti: true,
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index d34e24b436..e602fbb525 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -169,11 +169,17 @@ __kernel void direct_convolution_nhwc(
TILE(SRC_DATA_TYPE, M0, K0, a);
TILE(WEI_DATA_TYPE, N0, K0, b);
+ // Initialize tiles
LOOP_UNROLLING(int, i, 0, 1, M0,
{
a[i].v = ZERO_VALUE;
})
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = ZERO_VALUE;
+ })
+
// Load tile from the src tensor
T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, K0, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
@@ -199,11 +205,17 @@ __kernel void direct_convolution_nhwc(
TILE(SRC_DATA_TYPE, M0, 1, a);
TILE(WEI_DATA_TYPE, N0, 1, b);
+ // Initialize tiles
LOOP_UNROLLING(int, i, 0, 1, M0,
{
a[i].v = ZERO_VALUE;
})
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = ZERO_VALUE;
+ })
+
// Load tile from the src tensor
T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);