diff options
author | Adnan AlSinan <adnan.alsinan@arm.com> | 2022-07-21 11:06:05 +0100 |
---|---|---|
committer | Adnan AlSinan <adnan.alsinan@arm.com> | 2022-07-21 13:48:22 +0000 |
commit | c6b7913b894f8191878c7b73626fbeb65850131c (patch) | |
tree | c855890738fc7752a64fd20225aefa80e3f9cfef /src/core/CL/cl_kernels/nhwc | |
parent | 5967ee26113af9652a215a1a875f8617f8f35f34 (diff) | |
download | ComputeLibrary-c6b7913b894f8191878c7b73626fbeb65850131c.tar.gz |
Fix direct convolution cases that were failing on Odroid
- Affects OpenCL backend.
- Resolves COMPMID-5416
Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: I8953f9ac5c1ec9edf99399a651a544df4276ccf1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7951
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/nhwc')
-rw-r--r-- | src/core/CL/cl_kernels/nhwc/direct_convolution.cl | 12 |
1 files changed, 12 insertions, 0 deletions
```diff
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index d34e24b436..e602fbb525 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -169,11 +169,17 @@ __kernel void direct_convolution_nhwc(
 TILE(SRC_DATA_TYPE, M0, K0, a);
 TILE(WEI_DATA_TYPE, N0, K0, b);
 
+// Initialize tiles
 LOOP_UNROLLING(int, i, 0, 1, M0,
 {
     a[i].v = ZERO_VALUE;
 })
 
+LOOP_UNROLLING(int, i, 0, 1, N0,
+{
+    b[i].v = ZERO_VALUE;
+})
+
 // Load tile from the src tensor
 T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, K0, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 
@@ -199,11 +205,17 @@ __kernel void direct_convolution_nhwc(
 TILE(SRC_DATA_TYPE, M0, 1, a);
 TILE(WEI_DATA_TYPE, N0, 1, b);
 
+// Initialize tiles
 LOOP_UNROLLING(int, i, 0, 1, M0,
 {
     a[i].v = ZERO_VALUE;
 })
 
+LOOP_UNROLLING(int, i, 0, 1, N0,
+{
+    b[i].v = ZERO_VALUE;
+})
+
 // Load tile from the src tensor
 T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 
```