diff options
Diffstat (limited to 'src/core/CL/cl_kernels/nhwc')
-rw-r--r-- | src/core/CL/cl_kernels/nhwc/direct_convolution.cl | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index d34e24b436..e602fbb525 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -169,11 +169,17 @@ __kernel void direct_convolution_nhwc(
             TILE(SRC_DATA_TYPE, M0, K0, a);
             TILE(WEI_DATA_TYPE, N0, K0, b);
 
+            // Initialize tiles
             LOOP_UNROLLING(int, i, 0, 1, M0,
             {
                 a[i].v = ZERO_VALUE;
             })
 
+            LOOP_UNROLLING(int, i, 0, 1, N0,
+            {
+                b[i].v = ZERO_VALUE;
+            })
+
             // Load tile from the src tensor
             T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, K0, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 
@@ -199,11 +205,17 @@ __kernel void direct_convolution_nhwc(
             TILE(SRC_DATA_TYPE, M0, 1, a);
             TILE(WEI_DATA_TYPE, N0, 1, b);
 
+            // Initialize tiles
             LOOP_UNROLLING(int, i, 0, 1, M0,
             {
                 a[i].v = ZERO_VALUE;
             })
 
+            LOOP_UNROLLING(int, i, 0, 1, N0,
+            {
+                b[i].v = ZERO_VALUE;
+            })
+
             // Load tile from the src tensor
             T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 