aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels/nhwc/direct_convolution.cl')
-rw-r--r--src/core/CL/cl_kernels/nhwc/direct_convolution.cl12
1 files changed, 12 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index d34e24b436..e602fbb525 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -169,11 +169,17 @@ __kernel void direct_convolution_nhwc(
TILE(SRC_DATA_TYPE, M0, K0, a);
TILE(WEI_DATA_TYPE, N0, K0, b);
+ // Initialize tiles
LOOP_UNROLLING(int, i, 0, 1, M0,
{
a[i].v = ZERO_VALUE;
})
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = ZERO_VALUE;
+ })
+
// Load tile from the src tensor
T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, K0, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
@@ -199,11 +205,17 @@ __kernel void direct_convolution_nhwc(
TILE(SRC_DATA_TYPE, M0, 1, a);
TILE(WEI_DATA_TYPE, N0, 1, b);
+ // Initialize tiles
LOOP_UNROLLING(int, i, 0, 1, M0,
{
a[i].v = ZERO_VALUE;
})
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = ZERO_VALUE;
+ })
+
// Load tile from the src tensor
T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);