aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Android.bp1
-rw-r--r--src/core/CL/cl_kernels/nhwc/direct_convolution.cl12
2 files changed, 12 insertions, 1 deletions
diff --git a/Android.bp b/Android.bp
index 9d056f99b6..ad28cc35b3 100644
--- a/Android.bp
+++ b/Android.bp
@@ -159,7 +159,6 @@ arm_compute_library_defaults {
"-DARM_COMPUTE_ENABLE_NEON",
"-Wno-unused-parameter",
"-DNO_DOT_IN_TOOLCHAIN",
- "-no-integrated-as",
"-Wno-implicit-fallthrough"
],
rtti: true,
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index d34e24b436..e602fbb525 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -169,11 +169,17 @@ __kernel void direct_convolution_nhwc(
TILE(SRC_DATA_TYPE, M0, K0, a);
TILE(WEI_DATA_TYPE, N0, K0, b);
+ // Initialize tiles
LOOP_UNROLLING(int, i, 0, 1, M0,
{
a[i].v = ZERO_VALUE;
})
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = ZERO_VALUE;
+ })
+
// Load tile from the src tensor
T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, K0, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
@@ -199,11 +205,17 @@ __kernel void direct_convolution_nhwc(
TILE(SRC_DATA_TYPE, M0, 1, a);
TILE(WEI_DATA_TYPE, N0, 1, b);
+ // Initialize tiles
LOOP_UNROLLING(int, i, 0, 1, M0,
{
a[i].v = ZERO_VALUE;
})
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = ZERO_VALUE;
+ })
+
// Load tile from the src tensor
T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);