Use #if directive instead of regular condition in CLDirectConv2D

Resolve COMPMID-5004 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Change-Id: Ib3e1b5a891234316c411ea9825ec10c68c4ab5a3 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6788 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
author: Giorgio Arena <giorgio.arena@arm.com> 2021-12-07 13:49:10 +0000
committer: Sheri Zhang <sheri.zhang@arm.com> 2021-12-10 14:48:54 +0000
commit: 8d07127f5c8f0e189ee0db4feb88c0c0b47608d5 (patch)
tree: bca9b835e5f171d973e9bf2ca076f86cd71670ec /src/core/CL/cl_kernels/nhwc
parent: b75d62430e9871fcc6f19cf82879f65d2e7fb201 (diff)
download: ComputeLibrary-8d07127f5c8f0e189ee0db4feb88c0c0b47608d5.tar.gz
1 files changed, 21 insertions, 22 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index 35ff86a4fb..f1b422a68f 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -192,36 +192,35 @@ __kernel void direct_convolution_nhwc(
 
         // We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS
         // This #if directive should be removed in case of dynamic tensor support
-        if((_ISRC_CHANNELS % K0) != 0)
+#if defined(LEFTOVER_LOOP)
+        // Left-over accumulations
+        for(; k < _ISRC_CHANNELS; ++k)
         {
-            // Left-over accumulations
-            for(; k < _ISRC_CHANNELS; ++k)
-            {
-                TILE(SRC_DATA_TYPE, M0, 1, a);
-                TILE(WEI_DATA_TYPE, N0, 1, b);
+            TILE(SRC_DATA_TYPE, M0, 1, a);
+            TILE(WEI_DATA_TYPE, N0, 1, b);
 
-                LOOP_UNROLLING(int, i, 0, 1, M0,
-                {
-                    a[i].v = ZERO_VALUE;
-                })
+            LOOP_UNROLLING(int, i, 0, 1, M0,
+            {
+                a[i].v = ZERO_VALUE;
+            })
 
-                // Load tile from the src tensor
-                T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
+            // Load tile from the src tensor
+            T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
 
-                // Load tile from the weights tensor
-                // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
-                T_LOAD(WEI_DATA_TYPE, N0, 1, BUFFER, wei, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, wei_stride_y, b);
+            // Load tile from the weights tensor
+            // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
+            T_LOAD(WEI_DATA_TYPE, N0, 1, BUFFER, wei, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, wei_stride_y, b);
 
-                // Compute the matrix multiplication between two tiles
-                T_MMUL(SRC_DATA_TYPE, WEI_DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a, b, c);
+            // Compute the matrix multiplication between two tiles
+            T_MMUL(SRC_DATA_TYPE, WEI_DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a, b, c);
 
-                // Apply the offset correction (operation usually needed for asymmetric quantized computation)
-                // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero
-                T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, 1, SRC_OFFSET, WEI_OFFSET, a, b, c);
+            // Apply the offset correction (operation usually needed for asymmetric quantized computation)
+            // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero
+            T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, 1, SRC_OFFSET, WEI_OFFSET, a, b, c);
 
-                ++ck;
-            }
+            ++ck;
         }
+#endif // defined(LEFTOVER_LOOP)
     }
 
     // Offset correction required for the quantized asymmetric computation
author	Giorgio Arena <giorgio.arena@arm.com>	2021-12-07 13:49:10 +0000
committer	Sheri Zhang <sheri.zhang@arm.com>	2021-12-10 14:48:54 +0000
commit	8d07127f5c8f0e189ee0db4feb88c0c0b47608d5 (patch)
tree	bca9b835e5f171d973e9bf2ca076f86cd71670ec /src/core/CL/cl_kernels/nhwc
parent	b75d62430e9871fcc6f19cf82879f65d2e7fb201 (diff)
download	ComputeLibrary-8d07127f5c8f0e189ee0db4feb88c0c0b47608d5.tar.gz