aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/nhwc
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-12-07 13:49:10 +0000
committerSheri Zhang <sheri.zhang@arm.com>2021-12-10 14:48:54 +0000
commit8d07127f5c8f0e189ee0db4feb88c0c0b47608d5 (patch)
treebca9b835e5f171d973e9bf2ca076f86cd71670ec /src/core/CL/cl_kernels/nhwc
parentb75d62430e9871fcc6f19cf82879f65d2e7fb201 (diff)
downloadComputeLibrary-8d07127f5c8f0e189ee0db4feb88c0c0b47608d5.tar.gz
Use #if directive instead of regular condition in CLDirectConv2D
Resolve COMPMID-5004 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Change-Id: Ib3e1b5a891234316c411ea9825ec10c68c4ab5a3 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6788 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/nhwc')
-rw-r--r--src/core/CL/cl_kernels/nhwc/direct_convolution.cl43
1 files changed, 21 insertions, 22 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index 35ff86a4fb..f1b422a68f 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -192,36 +192,35 @@ __kernel void direct_convolution_nhwc(
// We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS
// This #if directive should be removed in case of dynamic tensor support
- if((_ISRC_CHANNELS % K0) != 0)
+#if defined(LEFTOVER_LOOP)
+ // Left-over accumulations
+ for(; k < _ISRC_CHANNELS; ++k)
{
- // Left-over accumulations
- for(; k < _ISRC_CHANNELS; ++k)
- {
- TILE(SRC_DATA_TYPE, M0, 1, a);
- TILE(WEI_DATA_TYPE, N0, 1, b);
+ TILE(SRC_DATA_TYPE, M0, 1, a);
+ TILE(WEI_DATA_TYPE, N0, 1, b);
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = ZERO_VALUE;
- })
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ a[i].v = ZERO_VALUE;
+ })
- // Load tile from the src tensor
- T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
+ // Load tile from the src tensor
+ T_LOAD_NHWC_INDIRECT(SRC_DATA_TYPE, M0, 1, SRC_TENSOR_TYPE, src, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, src_stride_y, xi, yi, a);
- // Load tile from the weights tensor
- // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
- T_LOAD(WEI_DATA_TYPE, N0, 1, BUFFER, wei, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, wei_stride_y, b);
+ // Load tile from the weights tensor
+ // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
+ T_LOAD(WEI_DATA_TYPE, N0, 1, BUFFER, wei, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, wei_stride_y, b);
- // Compute the matrix multiplication between two tiles
- T_MMUL(SRC_DATA_TYPE, WEI_DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a, b, c);
+ // Compute the matrix multiplication between two tiles
+ T_MMUL(SRC_DATA_TYPE, WEI_DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a, b, c);
- // Apply the offset correction (operation usually needed for asymmetric quantized computation)
- // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero
- T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, 1, SRC_OFFSET, WEI_OFFSET, a, b, c);
+ // Apply the offset correction (operation usually needed for asymmetric quantized computation)
+ // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero
+ T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, 1, SRC_OFFSET, WEI_OFFSET, a, b, c);
- ++ck;
- }
+ ++ck;
}
+#endif // defined(LEFTOVER_LOOP)
}
// Offset correction required for the quantized asymmetric computation