aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-11-25 15:47:37 +0000
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-11-29 09:23:04 +0000
commit 56d55123527b5bb84a5c3516f161dd4438cdc7d8 (patch)
tree baa3928802cb63d3a2cdbd75a75a84e31f706a22
parent bd2942d7c701a664421ce8ef7145f97b7163201a (diff)
download ComputeLibrary-56d55123527b5bb84a5c3516f161dd4438cdc7d8.tar.gz
Use loop unrolling only when the kernel height is less than 5
- In dwc_native_fp_nhwc.cl, loop unrolling should only be enabled when the kernel height is less than 5.
- No performance regression was observed.
- The patch reduces the compilation time required for the kernel.

Resolves COMPMID-4887

Change-Id: I93188b9764cf7d1ad34ac164694f6f1fd37a90e8
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6744
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
index 41da4fff10..4f57a81e7b 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
@@ -134,7 +134,7 @@ __kernel void dwc_native_fp_nhwc(
c[i].v = 0;
})
-#if _IWEI_HEIGHT <= 5
+#if _IWEI_HEIGHT < 5
LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT,
#else // _IWEI_HEIGHT <= 5
for(int yk = 0; yk < _IWEI_HEIGHT; yk++)
@@ -165,7 +165,7 @@ __kernel void dwc_native_fp_nhwc(
})
})
}
-#if _IWEI_HEIGHT <= 5
+#if _IWEI_HEIGHT < 5
)
#endif // _IWEI_HEIGHT <= 5