From 2ffab6dc8afb3716c189a30f75c33b2f2c35a6d4 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Wed, 5 Oct 2022 15:04:23 +0100
Subject: Workaround CL compiler issue on FP16

Resolves: COMPMID-5600

Signed-off-by: Viet-Hoa Do
Change-Id: I5196d1639c48d0b8a116d47ed1d6c7334dc8f41e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8374
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Jakub Sujak
Reviewed-by: Pablo Marquez Tello
Comments-Addressed: Arm Jenkins
---
 src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/core/CL')

diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
index 8a8458798e..345469063a 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
@@ -158,7 +158,11 @@ __kernel void dwc_native_fp_nhwc(
             {
                 LOOP_UNROLLING(int, xk, 0, 1, _IWEI_WIDTH,
                 {
+#if GPU_ARCH == GPU_ARCH_MIDGARD
+                    c[m0].v += a[xk + m0].v * b[xk].v;
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
                     c[m0].v = fma(a[xk + m0].v, b[xk].v, c[m0].v);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
                 })
             })
         }
-- 
cgit v1.2.1