diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-05-12 12:28:58 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-05-17 09:02:15 +0000 |
commit | ed4b8a07e67c7802207c8954a88ad7a91aec79e0 (patch) | |
tree | 771cb0867fa675cf02286006f7fafa2f66a814e2 /src/core/CL/cl_kernels/mean_stddev_normalization.cl | |
parent | 186fe683da63dea2dac06e46a412e354d33cd9c2 (diff) | |
download | ComputeLibrary-ed4b8a07e67c7802207c8954a88ad7a91aec79e0.tar.gz |
Fix MeanStdDevNormalizationLayer reference outputting nan for FP16
- Bring the epsilon up to 1e-3 for FP16 (both backends) since it was causing the reference's variance being negative and its square root being NaN
- Bring the epsilon up to 1e-7 for FP16 NEON test for the same problem on the NEON kernel
- Adjust the CL kernel's vec_size when input tensor's width < 16 and use macros agnostic of vector size for sum reduction
- Add previously mismatching tensor shapes
Resolve COMPMID-4354
Change-Id: I823c871aacb72326f90c86b24cb16c3e2d4bd15e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5630
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/mean_stddev_normalization.cl')
-rw-r--r-- | src/core/CL/cl_kernels/mean_stddev_normalization.cl | 32 |
1 files changed, 13 insertions, 19 deletions
diff --git a/src/core/CL/cl_kernels/mean_stddev_normalization.cl b/src/core/CL/cl_kernels/mean_stddev_normalization.cl index 4141d3e8b7..76be629934 100644 --- a/src/core/CL/cl_kernels/mean_stddev_normalization.cl +++ b/src/core/CL/cl_kernels/mean_stddev_normalization.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -76,31 +76,25 @@ __kernel void mean_stddev_normalization( sum_sq += data * data; } // Perform reduction -#if VEC_SIZE > 8 - sum.s01234567 += sum.s89abcdef; - sum_sq.s01234567 += sum_sq.s89abcdef; -#endif // VEC_SIZE > 8 -#if VEC_SIZE > 4 - sum.s0123 += sum.s4567; - sum_sq.s0123 += sum_sq.s4567; -#endif // VEC_SIZE > 4 -#if VEC_SIZE > 2 - sum.s01 += sum.s23; - sum_sq.s01 += sum_sq.s23; -#endif // VEC_SIZE > 2 - sum.s0 += sum.s1; - sum_sq.s0 += sum_sq.s1; + sum = SUM_REDUCE(sum, VEC_SIZE); + sum_sq = SUM_REDUCE(sum_sq, VEC_SIZE); + +#if VEC_SIZE > 1 +#define sum sum.s0 +#define sum_sq sum_sq.s0 +#endif // VEC_SIZE > 1 + // Left-overs loop for(; i < WIDTH; ++i) { DATA_TYPE data = *((__global DATA_TYPE *)offset(&in, i, 0)); - sum.s0 += data; - sum_sq.s0 += data * data; + sum += data; + sum_sq += data * data; } - DATA_TYPE mean = sum.s0 / WIDTH; - DATA_TYPE var = (sum_sq.s0 / WIDTH) - (mean * mean); + DATA_TYPE mean = sum / WIDTH; + DATA_TYPE var = (sum_sq / WIDTH) - (mean * mean); DATA_TYPE stddev_inv = 1.f / sqrt(var + EPSILON); i = 0; |