From 1c948d47f55ff8a39aa527f63ea7df93a13dd38e Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 20 Nov 2018 16:03:01 +0000 Subject: COMPMID-1800: (Nightly) Mismatches in SC9863 board for NEON FP16 Fixes for: - ReduceMean, reduction on the X axis for FP16 with 8 elements was performed only up to a certain point. The fix now takes into account the number of elements of the vector and does as many reductions as necessary. - YOLOLayer, activation for FP16 has to be performed on 32 bits until the FP16 approximation is fixed. Change-Id: I75373f4edd37de476e6fe1a56de3ef386b65c619 --- src/core/NEON/kernels/NEReductionOperationKernel.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels/NEReductionOperationKernel.cpp') diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index b77219cd79..182e93d177 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -154,7 +154,10 @@ struct RedOpX input); auto carry_addition = wrapper::vpadd(wrapper::vgethigh(vec_sum_value), wrapper::vgetlow(vec_sum_value)); - carry_addition = wrapper::vpadd(carry_addition, carry_addition); + for(int i = 0; i < S / 4; ++i) + { + carry_addition = wrapper::vpadd(carry_addition, carry_addition); + } auto res = wrapper::vgetlane(carry_addition, 0); if(op == ReductionOperation::MEAN_SUM) -- cgit v1.2.1