aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEReductionOperationKernel.cpp
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2018-11-20 16:03:01 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2018-11-21 14:08:19 +0000
commit1c948d47f55ff8a39aa527f63ea7df93a13dd38e (patch)
tree4579798cb95ecaf62b5d7fe61b5d3753301e2804 /src/core/NEON/kernels/NEReductionOperationKernel.cpp
parent8b2814ab7b9dc00278132d74d2f738b843b6c0c7 (diff)
downloadComputeLibrary-1c948d47f55ff8a39aa527f63ea7df93a13dd38e.tar.gz
COMPMID-1800: (Nightly) Mismatches in SC9863 board for NEON FP16
Fixes for: - ReduceMean: the reduction on the X axis for FP16 with 8 elements was only partially performed, because a fixed number of pairwise additions was applied regardless of the vector length. The fix now takes into account the number of elements in the vector and performs as many reductions as necessary. - YOLOLayer: the activation for FP16 has to be performed on 32 bits until the FP16 approximation is fixed. Change-Id: I75373f4edd37de476e6fe1a56de3ef386b65c619
Diffstat (limited to 'src/core/NEON/kernels/NEReductionOperationKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEReductionOperationKernel.cpp5
1 files changed, 4 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index b77219cd79..182e93d177 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -154,7 +154,10 @@ struct RedOpX
input);
auto carry_addition = wrapper::vpadd(wrapper::vgethigh(vec_sum_value), wrapper::vgetlow(vec_sum_value));
- carry_addition = wrapper::vpadd(carry_addition, carry_addition);
+ for(int i = 0; i < S / 4; ++i)
+ {
+ carry_addition = wrapper::vpadd(carry_addition, carry_addition);
+ }
auto res = wrapper::vgetlane(carry_addition, 0);
if(op == ReductionOperation::MEAN_SUM)