From 6a5eee7f267290a4894639aa349c8d82c231812a Mon Sep 17 00:00:00 2001
From: Manuel Bottini
Date: Fri, 30 Apr 2021 12:37:04 +0100
Subject: NEReduceMean failed on v8.2 debug build for Android

vpadd is not correctly converted by some compilers in debug.
Therefore we opted for a serial computation of the elements in the
result vector for debug builds

Resolves: COMPMID-4420

Change-Id: I2d32af8568852a419226a409e3849d08e4e649c7
Signed-off-by: Manuel Bottini
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5536
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../NEON/kernels/NEReductionOperationKernel.cpp | 26 ++++++++++++++++------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 2bbd9452f2..476391e1f7 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -390,10 +390,10 @@ struct RedOpX
 
     inline void operator()(const Window &in_window, Window &out_window, const ITensor *in, ITensor *out, const ReductionOperation op)
     {
-        const TensorInfo in_info = *(in->info());
-        const int window_step_x = 16 / sizeof(T);
-        const auto window_start_x = static_cast(in_window.x().start());
-        const auto window_end_x = static_cast(in_window.x().end());
+        const size_t input_dim_0 = in->info()->dimension(0);
+        const int window_step_x = 16 / sizeof(T);
+        const auto window_start_x = static_cast(in_window.x().start());
+        const auto window_end_x = static_cast(in_window.x().end());
 
         Window in_win_no_pad = in_window;
         in_win_no_pad.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -479,13 +479,20 @@ struct RedOpX
                 case ReductionOperation::MEAN_SUM:
                 case ReductionOperation::SUM_SQUARE:
                 {
+#ifdef ARM_COMPUTE_DEBUG_ENABLED
+                    auto res = static_cast(0.f);
+                    for(int i = 0; i < S; ++i)
+                    {
+                        res += wrapper::vgetlane(vec_res_value, i);
+                    }
+#else // ARM_COMPUTE_DEBUG_ENABLED
                     auto carry_res = wrapper::vpadd(wrapper::vgethigh(vec_res_value), wrapper::vgetlow(vec_res_value));
                     for(int i = 0; i < S / 4; ++i)
                     {
                         carry_res = wrapper::vpadd(carry_res, carry_res);
                     }
                     auto res = wrapper::vgetlane(carry_res, 0);
-
+#endif // ARM_COMPUTE_DEBUG_ENABLED
                     if(op == ReductionOperation::SUM_SQUARE)
                     {
                         // Compute left-over elements
@@ -505,7 +512,7 @@ struct RedOpX
 
                     if(op == ReductionOperation::MEAN_SUM)
                     {
-                        res /= in_info.dimension(0);
+                        res /= input_dim_0;
                     }
 
                     *(reinterpret_cast(output.ptr())) = res;
@@ -813,10 +820,14 @@ struct RedOpX_quantized
                     carry_res = wrapper::vadd(carry_res, vec_res_value3);
                     carry_res = wrapper::vadd(carry_res, vec_res_value4);
 
+#ifdef ARM_COMPUTE_DEBUG_ENABLED
+                    const float res_f = wrapper::vgetlane(carry_res, 0) + wrapper::vgetlane(carry_res, 1) + wrapper::vgetlane(carry_res, 2) + wrapper::vgetlane(carry_res, 3);
+                    auto res = static_cast(res_f);
+#else // ARM_COMPUTE_DEBUG_ENABLED
                     auto carry_paddition = wrapper::vpadd(wrapper::vgethigh(carry_res), wrapper::vgetlow(carry_res));
                     carry_paddition = wrapper::vpadd(carry_paddition, carry_paddition);
                     auto res = static_cast(wrapper::vgetlane(carry_paddition, 0));
-
+#endif // ARM_COMPUTE_DEBUG_ENABLED
                     // Compute left-over elements
                     for(; x < window_end_x; ++x)
                     {
@@ -1575,6 +1586,7 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi
             default:
                 ARM_COMPUTE_ERROR("Not supported");
         }
+        return;
     }
 
     switch(axis)
-- 
cgit v1.2.1