aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2021-04-30 12:37:04 +0100
committerManuel Bottini <manuel.bottini@arm.com>2021-05-04 12:37:30 +0000
commit6a5eee7f267290a4894639aa349c8d82c231812a (patch)
tree3ab2ca8e733fc179ea5fc5b6a568b71c349db035
parent0dc0d8eda87a01c11f9caabc0d2f2933737ba469 (diff)
downloadComputeLibrary-6a5eee7f267290a4894639aa349c8d82c231812a.tar.gz
NEReduceMean failed on v8.2 debug build for Android
vpadd is not correctly converted by some compilers in debug. Therefore we opted for a serial computation of the elements in the result vector for debug builds Resolves: COMPMID-4420 Change-Id: I2d32af8568852a419226a409e3849d08e4e649c7 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5536 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/NEON/kernels/NEReductionOperationKernel.cpp26
1 files changed, 19 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 2bbd9452f2..476391e1f7 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -390,10 +390,10 @@ struct RedOpX
inline void operator()(const Window &in_window, Window &out_window, const ITensor *in, ITensor *out, const ReductionOperation op)
{
- const TensorInfo in_info = *(in->info());
- const int window_step_x = 16 / sizeof(T);
- const auto window_start_x = static_cast<int>(in_window.x().start());
- const auto window_end_x = static_cast<int>(in_window.x().end());
+ const size_t input_dim_0 = in->info()->dimension(0);
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int>(in_window.x().start());
+ const auto window_end_x = static_cast<int>(in_window.x().end());
Window in_win_no_pad = in_window;
in_win_no_pad.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -479,13 +479,20 @@ struct RedOpX
case ReductionOperation::MEAN_SUM:
case ReductionOperation::SUM_SQUARE:
{
+#ifdef ARM_COMPUTE_DEBUG_ENABLED
+ auto res = static_cast<T>(0.f);
+ for(int i = 0; i < S; ++i)
+ {
+ res += wrapper::vgetlane(vec_res_value, i);
+ }
+#else // ARM_COMPUTE_DEBUG_ENABLED
auto carry_res = wrapper::vpadd(wrapper::vgethigh(vec_res_value), wrapper::vgetlow(vec_res_value));
for(int i = 0; i < S / 4; ++i)
{
carry_res = wrapper::vpadd(carry_res, carry_res);
}
auto res = wrapper::vgetlane(carry_res, 0);
-
+#endif // ARM_COMPUTE_DEBUG_ENABLED
if(op == ReductionOperation::SUM_SQUARE)
{
// Compute left-over elements
@@ -505,7 +512,7 @@ struct RedOpX
if(op == ReductionOperation::MEAN_SUM)
{
- res /= in_info.dimension(0);
+ res /= input_dim_0;
}
*(reinterpret_cast<T *>(output.ptr())) = res;
@@ -813,10 +820,14 @@ struct RedOpX_quantized
carry_res = wrapper::vadd(carry_res, vec_res_value3);
carry_res = wrapper::vadd(carry_res, vec_res_value4);
+#ifdef ARM_COMPUTE_DEBUG_ENABLED
+ const float res_f = wrapper::vgetlane(carry_res, 0) + wrapper::vgetlane(carry_res, 1) + wrapper::vgetlane(carry_res, 2) + wrapper::vgetlane(carry_res, 3);
+ auto res = static_cast<int32_t>(res_f);
+#else // ARM_COMPUTE_DEBUG_ENABLED
auto carry_paddition = wrapper::vpadd(wrapper::vgethigh(carry_res), wrapper::vgetlow(carry_res));
carry_paddition = wrapper::vpadd(carry_paddition, carry_paddition);
auto res = static_cast<int32_t>(wrapper::vgetlane(carry_paddition, 0));
-
+#endif // ARM_COMPUTE_DEBUG_ENABLED
// Compute left-over elements
for(; x < window_end_x; ++x)
{
@@ -1575,6 +1586,7 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi
default:
ARM_COMPUTE_ERROR("Not supported");
}
+ return;
}
switch(axis)