aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>2023-02-09 11:52:06 +0000
committerJakub Sujak <jakub.sujak@arm.com>2023-02-15 16:30:43 +0000
commit470cc5dea721716a93fded4d98642f6e9150c69b (patch)
tree7fabcb8c01679e2bdbecc96d41758c9588824e9c
parent6267974c98a03476808030fb88a63eea3eca51e3 (diff)
downloadComputeLibrary-branches/arm_compute_23_02.tar.gz
Fix Intermittent Neon™ ReduceMean QASYMM8 Mismatchv23.02branches/arm_compute_23_02
- Dividing scale by number of elements causes accuracy loss due to limitations in float datatype and truncation to int - Adds rounding after division on aarch64 to negate this. Resolves: [COMPMID-5839] Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> Change-Id: I54ef0f7e56f39da1fa5f30378f551b5ca419a61d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/492456 Tested-by: bsgcomp <bsgcomp@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9110 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/NEON/kernels/NEReductionOperationKernel.cpp28
-rw-r--r--tests/validation/fixtures/ReduceMeanFixture.h8
-rw-r--r--tests/validation/reference/ReductionOperation.cpp54
-rw-r--r--tests/validation/reference/ReductionOperation.h3
4 files changed, 59 insertions, 34 deletions
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index e0f43ab176..19955af493 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -625,13 +625,13 @@ struct RedOpX_quantized
Iterator input(in, in_win_no_pad);
Iterator output(out, out_window);
- const float in_offset = static_cast<float>(iq_info.offset);
+ const auto in_offset = static_cast<float>(iq_info.offset);
const float in_scale = iq_info.scale;
- const float out_offset = static_cast<float>(oq_info.offset);
+ const auto out_offset = static_cast<float>(oq_info.offset);
const float out_scale = oq_info.scale;
- const float num_elements = static_cast<float>(in_info.dimension(0));
+ const auto num_elements = static_cast<float>(in_info.dimension(0));
const float A = in_scale / (out_scale * num_elements);
const float B = out_offset - (in_scale * in_offset) / (out_scale);
@@ -1382,10 +1382,17 @@ struct RedOpYZW_quantized
vec_res_value3_f = wrapper::vmla(vec_B, wrapper::vcvt<float>(vec_res_value3), vec_A);
vec_res_value4_f = wrapper::vmla(vec_B, wrapper::vcvt<float>(vec_res_value4), vec_A);
- vec_res_value1 = wrapper::vcvt<T>(vec_res_value1_f);
- vec_res_value2 = wrapper::vcvt<T>(vec_res_value2_f);
- vec_res_value3 = wrapper::vcvt<T>(vec_res_value3_f);
- vec_res_value4 = wrapper::vcvt<T>(vec_res_value4_f);
+#ifdef __aarch64__
+ vec_res_value1 = wrapper::vcvta<PromotedType>(vec_res_value1_f);
+ vec_res_value2 = wrapper::vcvta<PromotedType>(vec_res_value2_f);
+ vec_res_value3 = wrapper::vcvta<PromotedType>(vec_res_value3_f);
+ vec_res_value4 = wrapper::vcvta<PromotedType>(vec_res_value4_f);
+#else // defined(__aarch64__)
+ vec_res_value1 = wrapper::vcvt<PromotedType>(vec_res_value1_f);
+ vec_res_value2 = wrapper::vcvt<PromotedType>(vec_res_value2_f);
+ vec_res_value3 = wrapper::vcvt<PromotedType>(vec_res_value3_f);
+ vec_res_value4 = wrapper::vcvt<PromotedType>(vec_res_value4_f);
+#endif // __aarch64__
const auto temp16x8t_1 = wrapper::vcombine(wrapper::vqmovn(vec_res_value1), wrapper::vqmovn(vec_res_value2));
const auto temp16x8t_2 = wrapper::vcombine(wrapper::vqmovn(vec_res_value3), wrapper::vqmovn(vec_res_value4));
@@ -1521,7 +1528,12 @@ struct RedOpYZW_quantized
{
case ReductionOperation::MEAN_SUM:
{
+ // Apply previously calculated coefficients (with rounding on aarch64)
+#ifdef __aarch64__
+ const int32_t res = arm_compute::support::cpp11::round(A * (static_cast<float>(res_value_q)) + B);
+#else // defined(__aarch64__)
const int32_t res = A * (static_cast<float>(res_value_q)) + B;
+#endif // __aarch64__
*reinterpret_cast<T *>(output.ptr() + x) = utils::cast::saturate_cast<T>(res);
break;
}
diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h
index 304630e9f5..13354eedc1 100644
--- a/tests/validation/fixtures/ReduceMeanFixture.h
+++ b/tests/validation/fixtures/ReduceMeanFixture.h
@@ -124,7 +124,13 @@ protected:
{
TensorShape output_shape = i == 0 ? src_shape : out.shape();
output_shape.set(axis[i], 1);
- out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, quantization_info_output);
+ bool is_opencl = false;
+
+#ifdef ARM_COMPUTE_OPENCL_ENABLED
+ is_opencl = std::is_same<CLTensor, TensorType>::value; // Round down to zero on opencl to match kernel
+#endif /* ARM_COMPUTE_OPENCL_ENABLED */
+ out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, quantization_info_output, is_opencl ? RoundingPolicy::TO_ZERO : RoundingPolicy::TO_NEAREST_UP);
+
}
if(!keep_dims)
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index ffb79f86c5..e2890afb9f 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,6 @@
* SOFTWARE.
*/
#include "ReductionOperation.h"
-
#include "tests/validation/Helpers.h"
#include <algorithm>
@@ -39,7 +38,7 @@ namespace reference
namespace
{
template <typename T, typename OT>
-OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
+OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy)
{
using type = typename std::remove_cv<OT>::type;
T res;
@@ -99,7 +98,14 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in
}
if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
{
- int_res /= reduce_elements;
+            // Only use rounding on aarch64 to be consistent with the kernel
+#ifdef __aarch64__
+            // Divide in float format, then round to nearest and implicitly cast back to int
+            int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy);
+#else // defined(__aarch64__)
+ ARM_COMPUTE_UNUSED(policy);
+ int_res /= reduce_elements; // Legacy compatibility
+#endif // __aarch64__
}
res = static_cast<type>(int_res);
}
@@ -175,7 +181,7 @@ OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOper
} // namespace
template <typename T, typename OT>
-SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, RoundingPolicy policy)
{
// Create reference
const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
@@ -197,7 +203,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + du * reduce_elems;
dst[du] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy);
}
}
break;
@@ -213,7 +219,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy);
}
}
}
@@ -232,7 +238,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy);
}
}
}
@@ -254,7 +260,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy);
}
}
}
@@ -269,21 +275,21 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
}
template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
ARM_COMPUTE_UNUSED(quantization_info_output);
- return compute_reduction_operation<T, OT>(src, dst_shape, axis, op);
+ return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, policy);
}
template <>
-SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
if(src.data_type() == DataType::QASYMM8)
{
// If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset
if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
{
- return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, policy);
}
else
{
@@ -294,19 +300,19 @@ SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, cons
}
else
{
- return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, policy);
}
}
template <>
-SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
if(src.data_type() == DataType::QASYMM8_SIGNED)
{
// If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset
if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
{
- return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, policy);
}
else
{
@@ -317,25 +323,25 @@ SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const
}
else
{
- return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, policy);
}
}
template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
} // namespace reference
} // namespace validation
diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h
index 9c9e721b29..dd97778834 100644
--- a/tests/validation/reference/ReductionOperation.h
+++ b/tests/validation/reference/ReductionOperation.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_TEST_REDUCTION_OPERATION_H
#include "tests/SimpleTensor.h"
+#include "arm_compute/core/Rounding.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -37,7 +38,7 @@ namespace reference
{
template <typename T, typename OT>
SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
} // namespace reference
} // namespace validation
} // namespace test