From eaa01ab593428bc7267ebbe107b2d813a11b64b5 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park <sang-hoon.park@arm.com>
Date: Mon, 11 Nov 2019 17:33:28 +0000
Subject: COMPMID-2671 use Signed32 for default output data type of ArgMinMax

Signed32 is used as the data type both before and after reshaping in ArgMinMax.

Change-Id: I230af43a931d4e106de6c72f716ced1dab511084
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2262
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
---
 arm_compute/core/CL/kernels/CLReductionOperationKernel.h   |  8 ++++----
 arm_compute/core/NEON/kernels/NEReductionOperationKernel.h |  8 ++++----
 arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h        |  7 ++++---
 arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h      |  7 ++++---
 src/core/CL/kernels/CLReductionOperationKernel.cpp         |  2 +-
 src/core/NEON/kernels/NEReductionOperationKernel.cpp       |  2 +-
 src/runtime/CL/functions/CLReductionOperation.cpp          |  6 +++---
 src/runtime/NEON/functions/NEReductionOperation.cpp        |  4 ++--
 tests/validation/CL/ArgMinMax.cpp                          |  2 +-
 tests/validation/NEON/ArgMinMax.cpp                        |  2 +-
 tests/validation/fixtures/ArgMinMaxFixture.h               |  8 ++++----
 tests/validation/reference/ReductionOperation.cpp          | 10 +++++-----
 12 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index 18a1bab2dc..172ed8985a 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -33,10 +33,10 @@ class ICLTensor;
 
 /** Interface for the reduction operation kernel
  *
- * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
- *       32-bit (U32). It is the user's responsibility to check that the
- *       results do not overflow in case the output data type is set to signed
- *       32-bit integer (S32).
+ * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
+ *       output tensor is signed 32-bit integer (S32). The indices are computed in
+ *       unsigned 32-bit (U32), so it is the user's responsibility to check that
+ *       the results do not overflow the signed 32-bit range.
  */
 class CLReductionOperationKernel : public ICLKernel
 {
diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
index 4b28b8dbcd..0d402fb044 100644
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
@@ -32,10 +32,10 @@ class ITensor;
 
 /** NEON kernel to perform a reduction operation
  *
- * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
- *       32-bit (U32). It is the user's responsibility to check that the
- *       results do not overflow in case the output data type is set to signed
- *       32-bit integer (S32).
+ * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
+ *       output tensor is signed 32-bit integer (S32). The indices are computed in
+ *       unsigned 32-bit (U32), so it is the user's responsibility to check that
+ *       the results do not overflow the signed 32-bit range.
  */
 class NEReductionOperationKernel : public INEKernel
 {
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index 28feee09ab..1b465a4866 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -38,9 +38,10 @@ class CLReductionOperation;
 /** Function to calculate the index of the minimum or maximum values in a
  *  tensor based on an axis.
  *
- * @note The indices are computed in unsigned 32-bit (U32). It is the user's
- *       responsibility to check that the results do not overflow in case the
- *       output data type is set to signed 32-bit integer (S32).
+ * @note The default data type for an uninitialized output tensor is
+ *       signed 32-bit integer (S32). The indices are computed in unsigned
+ *       32-bit (U32), so it is the user's responsibility to check that the
+ *       results do not overflow the signed 32-bit range.
  */
 class CLArgMinMaxLayer : public IFunction
 {
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index d2ddff2665..ca6794bf82 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -42,9 +42,10 @@ class ITensor;
  * -# @ref NEReductionOperationKernel
  * -# @ref NEFillBorderKernel
  *
- * @note The indices are computed in unsigned 32-bit (U32). It is the user's
- *       responsibility to check that the results do not overflow in case the
- *       output data type is set to signed 32-bit integer (S32).
+ * @note The default data type for an uninitialized output tensor is
+ *       signed 32-bit integer (S32). The indices are computed in unsigned
+ *       32-bit (U32), so it is the user's responsibility to check that the
+ *       results do not overflow the signed 32-bit range.
  */
 class NEArgMinMaxLayer : public IFunction
 {
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index a085ab1683..cbf3923243 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -83,7 +83,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
     // Output tensor auto initialization if not yet initialized
     const bool        is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
     const TensorShape output_shape     = arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, !is_arg_min_max);
-    const DataType    output_data_type = is_arg_min_max ? DataType::U32 : input->data_type();
+    const DataType    output_data_type = is_arg_min_max ? DataType::S32 : input->data_type();
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
 
     const unsigned int num_elems_processed_per_iteration = (is_data_type_quantized(input->data_type()) && (axis == 0)) ? 1 : 16;
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 85abda598d..a2ce0de38b 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -1204,7 +1204,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
 
     // Output auto initialization if not yet initialized
     const bool is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
-    DataType   output_data_type = is_arg_min_max ? DataType::U32 : input->data_type();
+    DataType   output_data_type = is_arg_min_max ? DataType::S32 : input->data_type();
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
 
     unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 447c15b1e8..3aa5a813b6 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -86,7 +86,7 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf
     const auto input_data_type    = input->data_type();
     const auto input_num_channles = input->num_channels();
     const auto input_qinfo        = input->quantization_info();
-    const auto output_data_type   = is_arg_min_max ? DataType::U32 : output->data_type();
+    const auto output_data_type   = is_arg_min_max ? DataType::S32 : output->data_type();
 
     auto initialize_tensorinfo = [](TensorInfo & ti, TensorShape shape, DataType data_type, int num_channels, QuantizationInfo qinfo)
     {
@@ -208,7 +208,7 @@ ICLTensor *CLReductionOperation::configure_intermediate_result_vector(ICLTensor
 
     if(is_arg_min_max)
     {
-        _results_vector.back().info()->set_data_type(DataType::U32).set_is_resizable(true).reset_padding();
+        _results_vector.back().info()->set_data_type(DataType::S32).set_is_resizable(true).reset_padding();
     }
 
     return _is_reshape_required ? &_results_vector.back() : output;
@@ -229,7 +229,7 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
     if(_is_reshape_required)
     {
         const TensorShape output_shape     = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false);
-        const auto        output_data_type = is_arg_min_max ? DataType::U32 : input->info()->data_type();
+        const auto        output_data_type = is_arg_min_max ? DataType::S32 : input->info()->data_type();
         auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
     }
 
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 09cd765d4b..397fe21cb9 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -80,7 +80,7 @@ Status NEReductionOperation::validate(const ITensorInfo *input, const ITensorInf
         const auto input_num_channles = input->num_channels();
         const auto input_qinfo        = input->quantization_info();
         const auto is_arg_min_max     = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN);
-        const auto output_data_type   = is_arg_min_max ? DataType::U32 : output->data_type();
+        const auto output_data_type   = is_arg_min_max ? DataType::S32 : output->data_type();
 
         info_before_reshape.set_data_type(output_data_type).set_tensor_shape(shape_before_reshape).set_num_channels(input_num_channles).set_quantization_info(input_qinfo);
 
@@ -110,7 +110,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i
     {
         const auto output_internal_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis);
         const auto output_external_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false);
-        const auto output_data_type      = is_arg_min_max ? DataType::U32 : input->info()->data_type();
+        const auto output_data_type      = is_arg_min_max ? DataType::S32 : input->info()->data_type();
         const auto num_channels          = input->info()->num_channels();
         const auto qinfo                 = input->info()->quantization_info();
 
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 845fdbf493..5b2e6f34c6 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -56,7 +56,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
         }),
         framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32),
-                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::U32),
+                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::S32),
                                                  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::U32)
         })),
diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp
index 642a69ba5f..174cb19b42 100644
--- a/tests/validation/NEON/ArgMinMax.cpp
+++ b/tests/validation/NEON/ArgMinMax.cpp
@@ -56,7 +56,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
         }),
         framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32),
-                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::U32),
+                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::S32),
                                                  TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::F32)
         })),
         framework::dataset::make("Axis", { 4, 0, 2, 0 })),
diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h
index f8fe4ff1ee..a4d03fba02 100644
--- a/tests/validation/fixtures/ArgMinMaxFixture.h
+++ b/tests/validation/fixtures/ArgMinMaxFixture.h
@@ -114,7 +114,7 @@ protected:
         return dst;
     }
 
-    SimpleTensor<uint32_t> compute_reference(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
+    SimpleTensor<int32_t> compute_reference(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
     {
         // Create reference
         SimpleTensor<T> src{ src_shape, data_type, 1, q_info };
@@ -123,11 +123,11 @@ protected:
         fill(src);
 
         TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false);
-        return reference::reduction_operation<T, uint32_t>(src, output_shape, axis, op);
+        return reference::reduction_operation<T, int32_t>(src, output_shape, axis, op);
     }
 
-    TensorType             _target{};
-    SimpleTensor<uint32_t> _reference{};
+    TensorType            _target{};
+    SimpleTensor<int32_t> _reference{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index 965365db9d..330a3b82ec 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -179,7 +179,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
 {
     // Create reference
     const bool         is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
-    DataType           output_data_type = is_arg_min_max ? DataType::U32 : src.data_type();
+    DataType           output_data_type = is_arg_min_max ? DataType::S32 : src.data_type();
     SimpleTensor<OT>   dst{ dst_shape, output_data_type, 1, src.quantization_info() };
     const unsigned int src_width    = src.shape().x();
     const unsigned int src_height   = src.shape().y();
@@ -292,10 +292,10 @@ SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, cons
 template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 
 } // namespace reference
 } // namespace validation
-- 
cgit v1.2.1