From 9637b2e4fc33b2264aa5586dd6b2ed1045db5075 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Mon, 23 Sep 2019 16:49:49 +0100
Subject: COMPMID-2671: Change ArgMinMax NEON/CL output type to Signed32

Change-Id: I718f3884928271c5b0afb259d5bfe9df284f18e6
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/1995
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 arm_compute/core/CL/kernels/CLReductionOperationKernel.h   | 12 +++++++++---
 arm_compute/core/NEON/kernels/NEReductionOperationKernel.h | 14 ++++++++++----
 arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h        | 14 ++++++++++----
 arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h      | 13 +++++++++----
 src/core/CL/kernels/CLReductionOperationKernel.cpp         |  7 ++++---
 src/core/NEON/kernels/NEReductionOperationKernel.cpp       |  2 +-
 6 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index aba11e1ad1..18a1bab2dc 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -31,7 +31,13 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** Interface for the reduction operation kernel */
+/** Interface for the reduction operation kernel
+ *
+ * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
+ *       32-bit (U32). It is the user's responsibility to check that the
+ *       results do not overflow in case the output data type is set to signed
+ *       32-bit integer (S32).
+ */
 class CLReductionOperationKernel : public ICLKernel
 {
 public:
@@ -51,7 +57,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in] input Source tensor. Data types supported: QASYMM8/S32/F16/F32.
-     * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input, U32/S32 for ARG_MIN/ARG_MAX.
      *             Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
      * @param[in] op Reduction operation to perform.
@@ -62,7 +68,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
      *
      * @param[in] input Source tensor info. Data types supported: QASYMM8/S32/F16/F32.
-     * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
+     * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input, U32/S32 for ARG_MIN/ARG_MAX.
      *            Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
      * @param[in] op Reduction operation to perform.
diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
index a4cb330445..4b28b8dbcd 100644
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,13 @@ namespace arm_compute
 {
 class ITensor;
 
-/** NEON kernel to perform a reduction operation */
+/** NEON kernel to perform a reduction operation
+ *
+ * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
+ *       32-bit (U32). It is the user's responsibility to check that the
+ *       results do not overflow in case the output data type is set to signed
+ *       32-bit integer (S32).
+ */
 class NEReductionOperationKernel : public INEKernel
 {
 public:
@@ -54,7 +60,7 @@ public:
     /** Set the source, destination of the kernel
      *
      * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
-     * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input.
+     * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
      *             Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
      * @param[in] op Reduction operation to perform.
@@ -64,7 +70,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
      *
      * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
-     * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input.
+     * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
      *            Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
      * @param[in] op Reduction operation to perform.
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index b3a85948a8..2384ebcd37 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,7 +32,13 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** Function to calculate the index of the minimum or maximum values in a tensor based on an axis. */
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
+ * @note The indices are computed in unsigned 32-bit (U32). It is the user's
+ *       responsibility to check that the results do not overflow in case the
+ *       output data type is set to signed 32-bit integer (S32).
+ */
 class CLArgMinMaxLayer : public ICLSimpleFunction
 {
 public:
@@ -40,7 +46,7 @@ public:
      *
      * @param[in] input Input source tensor. Data types supported: F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[out] output Output source tensor. Data types supported: U32.
+     * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      */
     void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
@@ -48,7 +54,7 @@ public:
      *
      * @param[in] input Input source tensor info. Data types supported: F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[in] output Output source tensor info. Data types supported: U32.
+     * @param[in] output Output source tensor info. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 55b39e45ec..85bf7d92c9 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -32,14 +32,19 @@ namespace arm_compute
 {
-class IsTensor;
+class ITensor;
 
-/** Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
  * This function calls the following NEON kernels:
  *
  * -# @ref NEReductionOperationKernel
 * -# @ref NEFillBorderKernel
  *
+ * @note The indices are computed in unsigned 32-bit (U32). It is the user's
+ *       responsibility to check that the results do not overflow in case the
+ *       output data type is set to signed 32-bit integer (S32).
  */
 class NEArgMinMaxLayer : public IFunction
 {
@@ -50,7 +55,7 @@ public:
      *
      * @param[in] input Input source tensor. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[out] output Output source tensor. Data types supported: U32.
+     * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      */
     void configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op);
@@ -58,7 +63,7 @@ public:
      *
      * @param[in] input Input source tensor info. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[in] output Output source tensor info. Data types supported: U32.
+     * @param[in] output Output source tensor info. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      *
      * @return a status
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index ee33a72792..8e92b591d1 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -36,8 +36,8 @@
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 // OpenCL kernel requires input width to be a power of 2 for x-axis.
@@ -65,7 +65,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
     if(op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8, "Not supported operation for QASYMM8");
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32);
     }
     else
     {
@@ -376,3 +376,4 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
         ARM_COMPUTE_ERROR("Not supported");
     }
 }
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index ffa4fa3565..85abda598d 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -1186,7 +1186,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
     }
     else
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32);
     }
 
     const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis);
-- 
cgit v1.2.1
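
For reference, below is a minimal usage sketch (not part of the patch) of the behaviour this change enables: configuring NEArgMinMaxLayer with a signed 32-bit (S32) output instead of the previously mandatory U32. It assumes the Arm Compute Library runtime types (Tensor, TensorInfo, TensorShape) from the same release; the shape, the reduction axis, and the choice of ARG_IDX_MAX are illustrative only, while the configure()/validate() signatures are the ones declared in the headers changed above. As the @note added by the patch states, the indices are still computed as U32 internally, so the caller must ensure they fit in S32.

// Sketch: NEON ArgMinMax with an S32 output, valid after this patch.
// Shapes, axis and data types below are illustrative only.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input;
    Tensor output;

    // 2D F32 input reduced along axis 0; the output keeps the same rank, with the
    // reduced dimension collapsed to 1, and now holds signed 32-bit indices.
    input.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(1U, 32U), 1, DataType::S32));

    // validate() now accepts S32 (as well as U32) outputs for ARG_IDX_MIN/ARG_IDX_MAX.
    const Status status = NEArgMinMaxLayer::validate(input.info(), 0, output.info(), ReductionOperation::ARG_IDX_MAX);
    if(!bool(status))
    {
        return 1;
    }

    NEArgMinMaxLayer argmax;
    argmax.configure(&input, 0, &output, ReductionOperation::ARG_IDX_MAX);

    input.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill the input tensor here ...

    argmax.run();
    return 0;
}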