From 9637b2e4fc33b2264aa5586dd6b2ed1045db5075 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Mon, 23 Sep 2019 16:49:49 +0100
Subject: COMPMID-2671: Change ArgMinMax NEON/CL output type to Signed32

Change-Id: I718f3884928271c5b0afb259d5bfe9df284f18e6
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/1995
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 arm_compute/core/CL/kernels/CLReductionOperationKernel.h   | 12 +++++++++---
 arm_compute/core/NEON/kernels/NEReductionOperationKernel.h | 14 ++++++++++----
 arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h        | 14 ++++++++++----
 arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h      | 13 +++++++++----
 src/core/CL/kernels/CLReductionOperationKernel.cpp         |  7 ++++---
 src/core/NEON/kernels/NEReductionOperationKernel.cpp       |  2 +-
 6 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index aba11e1ad1..18a1bab2dc 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -31,7 +31,13 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** Interface for the reduction operation kernel */
+/** Interface for the reduction operation kernel
+ *
+ * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
+ *       32-bit (U32). It is the user's responsibility to check that the
+ *       results do not overflow in case the output data type is set to signed
+ *       32-bit integer (S32).
+ */
 class CLReductionOperationKernel : public ICLKernel
 {
 public:
@@ -51,7 +57,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in] input Source tensor. Data types supported: QASYMM8/S32/F16/F32.
-     * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input, U32/S32 for ARG_MIN/ARG_MAX.
      *             Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
      * @param[in] op Reduction operation to perform.
@@ -62,7 +68,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
      *
      * @param[in] input Source tensor info. Data types supported: QASYMM8/S32/F16/F32.
-     * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
+     * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input, U32/S32 for ARG_MIN/ARG_MAX.
      *            Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
      * @param[in] op Reduction operation to perform.
diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
index a4cb330445..4b28b8dbcd 100644
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,13 @@ namespace arm_compute
 {
 class ITensor;
 
-/** NEON kernel to perform a reduction operation */
+/** NEON kernel to perform a reduction operation
+ *
+ * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
+ *       32-bit (U32). It is the user's responsibility to check that the
+ *       results do not overflow in case the output data type is set to signed
+ *       32-bit integer (S32).
+ */
 class NEReductionOperationKernel : public INEKernel
 {
 public:
@@ -54,7 +60,7 @@ public:
     /** Set the source, destination of the kernel
      *
      * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
-     * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input.
+     * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
      *             Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
      * @param[in] op Reduction operation to perform.
@@ -64,7 +70,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
      *
      * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
-     * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input.
+     * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
      *            Output will have the same number of dimensions as input.
      * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
      * @param[in] op Reduction operation to perform.
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index b3a85948a8..2384ebcd37 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,7 +32,13 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** Function to calculate the index of the minimum or maximum values in a tensor based on an axis. */
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
+ * @note The indices are computed in unsigned 32-bit (U32). It is the user's
+ *       responsibility to check that the results do not overflow in case the
+ *       output data type is set to signed 32-bit integer (S32).
+ */
 class CLArgMinMaxLayer : public ICLSimpleFunction
 {
 public:
@@ -40,7 +46,7 @@ public:
      *
      * @param[in] input Input source tensor. Data types supported: F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[out] output Output source tensor. Data types supported: U32.
+     * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      */
     void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
@@ -48,7 +54,7 @@ public:
      *
      * @param[in] input Input source tensor info. Data types supported: F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[in] output Output source tensor info. Data types supported: U32.
+     * @param[in] output Output source tensor info. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 55b39e45ec..85bf7d92c9 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -32,14 +32,19 @@ namespace arm_compute
 {
-class IsTensor;
+class ITensor;
 
-/** Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
  * This function calls the following NEON kernels:
  *
  * -# @ref NEReductionOperationKernel
 * -# @ref NEFillBorderKernel
  *
+ * @note The indices are computed in unsigned 32-bit (U32). It is the user's
+ *       responsibility to check that the results do not overflow in case the
+ *       output data type is set to signed 32-bit integer (S32).
  */
 class NEArgMinMaxLayer : public IFunction
 {
@@ -50,7 +55,7 @@ public:
      *
      * @param[in] input Input source tensor. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[out] output Output source tensor. Data types supported: U32.
+     * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      */
     void configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op);
@@ -58,7 +63,7 @@ public:
      *
      * @param[in] input Input source tensor info. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
-     * @param[in] output Output source tensor info. Data types supported: U32.
+     * @param[in] output Output source tensor info. Data types supported: U32/S32.
      * @param[in] op Operation to perform: min or max
      *
      * @return a status
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index ee33a72792..8e92b591d1 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -36,8 +36,8 @@
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 // OpenCL kernel requires input width to be a power of 2 for x-axis.
@@ -65,7 +65,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
     if(op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8, "Not supported operation for QASYMM8");
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32);
     }
     else
     {
@@ -376,3 +376,4 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
         ARM_COMPUTE_ERROR("Not supported");
     }
 }
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index ffa4fa3565..85abda598d 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -1186,7 +1186,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
     }
     else
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32);
     }
 
     const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis);
-- 
cgit v1.2.1
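
For reference, below is a minimal usage sketch (not part of the patch) of the behaviour this change enables: configuring NEArgMinMaxLayer with a signed 32-bit (S32) output instead of the previously mandatory U32. It assumes the Arm Compute Library runtime types (Tensor, TensorInfo, TensorShape) from the same release; the shape, the reduction axis, and the choice of ARG_IDX_MAX are illustrative only, while the configure()/validate() signatures are the ones declared in the headers changed above. As the @note added by the patch states, the indices are still computed as U32 internally, so the caller must ensure they fit in S32.

// Sketch: NEON ArgMinMax with an S32 output, valid after this patch.
// Shapes, axis and data types below are illustrative only.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input;
    Tensor output;

    // 2D F32 input reduced along axis 0; the output keeps the same rank, with the
    // reduced dimension collapsed to 1, and now holds signed 32-bit indices.
    input.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(1U, 32U), 1, DataType::S32));

    // validate() now accepts S32 (as well as U32) outputs for ARG_IDX_MIN/ARG_IDX_MAX.
    const Status status = NEArgMinMaxLayer::validate(input.info(), 0, output.info(), ReductionOperation::ARG_IDX_MAX);
    if(!bool(status))
    {
        return 1;
    }

    NEArgMinMaxLayer argmax;
    argmax.configure(&input, 0, &output, ReductionOperation::ARG_IDX_MAX);

    input.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill the input tensor here ...

    argmax.run();
    return 0;
}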