From c5b6d8822da0fae1807dda46674f68d00db8e321 Mon Sep 17 00:00:00 2001
From: Sheri Zhang
Date: Fri, 26 Jun 2020 14:46:59 +0100
Subject: COMPMID-3562: Support QASYMM8_SIGNED in CLArgMinMaxLayerKernel

Signed-off-by: Sheri Zhang
Change-Id: I6c6efde06f000834b0b770889e3eb5ee0d14b027
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3476
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
Reviewed-by: Michele Di Giorgio
Tested-by: Arm Jenkins
---
 .../core/CL/kernels/CLArgMinMaxLayerKernel.h   |  6 +++---
 .../runtime/CL/functions/CLArgMinMaxLayer.h    |  6 +++---
 docs/00_introduction.dox                       |  3 +++
 src/core/CL/cl_kernels/arg_min_max.cl          | 10 ++++-----
 src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp |  2 +-
 src/runtime/CL/functions/CLArgMinMaxLayer.cpp  |  3 +++
 tests/validation/CL/ArgMinMax.cpp              | 25 ++++++++++++++++++++++
 7 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
index 94e8baed13..099238fa75 100644
--- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
@@ -56,7 +56,7 @@ public:
     /** Set the input and output tensors.
      *
-     * @param[in] input Source tensor. Data types supported: S32/F16/F32.
+     * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
      * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
      *                        Has to be nullptr for the first iteration
      * @param[out] output Destination tensor. Data types supported: U32/S32
@@ -68,7 +68,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in] compile_context The compile context to be used.
-     * @param[in] input Source tensor. Data types supported: S32/F16/F32.
+     * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
      * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
      *                        Has to be nullptr for the first iteration
      * @param[out] output Destination tensor. Data types supported: U32/S32
@@ -80,7 +80,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
      *
-     * @param[in] input Source tensor info. Data types supported: S32/F16/F32.
+     * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
      * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32
      *                        Has to be nullptr for the first iteration
      * @param[in] output Destination tensor info. Data types supported: U32/S32
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index b0d29bcefe..997bb79a47 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -55,7 +55,7 @@ public:
     CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32.
+     * @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
      * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
@@ -64,7 +64,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in] compile_context The compile context to be used.
-     * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32.
+     * @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
      * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
@@ -72,7 +72,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
     /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer
      *
-     * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32.
+     * @param[in] input Input source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
      * @param[in] axis Axis to find max/min index.
      * @param[in] output Output source tensor info. Data types supported: U32/S32.
      * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 8387774ef6..969bc45379 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -246,6 +246,9 @@ v20.08 Public major release
   "axis" has been renamed to "reduce_end_axis", which is the last axis (inclusive) before which all dimensions are reduced/collapsed.
   The default "axis" (now "reduce_end_axis") value for @ref NESoftmaxLayer and @ref NELogSoftmaxLayer is changed from -1 to 0.
   The default "axis" (now "reduce_end_axis") value for @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer is changed from 1 to 0.
+ - Added new data type QASYMM8_SIGNED support for:
+   - @ref CLArgMinMaxLayer
+   - @ref CLArgMinMaxLayerKernel

 v20.05 Public major release
 - Various bug fixes.
diff --git a/src/core/CL/cl_kernels/arg_min_max.cl b/src/core/CL/cl_kernels/arg_min_max.cl
index 104d30d8f3..6c90f555f6 100644
--- a/src/core/CL/cl_kernels/arg_min_max.cl
+++ b/src/core/CL/cl_kernels/arg_min_max.cl
@@ -193,7 +193,7 @@ inline DATA_TYPE_OUTPUT arg_idx_max(__global const DATA_TYPE *input, const int x
 * @note The arg_max flag must be passed at compile time using -DARG_MAX if we want to compute the ArgMax
 * @note The arg_min flag must be passed at compile time using -DARG_MIN if we want to compute the ArgMin
 *
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -262,7 +262,7 @@ __kernel void arg_min_max_x(
     // Perform parallel reduction
     for(unsigned int i = middle; i > 0; i >>= 1)
     {
-        if( lid < i && lid + i < lsize)
+        if(lid < i && lid + i < lsize)
         {
             DATA_TYPE tmp0 = *(src_in_row + local_results[lid]);
             DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]);
@@ -297,7 +297,7 @@ __kernel void arg_min_max_x(
 * @note The data type of the select results must be passed at compile time using -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int
 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
 *
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -345,7 +345,7 @@ __kernel void arg_min_max_y(
 * @note The data type of the select results must be passed at compile time using -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int
 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
 *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: S32/F16/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -398,7 +398,7 @@ __kernel void arg_min_max_z(
 * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
 *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: S32/F16/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
index b86e43e6fb..269e2743d7 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
@@ -46,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *prev_outp
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
index 5b4c694f33..cb2b290adf 100644
--- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
+++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
@@ -24,6 +24,7 @@
 #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"

+#include "arm_compute/core/CL/CLValidate.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
@@ -41,6 +42,8 @@ CLArgMinMaxLayer::CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manage
 Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Invalid reduction operation");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions), "Reduction axis greater than max number of dimensions");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index e5decb86d3..17305f9730 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -182,6 +182,31 @@ FIXTURE_DATA_TEST_CASE(RunLarge,
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLArgMinMaxQuantizedValidationFixture<int8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
+                                       framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
+                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+                       CLArgMinMaxQuantizedValidationFixture<int8_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
+                                       framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
+                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
 TEST_SUITE_END() // Quantized
 TEST_SUITE_END() // ArgMinMax
 TEST_SUITE_END() // CL
-- 
cgit v1.2.1