diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2020-08-28 13:25:31 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2020-08-28 14:51:30 +0000 |
commit | 1c76c1ddcd1294ee8149bd74ecf6f62963408286 (patch) | |
tree | d6633cf8b331d0edeed9247f8d805d8bcf2019b9 | |
parent | 5bdde8509542e942e908e9d508dd39c73194abfb (diff) | |
download | ComputeLibrary-1c76c1ddcd1294ee8149bd74ecf6f62963408286.tar.gz |
COMPMID-3504: Add support for BOOL in NEON comparison operators
Change-Id: I81b0c2482bc20b1ab5124ed6179bb94cbced7875
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3869
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEElementwiseOperations.h | 12 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | 41 | ||||
-rw-r--r-- | tests/validation/NEON/Comparisons.cpp | 20 |
3 files changed, 57 insertions, 16 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h index 7d9dac761f..66d8db5d0d 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h @@ -270,7 +270,7 @@ private: /** Basic function to run @ref NEComparisonOperationKernel. * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a comparison operation between two tensors. */ class NEElementwiseComparison : public IFunction @@ -290,7 +290,7 @@ public: NEElementwiseComparison &operator=(NEElementwiseComparison &&); /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: U8. * @param[in] op Comparison Operation to be performed. @@ -298,7 +298,7 @@ public: void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op); /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. * @param[in] output Output tensor info. Data types supported: U8. * @param[in] op Comparison Operation to be performed. @@ -317,7 +317,7 @@ private: /** Basic function to run @ref NEComparisonOperationKernel * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a comparison operation between two tensors. */ template <ComparisonOperation op> @@ -338,14 +338,14 @@ public: NEElementwiseComparisonStatic &operator=(NEElementwiseComparisonStatic &&); /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: U16/U32. */ void configure(ITensor *input1, ITensor *input2, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. * @param[in] output Output tensor info. Data types supported: U16/U32. * diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 014a564bf1..db4f5923bc 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -446,6 +446,21 @@ inline int elementwise_arithm_op_quantized_signed_broadcast_loop(int window_star } template <ComparisonOperation op, typename InputScalarType, typename InputVectorType> +inline int elementwise_comp_op_8_loop(int window_start_x, int window_end_x, int window_step_x, + const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr) +{ + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto a = wrapper::vloadq(input1_ptr + x); + const auto b = wrapper::vloadq(input2_ptr + x); + const auto res = elementwise_comp_op<op, InputVectorType, uint8x16_t>(a, b); + wrapper::vstore(output_ptr + x, res); + } + return x; +} + +template <ComparisonOperation op, typename InputScalarType, typename InputVectorType> inline int elementwise_comp_op_16_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr) { @@ -526,6 +541,19 @@ inline int elementwise_comp_op_quantized_signed_loop(int window_start_x, int win } template <ComparisonOperation op, typename InputScalarType, typename InputVectorType> +inline int elementwise_comp_op_broadcast_8_loop(int window_start_x, int window_end_x, int window_step_x, + const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder) +{ + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto a = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint8x16_t>(wrapper::vloadq((non_broadcast_input_ptr + x)), broadcast_value, reorder); + wrapper::vstore(output_ptr + x, a); + } + return x; +} + +template <ComparisonOperation op, typename InputScalarType, typename InputVectorType> inline int elementwise_comp_op_broadcast_16_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder) { @@ -995,6 +1023,15 @@ void elementwise_op_quantized_signed(const ITensor *in1, const ITensor *in2, ITe } template <ComparisonOperation op, typename InputScalarType, typename InputVectorType> +void elementwise_comp_op_8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) +{ + elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window, + &elementwise_comp_op_scalar<op, InputScalarType>, + &elementwise_comp_op_broadcast_8_loop<op, InputScalarType, InputVectorType>, + &elementwise_comp_op_8_loop<op, InputScalarType, InputVectorType>); +} + +template <ComparisonOperation op, typename InputScalarType, typename InputVectorType> void elementwise_comp_op_16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window, @@ -1101,6 +1138,7 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso { static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = { + { "op_U8_U8_U8", &elementwise_comp_op_8<op, uint8_t, uint8x16_t> }, { "op_F32_F32_U8", &elementwise_comp_op_32<op, float, float32x4_t> }, { "op_S16_S16_U8", &elementwise_comp_op_16<op, int16_t, int16x8_t> }, { "op_S32_S32_U8", &elementwise_comp_op_32<op, int32_t, int32x4_t> }, @@ -1122,7 +1160,6 @@ NEElementwiseOperationKernel::NEElementwiseOperationKernel() Status NEElementwiseOperationKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); @@ -1194,6 +1231,7 @@ void NEArithmeticOperationKernel::configure(ArithmeticOperation op, const ITenso Status NEArithmeticOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); // Validate in case of configured output if(output.total_size() > 0) { @@ -1285,6 +1323,7 @@ void NEComparisonOperationKernel::configure(ComparisonOperation op, const ITenso Status NEComparisonOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32); // Validate in case of configured output if(output.total_size() > 0) { diff --git a/tests/validation/NEON/Comparisons.cpp b/tests/validation/NEON/Comparisons.cpp index 8dc78d870c..b77bcdd4f0 100644 --- a/tests/validation/NEON/Comparisons.cpp +++ b/tests/validation/NEON/Comparisons.cpp @@ -43,15 +43,6 @@ namespace validation { namespace { -const auto configure_dataset = combine(datasets::SmallShapes(), - framework::dataset::make("DataType", { DataType::QASYMM8, - DataType::QASYMM8_SIGNED, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - DataType::F32 - })); - const auto run_small_dataset = combine(datasets::ComparisonOperations(), datasets::SmallShapes()); const auto run_small_broadcast_dataset = combine(datasets::ComparisonOperations(), datasets::SmallShapesBroadcast()); const auto run_large_dataset = combine(datasets::ComparisonOperations(), datasets::LargeShapes()); @@ -94,6 +85,17 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( template <typename T> using NEComparisonFixture = ComparisonValidationFixture<Tensor, Accessor, NEElementwiseComparison, T>; +TEST_SUITE(Bool) +FIXTURE_DATA_TEST_CASE(RunSmall, + NEComparisonFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(run_small_dataset, framework::dataset::make("DataType", DataType::U8))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) |