From 74a16960f3cf1b8a93a70c349a8fc05fe45d4b97 Mon Sep 17 00:00:00 2001
From: morgolock
Date: Wed, 15 Jan 2020 11:40:49 +0000
Subject: COMPMID-2994: Add support for QASYMM8_SIGNED in NEComparisonOperationKernel

Change-Id: I56be8c6a18b6d73d7c903fb9d64d205e125358f2
Signed-off-by: morgolock
Reviewed-on: https://review.mlplatform.org/c/2589
Comments-Addressed: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Tested-by: Arm Jenkins
---
 .../NEON/kernels/NEElementwiseOperationKernel.h    |  6 +-
 .../NEON/functions/NEElementwiseOperations.h       | 12 +--
 .../NEON/kernels/NEElementwiseOperationKernel.cpp  | 85 ++++++++++++++++++++++
 tests/validation/NEON/Comparisons.cpp              | 15 +++-
 tests/validation/reference/Comparisons.cpp         | 25 ++++++-
 5 files changed, 132 insertions(+), 11 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
index 43e4966484..61c25e1a2a 100644
--- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -189,7 +189,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
      *
      * @param[in] op     Comparison operation to be executed.
-     * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
      * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
      * @param[in] output Output tensor. Data types supported: U16/U32.
      */
@@ -198,7 +198,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
      *
      * @param[in] op     Comparison operation to be executed.
-     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
      * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output Output tensor info. Data types supported: U16/U32.
      *
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index eec9bb9fa9..e5af6bc841 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -164,7 +164,7 @@ public:
 
 /** Basic function to run @ref NEComparisonOperationKernel.
  *
- * @note The tensor data type for the inputs must be QASYMM8/S16/F16/S32/F32.
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
  * @note The function performs a comparison operation between two tensors.
  */
 class NEElementwiseComparison : public INESimpleFunction
@@ -172,7 +172,7 @@ class NEElementwiseComparison : public INESimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
      * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
      * @param[out]     output Output tensor. Data types supported: U16/U32.
      * @param[in]      op     Comparison Operation to be performed.
@@ -180,7 +180,7 @@ public:
     void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op);
     /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
      *
-     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
      * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output Output tensor info. Data types supported: U16/U32.
      * @param[in] op     Comparison Operation to be performed.
@@ -192,7 +192,7 @@ public:
 
 /** Basic function to run @ref NEComparisonOperationKernel
  *
- * @note The tensor data type for the inputs must be QASYMM8/S16/F16/S32/F32.
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
  * @note The function performs a comparison operation between two tensors.
  */
 template <ComparisonOperation op>
@@ -201,14 +201,14 @@ class NEElementwiseComparisonStatic : public INESimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
      * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
      * @param[out]     output Output tensor. Data types supported: U16/U32.
      */
     void configure(ITensor *input1, ITensor *input2, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
      *
-     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
      * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output Output tensor info. Data types supported: U16/U32.
      *
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index b8e6f0cc69..444ee8e0d3 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -534,6 +534,24 @@ inline int elementwise_comp_op_quantized_loop(int window_start_x, int window_end
     return x;
 }
 
+template <ComparisonOperation op>
+inline int elementwise_comp_op_quantized_signed_loop(int window_start_x, int window_end_x, int window_step_x,
+                                                     const int8_t *input1_ptr, const int8_t *input2_ptr, uint8_t *output_ptr,
+                                                     int32x4_t voffset1, int32x4_t voffset2, float32x4_t vscale1, float32x4_t vscale2,
+                                                     float32x4_t voffseto, float32x4_t invvscaleo)
+{
+    ARM_COMPUTE_UNUSED(voffseto, invvscaleo);
+    int x = window_start_x;
+    for(; x <= (window_end_x - window_step_x); x += window_step_x)
+    {
+        const float32x4x4_t af = load_quantized_signed(input1_ptr + x, voffset1, vscale1);
+        const float32x4x4_t bf = load_quantized_signed(input2_ptr + x, voffset2, vscale2);
+        const uint32x4x4_t  rf = elementwise_comp_op<op>(af, bf);
+        store_quantized(output_ptr + x, rf);
+    }
+    return x;
+}
+
 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
 inline int elementwise_comp_op_broadcast_16_loop(int window_start_x, int window_end_x, int window_step_x,
                                                  const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
@@ -772,6 +790,66 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o
     }
 }
 
+void elementwise_comp_quantized_signed(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+                                       uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo),
+                                       int (*neon_func)(int, int, int, const int8_t *, const int8_t *, uint8_t *,
+                                                        int32x4_t, int32x4_t, float32x4_t, float32x4_t,
+                                                        float32x4_t, float32x4_t))
+{
+    // Create input windows
+    Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
+    Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());
+
+    // Clear X Dimension on execution window as we handle manually
+    Window win = window;
+    win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+    const int                     window_step_x  = 16;
+    const auto                    window_start_x = static_cast<int>(window.x().start());
+    const auto                    window_end_x   = static_cast<int>(window.x().end());
+    const UniformQuantizationInfo output_qinfo   = out->info()->quantization_info().uniform();
+
+    const float32x4_t voffseto   = vdupq_n_f32(output_qinfo.offset);
+    const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale);
+    {
+        const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform();
+        const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform();
+
+        // Input1 quantization info
+        const int32x4_t   voffset1 = vdupq_n_s32(input1_qinfo.offset);
+        const float32x4_t vscale1  = vdupq_n_f32(input1_qinfo.scale);
+
+        // Input2 quantization info
+        const int32x4_t   voffset2 = vdupq_n_s32(input2_qinfo.offset);
+        const float32x4_t vscale2  = vdupq_n_f32(input2_qinfo.scale);
+
+        // Clear X Dimension on execution window as we handle manually
+        input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
+        input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+        Iterator input1(in1, input1_win);
+        Iterator input2(in2, input2_win);
+        Iterator output(out, win);
+
+        execute_window_loop(win, [&](const Coordinates &)
+        {
+            const auto input1_ptr = reinterpret_cast<const int8_t *>(input1.ptr());
+            const auto input2_ptr = reinterpret_cast<const int8_t *>(input2.ptr());
+            const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+
+            int x = (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr, output_ptr, voffset1, voffset2,
+                                 vscale1, vscale2, voffseto, invvscaleo);
+            for(; x < window_end_x; ++x)
+            {
+                const float afs   = dequantize_qasymm8_signed(*(input1_ptr + x), input1_qinfo);
+                const float bfs   = dequantize_qasymm8_signed(*(input2_ptr + x), input2_qinfo);
+                *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo);
+            }
+        },
+        input1, input2, output);
+    }
+}
+
 void elementwise_op_quantized_signed(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
                                      int8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo),
                                      int (*broadcast_func)(int, int, int, const int8_t *, float32x4x4_t, int8_t *, int32x4_t, float32x4_t,
@@ -931,6 +1009,12 @@ void elementwise_comp_op_quantized(const ITensor *in1, const ITensor *in2, ITens
                              &elementwise_comp_op_quantized_loop<op>);
 }
 
+template <ComparisonOperation op>
+void elementwise_comp_op_quantized_signed(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+    elementwise_comp_quantized_signed(in1, in2, out, window, &elementwise_comp_op_quantized_scalar<op>, &elementwise_comp_op_quantized_signed_loop<op>);
+}
+
 std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
 configure_func(const ITensor *input1, const ITensor *input2, ITensor *output,
                std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
@@ -981,6 +1065,7 @@ configure_comp_func(const ITensor *input1, const ITensor *input2, ITensor *outpu
         { "op_F32_F32_U8", &elementwise_comp_op_32<op, float, float32x4_t> },
         { "op_S16_S16_U8", &elementwise_comp_op_16<op, int16_t, int16x8_t> },
         { "op_S32_S32_U8", &elementwise_comp_op_32<op, int32_t, int32x4_t> },
+        { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_U8", &elementwise_comp_op_quantized_signed<op> },
         { "op_QASYMM8_QASYMM8_U8", &elementwise_comp_op_quantized<op> }
     };
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/tests/validation/NEON/Comparisons.cpp b/tests/validation/NEON/Comparisons.cpp
index c3b1c69523..38e440e649 100644
--- a/tests/validation/NEON/Comparisons.cpp
+++ b/tests/validation/NEON/Comparisons.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,7 @@ namespace
 {
 const auto configure_dataset = combine(datasets::SmallShapes(),
                                        framework::dataset::make("DataType", { DataType::QASYMM8,
+                                                                              DataType::QASYMM8_SIGNED,
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                                                                               DataType::F16,
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -167,6 +168,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NEComparisonQuantizedFixture<int8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(run_small_dataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo() })),
+                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.3f, 2) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END() // Comparison
diff --git a/tests/validation/reference/Comparisons.cpp b/tests/validation/reference/Comparisons.cpp
index 6d08daf89d..c0c86b1933 100644
--- a/tests/validation/reference/Comparisons.cpp
+++ b/tests/validation/reference/Comparisons.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -139,6 +139,29 @@ SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor
     return dst;
 }
 
+template <>
+SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2)
+{
+    SimpleTensor<uint8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
+
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
+    if(src1.data_type() == DataType::QASYMM8_SIGNED)
+    {
+        SimpleTensor<float> src1_tmp = convert_from_asymmetric(src1);
+        SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst, id_src1, id_src2, id_dst);
+    }
+    else
+    {
+        // DataType::U8
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
+    }
+    return dst;
+}
+
 template SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2);
 template SimpleTensor<uint8_t> compare(ComparisonOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2);
-- 
cgit v1.2.1
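
Editor's note (appended for illustration, not part of the upstream patch): a minimal
sketch of how the new QASYMM8_SIGNED comparison path can be exercised through the
public NEElementwiseComparison API touched by this change. The tensor shapes,
quantization parameters and the Greater predicate are arbitrary illustrative choices;
the U8 output follows the kernel's "op_QASYMM8_SIGNED_QASYMM8_SIGNED_U8" table entry
above (a 255/0 mask per element), even though the doxygen comments list U16/U32.

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, mask;
        // Two QASYMM8_SIGNED inputs, each with its own (scale, offset) pair;
        // the kernel dequantizes both sides before comparing.
        a.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 0)));
        b.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.3f, 2)));
        // Per-element boolean mask: 255 where dequant(a) > dequant(b), 0 elsewhere.
        mask.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::U8));

        NEElementwiseComparison cmp;
        cmp.configure(&a, &b, &mask, ComparisonOperation::Greater);

        a.allocator()->allocate();
        b.allocator()->allocate();
        mask.allocator()->allocate();
        // ... fill a and b with quantized data ...

        cmp.run();
        return 0;
    }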