From fb6aaeb3b02ad3210d57c1d945998f93c11de474 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park
Date: Wed, 27 Nov 2019 15:26:44 +0000
Subject: COMPMID-2773 [NE] add support for QASYMM8_SIGNED to QuantizationLayer

Change-Id: Ib692a79228fd85ee600c212d77439ca38d71f332
Signed-off-by: Sang-Hoon Park
Reviewed-on: https://review.mlplatform.org/c/2377
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
---
 arm_compute/core/NEON/NEAsymm.h                    |  2 -
 .../core/NEON/kernels/NEQuantizationLayerKernel.h  |  2 +-
 .../NEON/kernels/NEQuantizationLayerKernel.cpp     | 49 +++++++++++++++-------
 tests/validation/NEON/QuantizationLayer.cpp        | 18 ++++++++
 tests/validation/reference/QuantizationLayer.cpp   | 27 +++++++-----
 5 files changed, 71 insertions(+), 27 deletions(-)

diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index c09a7d9028..e4f4250d16 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -695,7 +695,6 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
  *
  * @return A neon vector holding the quantized values
  */
-
 inline int8x16_t vquantize_signed(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
 {
     const float scale = qi.scale;
@@ -716,7 +715,6 @@ inline int8x16_t vquantize_signed(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
             vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
             vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
 #endif //__aarch64__
-
         }
     };
     const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 3a50c07ded..1a9b533640 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -84,7 +84,7 @@ private:
      *
      * @param[in] window Region on which to execute the kernel.
      */
-    template <typename T>
+    template <typename TIn, typename TOut>
     void run_quantize_qasymm8(const Window &window);
     /** Function to apply QASYMM16 quantization on a tensor.
      *
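The header change above is the crux of the patch: run_quantize_qasymm8 gains a second template parameter so one kernel can serve both QASYMM8 and QASYMM8_SIGNED, which share their arithmetic and differ only in destination range. A minimal scalar sketch of that shared arithmetic follows; the names are illustrative stand-ins, not the library's helpers (the real ones are quantize_qasymm8 and quantize_qasymm8_signed, which take an explicit RoundingPolicy):

    // quantized = clamp(round(value / scale) + offset), where only the clamp
    // range differs: [0, 255] for QASYMM8, [-128, 127] for QASYMM8_SIGNED.
    // std::lround is a simplification; the kernel selects TO_NEAREST_EVEN on
    // aarch64 and TO_ZERO elsewhere.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    template <typename TOut>
    TOut quantize_value_sketch(float value, float scale, int32_t offset)
    {
        const int32_t lo = std::numeric_limits<TOut>::lowest(); // 0 or -128
        const int32_t hi = std::numeric_limits<TOut>::max();    // 255 or 127
        const int32_t q  = static_cast<int32_t>(std::lround(value / scale)) + offset;
        return static_cast<TOut>(std::min(hi, std::max(lo, q)));
    }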
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index 6a9c4ae14c..2beb730448 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -40,13 +40,15 @@ namespace arm_compute
 {
 namespace
 {
+constexpr auto window_step = 16;
+
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
 
     return Status{};
@@ -69,6 +71,25 @@ inline const float32x4x4_t load_value(const float16_t *input_ptr)
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+
+template <typename element_type>
+using vector_type = wrapper::traits::neon_vector_t<element_type, window_step>;
+
+template <typename quantized_type>
+vector_type<quantized_type> vquantize_qasymm8(const float32x4x4_t &qv, const UniformQuantizationInfo &qi);
+
+template <>
+vector_type<uint8_t> vquantize_qasymm8<uint8_t>(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+    return vquantize(qv, qi);
+}
+
+template <>
+vector_type<int8_t> vquantize_qasymm8<int8_t>(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+    return vquantize_signed(qv, qi);
+}
+
 } // namespace
 
 NEQuantizationLayerKernel::NEQuantizationLayerKernel()
@@ -86,13 +107,15 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output)
 
     static std::map<DataType, QuantizationFunctionExecutorPtr> quant_map_f32 =
     {
-        { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float> },
+        { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float, uint8_t> },
+        { DataType::QASYMM8_SIGNED, &NEQuantizationLayerKernel::run_quantize_qasymm8<float, int8_t> },
         { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16<float> },
     };
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     static std::map<DataType, QuantizationFunctionExecutorPtr> quant_map_f16 =
     {
-        { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t> },
+        { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t, uint8_t> },
+        { DataType::QASYMM8_SIGNED, &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t, int8_t> },
         { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16<float16_t> },
     };
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
@@ -127,12 +150,11 @@ Status NEQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
     return Status{};
 }
 
-template <typename T>
+template <typename TIn, typename TOut>
 void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window)
 {
-    constexpr auto window_step    = 16;
-    const auto     window_start_x = static_cast<int>(window.x().start());
-    const auto     window_end_x   = static_cast<int>(window.x().end());
+    const auto window_start_x = static_cast<int>(window.x().start());
+    const auto window_end_x   = static_cast<int>(window.x().end());
 
     const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform();
 #ifdef __aarch64__
@@ -149,18 +171,18 @@ void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window)
     Iterator output(_output, win_collapsed);
     execute_window_loop(win_collapsed, [&](const Coordinates &)
     {
-        auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
-        auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+        auto input_ptr  = reinterpret_cast<const TIn *>(input.ptr());
+        auto output_ptr = reinterpret_cast<TOut *>(output.ptr());
 
         int x = window_start_x;
         for(; x <= (window_end_x - window_step); x += window_step)
         {
-            wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo));
+            wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo));
         }
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy);
+            output_ptr[x] = Qasymm8QuantizationHelper<TOut>::quantize(input_ptr[x], uqinfo, rounding_policy);
         }
     },
     input, output);
@@ -169,9 +191,8 @@ void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window)
 template <typename T>
 void NEQuantizationLayerKernel::run_quantize_qasymm16(const Window &window)
 {
-    constexpr auto window_step    = 16;
-    const auto     window_start_x = static_cast<int>(window.x().start());
-    const auto     window_end_x   = static_cast<int>(window.x().end());
+    const auto window_start_x = static_cast<int>(window.x().start());
+    const auto window_end_x   = static_cast<int>(window.x().end());
 
     const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform();
 #ifdef __aarch64__
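The anonymous-namespace additions above route the vectorised loop at compile time: a declared-but-undefined primary template, vquantize_qasymm8, plus one explicit specialisation per output type forwarding to vquantize or vquantize_signed. A self-contained miniature of the same dispatch pattern, reduced to scalars (names illustrative, not from the library):

    #include <cstdint>
    #include <iostream>

    // Primary template: declared but never defined, so an output type without
    // an explicit specialisation fails at link time instead of misbehaving.
    template <typename T>
    T saturate(int32_t v);

    template <>
    uint8_t saturate<uint8_t>(int32_t v)
    {
        return static_cast<uint8_t>(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    template <>
    int8_t saturate<int8_t>(int32_t v)
    {
        return static_cast<int8_t>(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    int main()
    {
        // The same value saturates differently per destination type.
        std::cout << int(saturate<uint8_t>(300)) << ' ' << int(saturate<int8_t>(300)) << '\n'; // prints: 255 127
    }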
diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp
index 49118f7dc5..a4af2a2886 100644
--- a/tests/validation/NEON/QuantizationLayer.cpp
+++ b/tests/validation/NEON/QuantizationLayer.cpp
@@ -100,6 +100,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(QuantizationSmallShapes,
 template <typename T>
 using NEQuantizationLayerQASYMM8Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, uint8_t>;
 template <typename T>
+using NEQuantizationLayerQASYMM8SignedFixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, int8_t>;
+template <typename T>
 using NEQuantizationLayerQASYMM16Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, uint16_t>;
 
 TEST_SUITE(Float)
@@ -112,6 +114,14 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u8);
 }
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8Signed, NEQuantizationLayerQASYMM8SignedFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataType", DataType::F32)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
 FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
                        framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
@@ -147,6 +157,14 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u8);
 }
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8Signed, NEQuantizationLayerQASYMM8SignedFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataType", DataType::F16)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
 FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
                        framework::dataset::make("DataType", DataType::F16)),
                        framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
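The new Signed cases mirror the existing unsigned ones and reuse the same absolute 8-bit tolerance. As a worked check of the QuantizationInfo(0.5f, 10) they use: an input of 1.0f quantizes to round(1.0 / 0.5) + 10 = 12 under either output type, while -60.0f quantizes to round(-60.0 / 0.5) + 10 = -110, which is representable in QASYMM8_SIGNED but saturates to 0 in QASYMM8.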
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index ae23f7ec27..35d44ffa49 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -40,26 +40,31 @@ SimpleTensor<Tout> quantization_layer(const SimpleTensor<Tin> &src, DataType output_data_type, const QuantizationInfo &quantization_info)
     SimpleTensor<Tout> dst{ src.shape(), output_data_type, 1, quantization_info };
 
     const UniformQuantizationInfo qinfo = quantization_info.uniform();
+
+#ifdef __aarch64__
+    constexpr auto rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
+#else  // __aarch64__
+    constexpr auto rounding_policy = RoundingPolicy::TO_ZERO;
+#endif // __aarch64__
+
     switch(output_data_type)
     {
         case DataType::QASYMM8:
             for(int i = 0; i < src.num_elements(); ++i)
             {
-#ifdef __aarch64__
-                dst[i] = quantize_qasymm8((src[i]), qinfo, RoundingPolicy::TO_NEAREST_EVEN);
-#else  // __aarch64__
-                dst[i] = quantize_qasymm8((src[i]), qinfo, RoundingPolicy::TO_ZERO);
-#endif // __aarch64__
+                dst[i] = quantize_qasymm8((src[i]), qinfo, rounding_policy);
+            }
+            break;
+        case DataType::QASYMM8_SIGNED:
+            for(int i = 0; i < src.num_elements(); ++i)
+            {
+                dst[i] = quantize_qasymm8_signed((src[i]), qinfo, rounding_policy);
             }
             break;
         case DataType::QASYMM16:
             for(int i = 0; i < src.num_elements(); ++i)
             {
-#ifdef __aarch64__
-                dst[i] = quantize_qasymm16((src[i]), qinfo, RoundingPolicy::TO_NEAREST_EVEN);
-#else  // __aarch64__
-                dst[i] = quantize_qasymm16((src[i]), qinfo, RoundingPolicy::TO_ZERO);
-#endif // __aarch64__
+                dst[i] = quantize_qasymm16((src[i]), qinfo, rounding_policy);
             }
             break;
         default:
@@ -72,6 +77,8 @@ template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 template SimpleTensor<uint16_t> quantization_layer(const SimpleTensor<half> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 template SimpleTensor<uint16_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
+template SimpleTensor<int8_t> quantization_layer(const SimpleTensor<half> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
+template SimpleTensor<int8_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1
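For readers who want to exercise the new output type end to end, a minimal usage sketch against the public runtime API (hypothetical shapes and quantization parameters; assumes the library's usual init/configure/allocate/run flow):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        // QASYMM8_SIGNED output with the scale/offset used by the new tests.
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));

        NEQuantizationLayer quantize;
        quantize.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with float data here ...
        quantize.run();
        return 0;
    }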