From 4370cffc7fb0da7fb486b9d06d24e16169521876 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Fri, 7 Feb 2020 16:31:59 +0000 Subject: COMPMID-3034: Add NERequantizationLayerKernel Change-Id: I3f098c3c2c2031d8cbe7326eab88a4e78bda867f Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2704 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park --- arm_compute/core/NEON/NEMath.h | 11 ++- arm_compute/core/NEON/NEMath.inl | 14 +++- .../core/NEON/kernels/NEQuantizationLayerKernel.h | 12 ++-- arm_compute/core/QuantizationInfo.h | 44 ++++++++++++ .../runtime/NEON/functions/NEQuantizationLayer.h | 10 +-- .../NEON/kernels/NEQuantizationLayerKernel.cpp | 78 ++++++++++++++-------- tests/validation/NEON/QuantizationLayer.cpp | 72 ++++++++++++++++++-- 7 files changed, 194 insertions(+), 47 deletions(-) diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h index 54f8252250..3905f67e29 100644 --- a/arm_compute/core/NEON/NEMath.h +++ b/arm_compute/core/NEON/NEMath.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -173,6 +173,15 @@ float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in); */ float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in); +/** Converts to float32x4x4_t from the specified templated 16-element vector + * + * @param[in] in Vector of 16 8-bit integers (uint8x16_t or int8x16_t) to be converted + * + * @return Converted vector of float + */ +template +float32x4x4_t convert_to_float32x4x4(const T &in); + /** Converts from two float32x4x3_t to just one uint8x8x3_t * * @param[in] in1 First input vector of float to be converted diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index 5d8b82c281..49870d06a8 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. 
+ * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -345,6 +345,18 @@ inline float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in) return out; } +template <> +inline float32x4x4_t convert_to_float32x4x4(const uint8x16_t &in) +{ + return convert_uint8x16_to_float32x4x4(in); +} + +template <> +inline float32x4x4_t convert_to_float32x4x4(const int8x16_t &in) +{ + return convert_int8x16_to_float32x4x4(in); +} + inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out) { out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])), diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h index 1a9b533640..087e767b73 100644 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -56,16 +56,16 @@ public: ~NEQuantizationLayerKernel() = default; /** Set the input, output. * - * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16. + * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. * * @note Output auto initialization is not supported by this kernel */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel * - * @param[in] input Input tensor info. 
Data types supported: F32/F16. - * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. * * @return a status */ @@ -80,7 +80,7 @@ private: * @param[in] window Region on which to execute the kernel. */ using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window); - /** Function to apply QASYMM8 quantization on a tensor. + /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor. * * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index 06ba665c6b..f859beb87a 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -516,5 +516,49 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) { return dequantize_qasymm16(value, qinfo.uniform()); } + +/* + * In case of requantization of a quantized input tensor to an output tensor with another quantization, + * instead of applying a dequantization and then a quantization function, we just compute a new scale and + * offset. 
+ * + * Assuming: + * - q_i as input quantized value + * - q_o as output quantized value + * - z_i as input quantization offset value + * - z_o as output quantization offset value + * - s_i as input quantization scale value + * - s_o as output quantization scale value + * - z_n as new quantization offset value + * - s_n as new quantization scale value + * + * q_o = ( q_i - z_i ) * s_i / s_o + z_o + * + * We can rewrite the formula as: + * + * q_o = ( q_i * s_i / s_o ) - z_i * s_i / s_o + z_o + * + * q_o = q_i / s_n + z_n + * + * Where: + * + * s_n = s_o / s_i + * + * z_n = - z_i * s_i / s_o + z_o + * + */ +inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out) +{ + float scale_to_apply = uqinfo_out.scale; + int32_t offset_to_apply = uqinfo_out.offset; + + scale_to_apply /= uqinfo_in.scale; + // In order to minimize flooring we convert the offset to a float, + // then compute the new offset in the float domain, + // finally we convert it back as int32_t + offset_to_apply -= static_cast(static_cast(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale); + return UniformQuantizationInfo(scale_to_apply, offset_to_apply); +} + } // namespace arm_compute #endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */ diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index 1cf83e87ad..fc317be81e 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,14 +48,14 @@ public: NEQuantizationLayer() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16. 
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16 + * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16 */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer * - * @param[in] input Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16. - * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16 + * @param[in] input Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. + * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16 * * @return a status */ diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 2beb730448..113abad6b6 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/NEON/NEAsymm.h" +#include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -46,7 +47,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); @@ -54,6 +55,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) return Status{}; } +template +inline float32x4x4_t load_value(const T *input_ptr) +{ + using Tx16_t = typename wrapper::traits::neon_vector::type; + return arm_compute::convert_to_float32x4x4(wrapper::vloadq(input_ptr)); +} + +template <> inline float32x4x4_t load_value(const float *input_ptr) { return { wrapper::vloadq(input_ptr), @@ -62,7 +71,8 @@ inline float32x4x4_t load_value(const float *input_ptr) wrapper::vloadq(input_ptr + 12) }; } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -inline const float32x4x4_t load_value(const float16_t *input_ptr) +template <> +inline float32x4x4_t load_value(const float16_t *input_ptr) { return { vcvt_f32_f16(wrapper::vload(input_ptr)), vcvt_f32_f16(wrapper::vload(input_ptr + 4)), @@ -105,34 +115,38 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output) _input = input; _output 
= output; - static std::map quant_map_f32 = + static const std::map quant_map = { - { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8 }, - { DataType::QASYMM8_SIGNED, &NEQuantizationLayerKernel::run_quantize_qasymm8 }, - { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16 }, - }; + { "op_QASYMM8_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_QASYMM8_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_QASYMM8_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16 }, + + { "op_QASYMM8_SIGNED_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_QASYMM8_SIGNED_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_QASYMM8_SIGNED_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16 }, + + { "op_F32_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_F32_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_F32_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16 }, + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - static std::map quant_map_f16 = - { - { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8 }, - { DataType::QASYMM8_SIGNED, &NEQuantizationLayerKernel::run_quantize_qasymm8 }, - { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16 }, - }; + { "op_F16_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_F16_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { "op_F16_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16 }, #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ + }; + + std::string function_to_call("op_"); + function_to_call += string_from_data_type(_input->info()->data_type()) + "_"; + function_to_call += string_from_data_type(_output->info()->data_type()); + + auto it = quant_map.find(function_to_call); - switch(input->info()->data_type()) + if(it == quant_map.end()) { - case 
DataType::F32: - _func = quant_map_f32[output->info()->data_type()]; - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - _func = quant_map_f16[output->info()->data_type()]; - break; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - default: - ARM_COMPUTE_ERROR("Unsupported input data type."); + ARM_COMPUTE_ERROR("Unsupported combination of input and output data types"); } + _func = it->second; // Configure kernel window Window win_config = calculate_max_window(*input->info(), Steps()); @@ -156,7 +170,12 @@ void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window) const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform(); + const UniformQuantizationInfo uqinfo_in = _input->info()->quantization_info().uniform(); + UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform(); + if(is_data_type_quantized_asymmetric(_input->info()->data_type())) + { + uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); + } #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ @@ -194,7 +213,12 @@ void NEQuantizationLayerKernel::run_quantize_qasymm16(const Window &window) const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform(); + const UniformQuantizationInfo uqinfo_in = _input->info()->quantization_info().uniform(); + UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform(); + if(is_data_type_quantized_asymmetric(_input->info()->data_type())) + { + uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); + } #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ diff 
--git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp index a4af2a2886..a5372b897c 100644 --- a/tests/validation/NEON/QuantizationLayer.cpp +++ b/tests/validation/NEON/QuantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,11 +43,11 @@ namespace validation namespace { /** Tolerance for quantization */ -constexpr AbsoluteTolerance tolerance_u8(1); -constexpr AbsoluteTolerance tolerance_u16(1); - -const auto QuantizationSmallShapes = concat(datasets::Small3DShapes(), datasets::Small4DShapes()); -const auto QuantizationLargeShapes = concat(datasets::Large3DShapes(), datasets::Large4DShapes()); +constexpr AbsoluteTolerance tolerance_u8(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8 data types */ +constexpr AbsoluteTolerance tolerance_s8(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8_SIGNED data types */ +constexpr AbsoluteTolerance tolerance_u16(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM16 data types */ +const auto QuantizationSmallShapes = concat(datasets::Small3DShapes(), datasets::Small4DShapes()); +const auto QuantizationLargeShapes = concat(datasets::Large3DShapes(), datasets::Large4DShapes()); } // namespace TEST_SUITE(NEON) @@ -56,7 +56,7 @@ TEST_SUITE(QuantizationLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // Wrong input data type + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // Wrong output data type TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32), // Wrong output data type TensorInfo(TensorShape(16U, 
16U, 2U, 5U), 1, DataType::F32), // Missmatching shapes TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32), // Valid @@ -193,6 +193,64 @@ TEST_SUITE_END() // FP16 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +template +using NEQuantizationLayerQASYMM8GenFixture = QuantizationValidationGenericFixture; +template +using NEQuantizationLayerQASYMM8_SIGNEDGenFixture = QuantizationValidationGenericFixture; +template +using NEQuantizationLayerQASYMM16GenFixture = QuantizationValidationGenericFixture; +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 15) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); +} +FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNEDGenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataTypeIn", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10), QuantizationInfo(2.0f, -25) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_s8); +} +FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16GenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataTypeIn", DataType::QASYMM8)), + 
framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(4.0f, 23) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_u16); +} +TEST_SUITE_END() // QASYMM8 +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNEDGenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_s8); +} +FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, 10), QuantizationInfo(2.0f, -25) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 30) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // QuantizationLayer TEST_SUITE_END() // NEON } // namespace validation -- cgit v1.2.1