From d817647a4fabc8eccd0e64f54465e378a4239b32 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park
Date: Wed, 4 Dec 2019 09:46:28 +0000
Subject: COMPMID-2767 [NEON] add support for QASYMM8_SIGNED to DequantizationLayer

Change-Id: If5b21d1e656b21baf39346c2fd74e8edc75007f5
Signed-off-by: Sang-Hoon Park
Reviewed-on: https://review.mlplatform.org/c/2429
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
---
 arm_compute/core/NEON/NEAsymm.h                    | 24 ++++++++++++++++++++
 .../NEON/kernels/NEDequantizationLayerKernel.cpp   | 19 +++++++++-------
 tests/AssetsLibrary.h                              |  1 +
 tests/validation/NEON/DequantizationLayer.cpp      | 26 +++++++++++++++++-----
 .../fixtures/DequantizationLayerFixture.h          |  3 +++
 tests/validation/reference/DequantizationLayer.cpp | 21 ++++++++++++-----
 6 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index 234d48882c..67adcef9b1 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -451,6 +451,30 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs
     return vdequantized_input;
 }
 
+/** Dequantize a vector of 16 values stored as signed asymmetric.
+ *
+ * @param[in] qv     Input values to be dequantized.
+ * @param[in] scale  Quantization scaling factor.
+ * @param[in] offset Zero quantization offset.
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale, int32_t offset)
+{
+    const int32x4_t     voffset = vdupq_n_s32(offset);
+    const float32x4_t   vscale  = vdupq_n_f32(scale);
+    const float32x4x4_t vdequantized_input =
+    {
+        {
+            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
+            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),
+            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
+            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),
+        }
+    };
+    return vdequantized_input;
+}
+
 /** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values.
  *
  * @param[in] qv Input values to be dequantized.
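For the reader's orientation (not from the patch): each of the 16 lanes above widens
an int8 value to int32 in two vmovl steps, subtracts the zero-point offset, converts
to float, and multiplies by the scale. A scalar sketch of one lane, with an
illustrative function name:

    #include <cstdint>

    // One lane of vdequantize(const int8x16_t &, float, int32_t), in scalar form:
    // widen to int32, remove the zero point, then apply the scale.
    inline float dequantize_lane(int8_t qv, float scale, int32_t offset)
    {
        return static_cast<float>(static_cast<int32_t>(qv) - offset) * scale;
    }
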
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index f555df3828..947f257bcb 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -43,7 +43,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
 
     if(output->tensor_shape().total_size() > 0)
     {
@@ -116,7 +116,7 @@ inline void store_result(float16_t *ptr, const float32x4x2_t &v)
 }
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
-template <typename T>
+template <typename TOut, typename TIn>
 void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window)
 {
     const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
@@ -137,8 +137,8 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
 
     execute_window_loop(win_collapsed, [&](const Coordinates &)
     {
-        const auto in_ptr  = reinterpret_cast<const uint8_t *>(in.ptr());
-        const auto out_ptr = reinterpret_cast<T *>(out.ptr());
+        const auto in_ptr  = reinterpret_cast<const TIn *>(in.ptr());
+        const auto out_ptr = reinterpret_cast<TOut *>(out.ptr());
 
         int x = window_start_x;
         for(; x <= (window_end_x - window_step_x); x += window_step_x)
@@ -146,14 +146,14 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
             const auto vin  = wrapper::vloadq(in_ptr + x);
             const auto vdeq = vdequantize(vin, scale, offset);
 
-            store_result(reinterpret_cast<T *>(out_ptr + x), vdeq);
+            store_result(reinterpret_cast<TOut *>(out_ptr + x), vdeq);
         }
 
         // Compute left-over elements
         for(; x < window_end_x; ++x)
        {
-            uint8_t val    = *(in_ptr + x);
-            *(out_ptr + x) = static_cast<T>(dequantize(val, scale, offset));
+            auto val       = *(in_ptr + x);
+            *(out_ptr + x) = static_cast<TOut>(Qasymm8QuantizationHelper<TIn>::dequantize(val, qinfo));
         }
     },
     in, out);
@@ -340,7 +340,10 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window
     switch(input->info()->data_type())
     {
         case DataType::QASYMM8:
-            run_dequantization_qasymm8<T>(input, output, window);
+            run_dequantization_qasymm8<T, uint8_t>(input, output, window);
+            break;
+        case DataType::QASYMM8_SIGNED:
+            run_dequantization_qasymm8<T, int8_t>(input, output, window);
             break;
         case DataType::QSYMM8_PER_CHANNEL:
             input->info()->data_layout() == DataLayout::NHWC ?
                 run_dequantization_qsymm8_per_channel_nhwc<T>(input, output, window) :
                 run_dequantization_qsymm8_per_channel_nchw<T>(input, output, window);
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 29d9cc6d49..c4892748f4 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -846,6 +846,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t
         }
         case DataType::S8:
         case DataType::QSYMM8:
+        case DataType::QASYMM8_SIGNED:
         {
             ARM_COMPUTE_ERROR_ON(!(std::is_same<int8_t, D>::value));
             std::uniform_int_distribution<int8_t> distribution_s8(low, high);
diff --git a/tests/validation/NEON/DequantizationLayer.cpp b/tests/validation/NEON/DequantizationLayer.cpp
index 0dce76a933..4389419d73 100644
--- a/tests/validation/NEON/DequantizationLayer.cpp
+++ b/tests/validation/NEON/DequantizationLayer.cpp
@@ -55,6 +55,14 @@ const auto dataset_quant_f32 = combine(combine(combine(datasets::SmallShapes(),
 const auto dataset_quant_f16 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
                                                framework::dataset::make("DataType", DataType::F16)),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_asymm_signed_f32 = combine(combine(combine(datasets::SmallShapes(),
+                                                                    framework::dataset::make("QuantizedTypes", { DataType::QASYMM8_SIGNED })),
+                                                            framework::dataset::make("DataType", DataType::F32)),
+                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_asymm_signed_f16 = combine(combine(combine(datasets::SmallShapes(),
+                                                                    framework::dataset::make("QuantizedTypes", { DataType::QASYMM8_SIGNED })),
+                                                            framework::dataset::make("DataType", DataType::F16)),
+                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW }));
 const auto dataset_quant_per_channel_f32 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedPerChannelTypes()),
                                                            framework::dataset::make("DataType", DataType::F32)),
                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
@@ -73,6 +81,12 @@ const auto dataset_quant_per_channel_nightly_f32 = combine(combine(combine(datas
 const auto dataset_quant_per_channel_nightly_f16 = combine(combine(combine(datasets::LargeShapes(), datasets::QuantizedPerChannelTypes()),
                                                                    framework::dataset::make("DataType", DataType::F16)),
                                                            framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
+
+const auto dataset_precommit_f16 = concat(concat(dataset_quant_f16, dataset_quant_per_channel_f16), dataset_quant_asymm_signed_f16);
+const auto dataset_precommit_f32 = concat(concat(dataset_quant_f32, dataset_quant_per_channel_f32), dataset_quant_asymm_signed_f32);
+const auto dataset_nightly_f16   = concat(dataset_quant_f16, dataset_quant_per_channel_f16);
+const auto dataset_nightly_f32   = concat(dataset_quant_f32, dataset_quant_per_channel_f32);
+
 } // namespace
 
 TEST_SUITE(NEON)
@@ -86,14 +100,16 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
                TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::QASYMM8),         // Missmatching shapes
                TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::QASYMM8),        // Valid
                TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),        // Valid
+               TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8_SIGNED), // Valid
        }),
        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
               TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
               TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
               TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
               TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
+              TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
        })),
-       framework::dataset::make("Expected", { false, false, false, true, true})),
+       framework::dataset::make("Expected", { false, false, false, true, true, true })),
        input_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(NEDequantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -132,12 +148,12 @@ using NEDequantizationLayerFixture = DequantizationValidationFixture<Tensor, Acc
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, concat(dataset_quant_f16, dataset_quant_per_channel_f16))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, dataset_precommit_f16)
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, concat(dataset_quant_nightly_f16, dataset_quant_per_channel_nightly_f16))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, dataset_nightly_f16)
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -146,12 +162,12 @@ TEST_SUITE_END() // FP16
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, concat(dataset_quant_f32, dataset_quant_per_channel_f32))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, dataset_precommit_f32)
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, concat(dataset_quant_nightly_f32, dataset_quant_per_channel_nightly_f32))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, dataset_nightly_f32)
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index f44f8658c2..3699613a39 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -106,6 +106,7 @@ protected:
                 fill(src);
                 return reference::dequantization_layer<T>(src);
             }
+            case DataType::QASYMM8_SIGNED:
             case DataType::QSYMM8_PER_CHANNEL:
             case DataType::QSYMM8:
             {
@@ -149,6 +150,8 @@ protected:
             }
             case DataType::QASYMM8:
                 return QuantizationInfo(1.f / distribution_scale_q8(gen), distribution_offset_q8(gen));
+            case DataType::QASYMM8_SIGNED:
+                return QuantizationInfo(1.f / distribution_scale_q8(gen), -distribution_offset_q8(gen));
             default:
                 ARM_COMPUTE_ERROR("Unsupported data type");
             }
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 16f25c4427..7dd36402b3 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -36,18 +36,27 @@ namespace reference
 namespace
 {
 template <typename TOut>
-TOut dequantize(int8_t val, const UniformQuantizationInfo qinfo)
+TOut dequantize(int8_t val, const UniformQuantizationInfo qinfo, DataType dt)
 {
-    return static_cast<TOut>(dequantize_qsymm8(val, qinfo));
+    if(dt == DataType::QSYMM8 || dt == DataType::QSYMM8_PER_CHANNEL)
+    {
+        return static_cast<TOut>(dequantize_qsymm8(val, qinfo));
+    }
+    else
+    {
+        return static_cast<TOut>(dequantize_qasymm8_signed(val, qinfo));
+    }
 }
 template <typename TOut>
-TOut dequantize(uint8_t val, const UniformQuantizationInfo qinfo)
+TOut dequantize(uint8_t val, const UniformQuantizationInfo qinfo, DataType dt)
 {
+    ARM_COMPUTE_UNUSED(dt);
     return static_cast<TOut>(dequantize_qasymm8(val, qinfo));
 }
 template <typename TOut>
-TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo)
+TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt)
 {
+    ARM_COMPUTE_UNUSED(dt);
     return static_cast<TOut>(dequantize_qsymm16(val, qinfo));
 }
 } // namespace
@@ -77,7 +86,7 @@ SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
                 // Dequantize slice
                 for(int s = 0; s < WH; ++s)
                 {
-                    dst[idx + s] = dequantize<TOut>(static_cast<TIn>(src[idx + s]), channel_qinfo);
+                    dst[idx + s] = dequantize<TOut>(static_cast<TIn>(src[idx + s]), channel_qinfo, src_data_type);
                 }
             }
         }
@@ -89,7 +98,7 @@ SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
 
         for(int i = 0; i < src.num_elements(); ++i)
         {
-            dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info));
+            dst[i] = static_cast<TOut>(dequantize<TOut>(static_cast<TIn>(src[i]), quantization_info, src_data_type));
         }
     }
-- 
cgit v1.2.1
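
For completeness, a minimal usage sketch of the new QASYMM8_SIGNED path through the
public runtime API; the shape, scale, and offset below are illustrative assumptions,
not values taken from the patch:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // QASYMM8_SIGNED input: real_value = (quantized_value - offset) * scale
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, -10)));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        NEDequantizationLayer dequant;
        dequant.configure(&src, &dst); // QASYMM8_SIGNED -> F32 now passes validate()

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with int8 quantized data here ...

        dequant.run(); // dst now holds the dequantized float values
        return 0;
    }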