From 29a01c90fc372d31188ab7157b45b32ce24fa9b3 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Thu, 22 Aug 2019 11:44:04 +0100
Subject: COMPMID-2417: NEDequantizationLayer support for QASYMM8_PER_CHANNEL

Change-Id: I1ef4ce8610e11e81702b0b7f0f7c437fed49833e
Signed-off-by: Michalis Spyrou
Reviewed-on: https://review.mlplatform.org/c/1795
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../NEON/kernels/NEDequantizationLayerKernel.h     |  4 +-
 arm_compute/core/QuantizationInfo.h                | 11 +++++
 .../runtime/NEON/functions/NEDequantizationLayer.h |  4 +-
 .../NEON/kernels/NEDequantizationLayerKernel.cpp   | 47 +++++++++++++++++-
 tests/AssetsLibrary.h                              |  1 +
 tests/datasets/DatatypeDataset.h                   | 11 +++++
 tests/validation/NEON/DequantizationLayer.cpp      |  6 ++-
 .../fixtures/DequantizationLayerFixture.h          | 57 +++++++++++++---------
 tests/validation/reference/DequantizationLayer.cpp |  8 +--
 9 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
index f0a2a57d1a..3e7feda650 100644
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
@@ -52,13 +52,13 @@ public:
     ~NEDequantizationLayerKernel() = default;
     /** Set input, output tensors.
      *
-     * @param[in]  input  Source tensor. Data type supported: QASYMM8/QSYMM8/QSYMM16.
+     * @param[in]  input  Source tensor. Data type supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
      *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QSYMM8/QSYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[in] output Output tensor info. Data types supported: F16/F32.
      *
      * @return a status
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 79afca0714..1517d48381 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -103,6 +103,17 @@ public:
         : _scale(scale), _offset()
     {
     }
+    /** Construct quantization info.
+     *
+     * @note Used for asymmetric per channel quantization
+     *
+     * @param[in] scale  Scale.
+     * @param[in] offset Offset.
+     */
+    QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset)
+        : _scale(scale), _offset(offset)
+    {
+    }
     /** Scale vector accessor
      *
      * @return A reference to quantization scale metadata
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index c08366e5a7..88c8777a68 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -39,13 +39,13 @@ class NEDequantizationLayer : public INESimpleFunctionNoBorder
 public:
     /** Configure the kernel.
      *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QSYMM8/QSYMM16.
+     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayer
      *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QSYMM8/QSYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[in] output Output tensor info. Data type supported: F16/F32.
      *
      * @return a status
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index e52f53ea04..d880c80d82 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -43,7 +43,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8, DataType::QSYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
 
     if(output->tensor_shape().total_size() > 0)
     {
@@ -159,6 +159,48 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
     in, out);
 }
 
+template <typename T>
+void run_dequantization_qasymm8_per_channel(const ITensor *input, ITensor *output, const Window &window)
+{
+    const std::vector<float>   scale  = input->info()->quantization_info().scale();
+    const std::vector<int32_t> offset = input->info()->quantization_info().offset();
+
+    const int  window_step_x  = 16;
+    const auto window_start_x = static_cast<int>(window.x().start());
+    const auto window_end_x   = static_cast<int>(window.x().end());
+
+    // Reset first dimension to handle tail calculations manually
+    Window win(window);
+    win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+    // Create iterators
+    Iterator in(input, win);
+    Iterator out(output, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto in_ptr  = reinterpret_cast<const uint8_t *>(in.ptr());
+        const auto out_ptr = reinterpret_cast<T *>(out.ptr());
+
+        int x = window_start_x;
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
+        {
+            const auto vin  = wrapper::vloadq(in_ptr + x);
+            const auto vdeq = vdequantize(vin, scale[id.z()], offset[id.z()]);
+
+            store_result(reinterpret_cast<T *>(out_ptr + x), vdeq);
+        }
+
+        // Compute left-over elements
+        for(; x < window_end_x; ++x)
+        {
+            uint8_t val    = *(in_ptr + x);
+            *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()], offset[id.z()]));
+        }
+    },
+    in, out);
+}
+
 template <typename T>
 void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Window &window)
 {
@@ -251,6 +293,9 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window
         case DataType::QASYMM8:
             run_dequantization_qasymm8<T>(input, output, window);
             break;
+        case DataType::QASYMM8_PER_CHANNEL:
+            run_dequantization_qasymm8_per_channel<T>(input, output, window);
+            break;
         case DataType::QSYMM8:
             run_dequantization_qsymm8<T>(input, output, window);
             break;
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 2f2665f381..2ac13468de 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -628,6 +628,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t
     {
         case DataType::U8:
         case DataType::QASYMM8:
+        case DataType::QASYMM8_PER_CHANNEL:
        {
             std::uniform_int_distribution<uint8_t> distribution_u8(std::numeric_limits<uint8_t>::lowest(), std::numeric_limits<uint8_t>::max());
             fill(tensor, distribution_u8, seed_offset);
diff --git a/tests/datasets/DatatypeDataset.h b/tests/datasets/DatatypeDataset.h
index a158a5f52d..9bdb346340 100644
--- a/tests/datasets/DatatypeDataset.h
+++ b/tests/datasets/DatatypeDataset.h
@@ -48,6 +48,17 @@ public:
     {
     }
 };
+class QuantizedPerChannelTypes final : public framework::dataset::ContainerDataset<std::vector<DataType>>
+{
+public:
+    QuantizedPerChannelTypes()
+        : ContainerDataset("QuantizedPerChannelTypes",
+    {
+        DataType::QASYMM8_PER_CHANNEL
+    })
+    {
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/NEON/DequantizationLayer.cpp b/tests/validation/NEON/DequantizationLayer.cpp
index a4606fe8a0..005ed6900c 100644
--- a/tests/validation/NEON/DequantizationLayer.cpp
+++ b/tests/validation/NEON/DequantizationLayer.cpp
@@ -123,13 +123,15 @@ TEST_SUITE_END() // FP16
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), concat(datasets::QuantizedTypes(),
+                                                                                                                  datasets::QuantizedPerChannelTypes())),
                        framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), datasets::QuantizedTypes()),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), concat(datasets::QuantizedTypes(),
+                                                                                                                datasets::QuantizedPerChannelTypes())),
                        framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index 2c8f05746d..4842ee1c59 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -49,7 +49,7 @@ public:
     template <typename...>
     void setup(TensorShape shape, DataType src_data_type, DataType dst_datatype)
     {
-        _quantization_info = generate_quantization_info(src_data_type);
+        _quantization_info = generate_quantization_info(src_data_type, shape.z());
         _target            = compute_target(shape, src_data_type, dst_datatype);
         _reference         = compute_reference(shape, src_data_type);
     }
@@ -92,32 +92,34 @@ protected:
 
     SimpleTensor<T> compute_reference(const TensorShape &shape, DataType src_data_type)
     {
-        if(src_data_type == DataType::QASYMM8)
+        switch(src_data_type)
         {
-            SimpleTensor<uint8_t> src{ shape, src_data_type, 1, _quantization_info };
-            fill(src);
-            return reference::dequantization_layer<T>(src);
-        }
-        else if(src_data_type == DataType::QSYMM8)
-        {
-            SimpleTensor<int8_t> src{ shape, src_data_type, 1, _quantization_info };
-            fill(src);
-            return reference::dequantization_layer<T>(src);
-        }
-        else if(src_data_type == DataType::QSYMM16)
-        {
-            SimpleTensor<int16_t> src{ shape, src_data_type, 1, _quantization_info };
-            fill(src);
-            return reference::dequantization_layer<T>(src);
-        }
-        else
-        {
-            ARM_COMPUTE_ERROR("Unsupported data type");
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_PER_CHANNEL:
+            {
+                SimpleTensor<uint8_t> src{ shape, src_data_type, 1, _quantization_info };
+                fill(src);
+                return reference::dequantization_layer<T>(src);
+            }
+            case DataType::QSYMM8:
+            {
+                SimpleTensor<int8_t> src{ shape, src_data_type, 1, _quantization_info };
+                fill(src);
+                return reference::dequantization_layer<T>(src);
+            }
+            case DataType::QSYMM16:
+            {
+                SimpleTensor<int16_t> src{ shape, src_data_type, 1, _quantization_info };
+                fill(src);
+                return reference::dequantization_layer<T>(src);
+            }
+            default:
+                ARM_COMPUTE_ERROR("Unsupported data type");
         }
     }
 
 protected:
-    QuantizationInfo generate_quantization_info(DataType data_type)
+    QuantizationInfo generate_quantization_info(DataType data_type, int32_t num_channels)
     {
         std::mt19937                    gen(library.get()->seed());
         std::uniform_int_distribution<> distribution_scale_q8(1, 255);
@@ -130,6 +132,17 @@ protected:
                 return QuantizationInfo(1.f / distribution_scale_q16(gen));
             case DataType::QSYMM8:
                 return QuantizationInfo(1.f / distribution_scale_q8(gen));
+            case DataType::QASYMM8_PER_CHANNEL:
+            {
+                std::vector<float>   scale(num_channels);
+                std::vector<int32_t> offset(num_channels);
+                for(int32_t i = 0; i < num_channels; ++i)
+                {
+                    scale[i]  = 1.f / distribution_scale_q8(gen);
+                    offset[i] = distribution_offset_q8(gen);
+                }
+                return QuantizationInfo(scale, offset);
+            }
             case DataType::QASYMM8:
                 return QuantizationInfo(1.f / distribution_scale_q8(gen), distribution_offset_q8(gen));
             default:
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index cceee0421c..74686bdaaf 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -59,20 +59,22 @@ SimpleTensor<TOut> dequantization_layer_nchw(const SimpleTensor<TIn> &src)
 
     SimpleTensor<TOut> dst{ src.shape(), dst_data_type };
 
-    if(src_data_type == DataType::QSYMM8_PER_CHANNEL)
+    if(is_data_type_quantized_per_channel(src_data_type))
     {
         const int WH = src.shape().x() * src.shape().y();
         const int C  = src.shape().z();
        const int N  = src.shape().total_size() / (WH * C);
 
-        const std::vector<float> qscales = src.quantization_info().scale();
+        const std::vector<float>   qscales     = src.quantization_info().scale();
+        const std::vector<int32_t> qoffsets    = src.quantization_info().offset();
+        const bool                 has_offsets = src_data_type == DataType::QASYMM8_PER_CHANNEL;
 
         for(int n = 0; n < N; ++n)
         {
             for(int c = 0; c < C; ++c)
             {
                 const size_t idx = n * C * WH + c * WH;
-                const UniformQuantizationInfo channel_qinfo = { qscales[c], 0 };
+                const UniformQuantizationInfo channel_qinfo = { qscales[c], has_offsets ? qoffsets[c] : 0 };
 
                 // Dequantize slice
                 for(int s = 0; s < WH; ++s)
-- 
cgit v1.2.1
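For readers who want to exercise the new path, below is a minimal, hypothetical usage sketch; it is not part of the patch. It attaches one (scale, offset) pair per channel through the new QuantizationInfo constructor, marks the input as QASYMM8_PER_CHANNEL, and dequantizes it to F32 with NEDequantizationLayer. The shapes and quantization values are invented for illustration, and the Tensor/TensorInfo setup follows the library's usual runtime workflow.

// Illustrative sketch only: shapes and quantization values are hypothetical.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // One (scale, offset) pair per channel; channels are indexed along Z,
    // matching the id.z() lookup in run_dequantization_qasymm8_per_channel().
    const QuantizationInfo qinfo(std::vector<float>{ 0.5f, 0.25f, 0.125f },
                                 std::vector<int32_t>{ 10, 0, 5 });

    // 4x4 plane with 3 channels, quantized per channel; the output is plain F32.
    Tensor src;
    Tensor dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U), 1, DataType::QASYMM8_PER_CHANNEL, qinfo));
    dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U), 1, DataType::F32));

    // Configure before allocating, as in the library's other runtime examples.
    NEDequantizationLayer dequant;
    dequant.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with quantized data, then run:
    dequant.run(); // element q in channel z becomes (q - offset[z]) * scale[z]

    return 0;
}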