From 0a48c4c83b598991b4d4235f870c24d9e6634b20 Mon Sep 17 00:00:00 2001 From: Mohammed Suhail Munshi Date: Tue, 30 Jan 2024 18:25:51 +0000 Subject: Requantization cases for offset changes only Resolves: [COMPMID-6681] Signed-off-by: Mohammed Suhail Munshi Change-Id: I325b9d478dd1d04a45533bb7708cf76e98ee0cee Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11058 Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/core/utils/DataTypeUtils.h | 26 +++- src/cpu/kernels/CpuQuantizeKernel.cpp | 170 +++++++++++++++++++++++++-- src/cpu/kernels/CpuQuantizeKernel.h | 25 +++- src/cpu/operators/CpuQuantize.cpp | 5 +- tests/validation/NEON/LSTMLayerQuantized.cpp | 6 +- tests/validation/NEON/QuantizationLayer.cpp | 30 ++++- 6 files changed, 234 insertions(+), 28 deletions(-) diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h index 7ea5eb7670..6fabb19b64 100644 --- a/arm_compute/core/utils/DataTypeUtils.h +++ b/arm_compute/core/utils/DataTypeUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2023 Arm Limited. + * Copyright (c) 2016-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H -#define ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H +#ifndef ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H +#define ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" @@ -373,6 +373,24 @@ inline bool is_data_type_quantized_asymmetric_signed(DataType dt) } } +/** Check if a given data type is of 8-bit asymmetric quantized type (signed or unsigned) + * + * @param[in] dt Input data type. + * + * @return True if data type is of 8-bit asymmetric quantized type (QASYMM8 or QASYMM8_SIGNED), else false.
+ */ +inline bool is_data_type_quantized_asymmetric_char(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8_SIGNED: + case DataType::QASYMM8: + return true; + default: + return false; + } +} + /** Check if a given data type is of symmetric quantized type * * @param[in] dt Input data type. @@ -528,4 +546,4 @@ inline std::string cpu_impl_dt(const DataType &data_type) } } // namespace arm_compute -#endif /*ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H */ +#endif // ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H diff --git a/src/cpu/kernels/CpuQuantizeKernel.cpp b/src/cpu/kernels/CpuQuantizeKernel.cpp index 5dde680837..d2ac6cf8ac 100644 --- a/src/cpu/kernels/CpuQuantizeKernel.cpp +++ b/src/cpu/kernels/CpuQuantizeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -104,6 +104,18 @@ vector_type vquantize_qasymm8(const float32x4x4_t &qv, const Uni return vquantize_signed(qv, qi); } +template ::value, bool>::type> +inline int8x16_t recombine_8_16(int16x8_t lower, int16x8_t upper) +{ + return wrapper::vcombine(wrapper::vqmovn(lower), wrapper::vqmovn(upper)); +} + +template ::value, bool>::type> +inline uint8x16_t recombine_8_16(int16x8_t lower, int16x8_t upper) +{ + return wrapper::vcombine(wrapper::vqmovun(lower), wrapper::vqmovun(upper)); +} + } // namespace void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst) @@ -120,6 +132,19 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst) {"op_QASYMM8_SIGNED_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8}, {"op_QASYMM8_SIGNED_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16}, + // Functions for offset only requantization + {"op_OFFSET_ONLY_QASYMM8_QASYMM8", &CpuQuantizeKernel::run_requantize_offset_only}, + {"op_OFFSET_ONLY_QASYMM8_QASYMM8_SIGNED", &CpuQuantizeKernel::run_requantize_offset_only}, + {"op_OFFSET_ONLY_QASYMM8_SIGNED_QASYMM8", 
&CpuQuantizeKernel::run_requantize_offset_only}, + {"op_OFFSET_ONLY_QASYMM8_SIGNED_QASYMM8_SIGNED", + &CpuQuantizeKernel::run_requantize_offset_only}, + + // Functions for offset uint8 to int8 and vice versa quantization (no scale changes) + {"op_OFFSET_ONLY_CONVERT_QASYMM8_SIGNED_QASYMM8", + &CpuQuantizeKernel::run_requantize_offset_only_convert}, + {"op_OFFSET_ONLY_CONVERT_QASYMM8_QASYMM8_SIGNED", + &CpuQuantizeKernel::run_requantize_offset_only_convert}, + {"op_F32_QSYMM8", &CpuQuantizeKernel::run_quantize_qsymm8}, {"op_F32_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8}, @@ -134,6 +159,26 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst) }; std::string function_to_call("op_"); + + // For offset only functions - must be 8-bit and have identical scale values. + if (src->quantization_info().scale() == dst->quantization_info().scale() && + (is_data_type_quantized_asymmetric_char(src->data_type()) && + is_data_type_quantized_asymmetric_char(dst->data_type()))) + { + function_to_call += "OFFSET_ONLY_"; + // Optimized path for 8-bit data type conversion (QASYMM8 <-> QASYMM8_SIGNED) with an offset-only change. + // It is selected only when the data types differ and the requantization offset is exactly +128 (signed source) or -128 (unsigned source).
+ auto uqinfo = + compute_requantization_scale_offset(src->quantization_info().uniform(), dst->quantization_info().uniform()); + const auto src_dt = src->data_type(); + if (src->data_type() != dst->data_type() && ((src_dt == DataType::QASYMM8_SIGNED && uqinfo.offset == 128) || + (src_dt == DataType::QASYMM8 && uqinfo.offset == -128))) + { + function_to_call += "CONVERT_"; + } + } + + // Specify datatype for function function_to_call += string_from_data_type(src->data_type()) + "_"; function_to_call += string_from_data_type(dst->data_type()); @@ -145,9 +190,11 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst) } _func = it->second; - // Configure kernel window - Window win_config = calculate_max_window(*src, Steps()); - ICpuKernel::configure(win_config); + // Calculate window. Squash if possible. + Window win; + std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src); + + ICpuKernel::configure(win); } Status CpuQuantizeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst) @@ -164,10 +211,8 @@ void CpuQuantizeKernel::run_quantize_qsymm8(const ITensor *src, ITensor *dst, co const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform(); UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform(); - if (is_data_type_quantized_asymmetric(src->info()->data_type())) - { - uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); - } + uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); + // Collapse window and reset first dimension to handle tail calculations manually Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); @@ -194,6 +239,114 @@ void CpuQuantizeKernel::run_quantize_qsymm8(const ITensor *src, ITensor *dst, co input, output); } +template +void CpuQuantizeKernel::run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window) +{ + const auto 
window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + // Calculate output offset difference. + const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform(); + UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform(); + uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); + + // Collapse window and reset first dimension to handle tail calculations manually + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Duplicate offset in signed vector format + const int8x16_t offset = wrapper::vdup_n(static_cast(uqinfo.offset), wrapper::traits::vector_128_tag{}); + + Iterator input(src, win_collapsed); + Iterator output(dst, win_collapsed); + execute_window_loop( + win_collapsed, + [&](const Coordinates &) + { + auto input_ptr = reinterpret_cast(input.ptr()); + auto output_ptr = reinterpret_cast(output.ptr()); + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + const wrapper::traits::neon_vector_t qv = + wrapper::vloadq(input_ptr + x); // load 128 bit vector of 8 bit datatype + + // Signed addition. + auto res = vaddq_s8(reinterpret_cast(qv), offset); + + // Output is dependent on datatype. 
+ wrapper::vstore(&output_ptr[x], + reinterpret_cast>(res)); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + auto result = uqinfo.offset + static_cast(input_ptr[x]); + output_ptr[x] = static_cast(result); + } + }, + input, output); +} + +template +void CpuQuantizeKernel::run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window) +{ + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform(); + UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform(); + uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); + + // Collapse window and reset first dimension to handle tail calculations manually + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Duplicate offset in signed vector format + const int16x8_t offset = wrapper::vdup_n(static_cast(uqinfo.offset), wrapper::traits::vector_128_tag{}); + + const int32_t low_bound = (dst->info()->data_type() == DataType::QASYMM8) ? 0 : -128; + const int32_t upper_bound = (dst->info()->data_type() == DataType::QASYMM8) ? 255 : 127; + + Iterator input(src, win_collapsed); + Iterator output(dst, win_collapsed); + execute_window_loop( + win_collapsed, + [&](const Coordinates &) + { + auto input_ptr = reinterpret_cast(input.ptr()); + TOut *output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + const auto qv = wrapper::vloadq(input_ptr + x); // load 128 bit vector of 8 bit datatype + int16x8_t lower = reinterpret_cast(wrapper::vmovl(wrapper::vgetlow(qv))); + int16x8_t upper = reinterpret_cast(wrapper::vmovl(wrapper::vgethigh(qv))); + + // Signed addition. 
+ lower = wrapper::vqadd(lower, offset); + upper = wrapper::vqadd(upper, offset); + + // Output is dependent on datatype. + auto res = recombine_8_16(lower, upper); + wrapper::vstore(&output_ptr[x], res); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + // Add offset and clamp result to within the range of the output datatype. + int32_t result = uqinfo.offset + static_cast(input_ptr[x]); + result = utility::clamp(result, low_bound, upper_bound); + + // Cast result to output datatype. + output_ptr[x] = static_cast(result); + } + }, + input, output); +} + template void CpuQuantizeKernel::run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window) { @@ -302,6 +455,7 @@ const char *CpuQuantizeKernel::name() const { return "CpuQuantizeKernel"; } + } // namespace kernels } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/CpuQuantizeKernel.h b/src/cpu/kernels/CpuQuantizeKernel.h index d6714136da..c2f7ac6d9d 100644 --- a/src/cpu/kernels/CpuQuantizeKernel.h +++ b/src/cpu/kernels/CpuQuantizeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_QUANTIZE_KERNEL_H -#define ARM_COMPUTE_CPU_QUANTIZE_KERNEL_H +#ifndef ACL_SRC_CPU_KERNELS_CPUQUANTIZEKERNEL_H +#define ACL_SRC_CPU_KERNELS_CPUQUANTIZEKERNEL_H #include "src/core/common/Macros.h" #include "src/cpu/ICpuKernel.h" @@ -58,6 +58,15 @@ public: */ static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + /** Get the preferred dimension in which the scheduler splits the work into multiple jobs. + * + * @return The split dimension hint. 
+ */ + size_t get_split_dimension_hint() const + { + return _split_dimension; + } + // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; @@ -86,9 +95,17 @@ private: template void run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window &window); + template + void run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window); + + template + void run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window); + QuantizeFunctionExecutorPtr _func{nullptr}; + size_t _split_dimension{Window::DimY}; }; + } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_QUANTIZE_KERNEL_H */ +#endif // ACL_SRC_CPU_KERNELS_CPUQUANTIZEKERNEL_H diff --git a/src/cpu/operators/CpuQuantize.cpp b/src/cpu/operators/CpuQuantize.cpp index 4315499c39..4a3f1827c7 100644 --- a/src/cpu/operators/CpuQuantize.cpp +++ b/src/cpu/operators/CpuQuantize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,7 +55,8 @@ void CpuQuantize::configure(const ITensorInfo *src, ITensorInfo *dst) void CpuQuantize::run(ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors); + auto split_dimension = static_cast(_kernel.get())->get_split_dimension_hint(); + NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors); } } // namespace cpu } // namespace arm_compute diff --git a/tests/validation/NEON/LSTMLayerQuantized.cpp b/tests/validation/NEON/LSTMLayerQuantized.cpp index d391267e3e..6b98ee2b67 100644 --- a/tests/validation/NEON/LSTMLayerQuantized.cpp +++ b/tests/validation/NEON/LSTMLayerQuantized.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. 
+ * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,11 +64,7 @@ inline void fill_tensor(SimpleTensor &tensor, const std::vector &v) } /** Tolerance for quantized asymmetric operations */ -#if defined(__aarch64__) -constexpr AbsoluteTolerance tolerance_qsymm16(0); -#else // defined(__aarch64__) constexpr AbsoluteTolerance tolerance_qsymm16(1); -#endif // defined(__aarch64__) } // namespace diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp index aeee54c835..bab7490762 100644 --- a/tests/validation/NEON/QuantizationLayer.cpp +++ b/tests/validation/NEON/QuantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,7 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/QuantizationLayerFixture.h" + namespace arm_compute { namespace test @@ -182,7 +183,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, -1) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 127) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_u8); @@ -191,7 +201,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNED framework::dataset::make("DataTypeIn", DataType::QASYMM8)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10), QuantizationInfo(2.0f, -25) })), - framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15) }))) + 
framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15), QuantizationInfo(1.0f, 127) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_s8); @@ -211,7 +221,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNED framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })), - framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5) }))) + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5), QuantizationInfo(1.0f, 43) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_s8); @@ -220,11 +230,21 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 0) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, -128) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_s8); +} + TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized -- cgit v1.2.1