From d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 1 Oct 2019 12:25:49 +0100 Subject: COMPMID-2699: Add support for QASYMM16 in NEQuantizationLayer Change-Id: Icb968e37551a9048040e9aaff5329e874c53a2ee Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2016 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../NEON/kernels/NEQuantizationLayerKernel.cpp | 100 ++++++++++++++++----- src/runtime/NEON/functions/NEQuantizationLayer.cpp | 5 +- 2 files changed, 81 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 0aa34cd411..6a9c4ae14c 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -34,9 +34,10 @@ #include "arm_compute/core/CPP/Validate.h" #include +#include -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) @@ -45,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM16); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); return Status{}; @@ -71,7 +72,7 @@ inline const float32x4x4_t load_value(const float16_t *input_ptr) } // namespace NEQuantizationLayerKernel::NEQuantizationLayerKernel() - : _input(nullptr), _output(nullptr) + : _input(nullptr), _output(nullptr), _func(nullptr) { } @@ -83,6 +84,33 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output) _input = input; _output = output; + static std::map quant_map_f32 = + { + { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16 }, + }; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + static std::map quant_map_f16 = + { + { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8 }, + { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16 }, + }; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ + + switch(input->info()->data_type()) + { + case DataType::F32: + _func = quant_map_f32[output->info()->data_type()]; + break; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: + _func = quant_map_f16[output->info()->data_type()]; + break; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + default: + ARM_COMPUTE_ERROR("Unsupported input data type."); + } + // Configure kernel window Window win_config = calculate_max_window(*input->info(), Steps()); @@ -96,18 +124,17 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output) Status NEQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - return Status{}; } template -void NEQuantizationLayerKernel::quantize(const Window &window, const QuantizationInfo &qinfo) +void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window) { constexpr auto window_step = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const UniformQuantizationInfo uqinfo = qinfo.uniform(); + const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform(); #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ @@ -139,25 +166,54 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio input, output); } +template +void NEQuantizationLayerKernel::run_quantize_qasymm16(const Window &window) +{ + constexpr auto window_step = 16; + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform(); +#ifdef __aarch64__ + constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; +#else //__aarch64__ + constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO; +#endif //__aarch64__ + + // Collapse window and reset first dimension to handle tail calculations manually + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + auto input_ptr = reinterpret_cast(input.ptr()); + auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step); x += window_step) + { + uint16x8x2_t tmp = vquantize_qasymm16(load_value(&input_ptr[x]), uqinfo); + vst1q_u16(&output_ptr[x], tmp.val[0]); + vst1q_u16(&output_ptr[x + 8], tmp.val[1]); + } + // Compute left-over elements + for(; x < window_end_x; ++x) + { + output_ptr[x] = quantize_qasymm16(input_ptr[x], uqinfo, rounding_policy); + } + }, + input, output); +} + void NEQuantizationLayerKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON(_func == nullptr); - const QuantizationInfo &qinfo = _output->info()->quantization_info(); - - switch(_input->info()->data_type()) - { - case DataType::F32: - NEQuantizationLayerKernel::quantize(window, qinfo); - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - NEQuantizationLayerKernel::quantize(window, qinfo); - break; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } + (this->*_func)(window); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEQuantizationLayer.cpp b/src/runtime/NEON/functions/NEQuantizationLayer.cpp index 65873b1b14..4464978762 100644 --- a/src/runtime/NEON/functions/NEQuantizationLayer.cpp +++ b/src/runtime/NEON/functions/NEQuantizationLayer.cpp @@ -27,8 +27,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -using namespace arm_compute; - +namespace arm_compute +{ Status NEQuantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); @@ -46,3 +46,4 @@ void NEQuantizationLayer::configure(const ITensor *input, ITensor *output) k->configure(input, output); _kernel = std::move(k); } +} // namespace arm_compute -- cgit v1.2.1