diff options
Diffstat (limited to 'src/cpu/kernels/CpuQuantizeKernel.cpp')
-rw-r--r-- | src/cpu/kernels/CpuQuantizeKernel.cpp | 167 |
1 files changed, 85 insertions, 82 deletions
diff --git a/src/cpu/kernels/CpuQuantizeKernel.cpp b/src/cpu/kernels/CpuQuantizeKernel.cpp index 9700c62318..5dde680837 100644 --- a/src/cpu/kernels/CpuQuantizeKernel.cpp +++ b/src/cpu/kernels/CpuQuantizeKernel.cpp @@ -28,13 +28,13 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" + +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "src/core/CPP/Validate.h" #include <arm_neon.h> #include <map> @@ -53,9 +53,11 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8, + DataType::QASYMM8_SIGNED, DataType::QASYMM16); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); return Status{}; @@ -71,19 +73,15 @@ inline float32x4x4_t load_value(const T *input_ptr) template <> inline float32x4x4_t load_value(const float *input_ptr) { - return { wrapper::vloadq(input_ptr), - wrapper::vloadq(input_ptr + 4), - wrapper::vloadq(input_ptr + 8), - wrapper::vloadq(input_ptr + 12) }; + return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4), wrapper::vloadq(input_ptr + 8), + wrapper::vloadq(input_ptr + 12)}; } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template <> inline float32x4x4_t load_value(const float16_t *input_ptr) { - return { vcvt_f32_f16(wrapper::vload(input_ptr)), - vcvt_f32_f16(wrapper::vload(input_ptr + 4)), - vcvt_f32_f16(wrapper::vload(input_ptr + 8)), - vcvt_f32_f16(wrapper::vload(input_ptr + 12)) }; + return {vcvt_f32_f16(wrapper::vload(input_ptr)), vcvt_f32_f16(wrapper::vload(input_ptr + 4)), + vcvt_f32_f16(wrapper::vload(input_ptr + 8)), vcvt_f32_f16(wrapper::vload(input_ptr + 12))}; } #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -113,26 +111,25 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst) ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst)); - static const std::map<std::string, QuantizeFunctionExecutorPtr> quant_map = - { - { "op_QASYMM8_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<uint8_t, uint8_t> }, - { "op_QASYMM8_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<uint8_t, int8_t> }, - { "op_QASYMM8_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<uint8_t> }, + static const std::map<std::string, QuantizeFunctionExecutorPtr> quant_map = { + {"op_QASYMM8_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<uint8_t, uint8_t>}, + {"op_QASYMM8_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<uint8_t, int8_t>}, + {"op_QASYMM8_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<uint8_t>}, - { "op_QASYMM8_SIGNED_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<int8_t, uint8_t> }, - { "op_QASYMM8_SIGNED_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<int8_t, int8_t> }, - { "op_QASYMM8_SIGNED_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<int8_t> }, + {"op_QASYMM8_SIGNED_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<int8_t, uint8_t>}, + {"op_QASYMM8_SIGNED_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<int8_t, int8_t>}, + {"op_QASYMM8_SIGNED_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<int8_t>}, - { "op_F32_QSYMM8", &CpuQuantizeKernel::run_quantize_qsymm8<float, int8_t> }, + {"op_F32_QSYMM8", &CpuQuantizeKernel::run_quantize_qsymm8<float, int8_t>}, - { "op_F32_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<float, uint8_t> }, - { "op_F32_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<float, int8_t> }, - { "op_F32_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<float> }, + {"op_F32_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<float, uint8_t>}, + {"op_F32_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<float, int8_t>}, + {"op_F32_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<float>}, #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - { "op_F16_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<float16_t, uint8_t> }, - { "op_F16_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<float16_t, int8_t> }, - { "op_F16_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<float16_t> }, + {"op_F16_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<float16_t, uint8_t>}, + {"op_F16_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<float16_t, int8_t>}, + {"op_F16_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<float16_t>}, #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ }; @@ -142,7 +139,7 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst) auto it = quant_map.find(function_to_call); - if(it == quant_map.end()) + if (it == quant_map.end()) { ARM_COMPUTE_ERROR("Unsupported combination of input and output data types"); } @@ -167,7 +164,7 @@ void CpuQuantizeKernel::run_quantize_qsymm8(const ITensor *src, ITensor *dst, co const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform(); UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform(); - if(is_data_type_quantized_asymmetric(src->info()->data_type())) + if (is_data_type_quantized_asymmetric(src->info()->data_type())) { uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); } @@ -177,22 +174,24 @@ void CpuQuantizeKernel::run_quantize_qsymm8(const ITensor *src, ITensor *dst, co Iterator input(src, win_collapsed); Iterator output(dst, win_collapsed); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - auto input_ptr = reinterpret_cast<const TIn *>(input.ptr()); - auto output_ptr = reinterpret_cast<TOut *>(output.ptr()); - int x = window_start_x; - for(; x <= (window_end_x - window_step); x += window_step) - { - wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo)); - } - // Compute left-over elements - for(; x < window_end_x; ++x) + execute_window_loop( + win_collapsed, + [&](const Coordinates &) { - output_ptr[x] = quantize_qsymm8(input_ptr[x], dst->info()->quantization_info()); - } - }, - input, output); + auto input_ptr = reinterpret_cast<const TIn *>(input.ptr()); + auto output_ptr = reinterpret_cast<TOut *>(output.ptr()); + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo)); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + output_ptr[x] = quantize_qsymm8(input_ptr[x], dst->info()->quantization_info()); + } + }, + input, output); } template <typename TIn, typename TOut> @@ -203,7 +202,7 @@ void CpuQuantizeKernel::run_quantize_qasymm8(const ITensor *src, ITensor *dst, c const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform(); UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform(); - if(is_data_type_quantized_asymmetric(src->info()->data_type())) + if (is_data_type_quantized_asymmetric(src->info()->data_type())) { uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); } @@ -219,23 +218,25 @@ void CpuQuantizeKernel::run_quantize_qasymm8(const ITensor *src, ITensor *dst, c Iterator input(src, win_collapsed); Iterator output(dst, win_collapsed); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - auto input_ptr = reinterpret_cast<const TIn *>(input.ptr()); - auto output_ptr = reinterpret_cast<TOut *>(output.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step); x += window_step) + execute_window_loop( + win_collapsed, + [&](const Coordinates &) { - wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo)); - } - // Compute left-over elements - for(; x < window_end_x; ++x) - { - output_ptr[x] = Qasymm8QuantizationHelper<TOut>::quantize(input_ptr[x], uqinfo, rounding_policy); - } - }, - input, output); + auto input_ptr = reinterpret_cast<const TIn *>(input.ptr()); + auto output_ptr = reinterpret_cast<TOut *>(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo)); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + output_ptr[x] = Qasymm8QuantizationHelper<TOut>::quantize(input_ptr[x], uqinfo, rounding_policy); + } + }, + input, output); } template <typename T> @@ -246,7 +247,7 @@ void CpuQuantizeKernel::run_quantize_qasymm16(const ITensor *src, ITensor *dst, const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform(); UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform(); - if(is_data_type_quantized_asymmetric(src->info()->data_type())) + if (is_data_type_quantized_asymmetric(src->info()->data_type())) { uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo); } @@ -262,25 +263,27 @@ void CpuQuantizeKernel::run_quantize_qasymm16(const ITensor *src, ITensor *dst, Iterator input(src, win_collapsed); Iterator output(dst, win_collapsed); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - auto input_ptr = reinterpret_cast<const T *>(input.ptr()); - auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step); x += window_step) - { - uint16x8x2_t tmp = vquantize_qasymm16(load_value(&input_ptr[x]), uqinfo); - vst1q_u16(&output_ptr[x], tmp.val[0]); - vst1q_u16(&output_ptr[x + 8], tmp.val[1]); - } - // Compute left-over elements - for(; x < window_end_x; ++x) + execute_window_loop( + win_collapsed, + [&](const Coordinates &) { - output_ptr[x] = quantize_qasymm16(input_ptr[x], uqinfo, rounding_policy); - } - }, - input, output); + auto input_ptr = reinterpret_cast<const T *>(input.ptr()); + auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + uint16x8x2_t tmp = vquantize_qasymm16(load_value(&input_ptr[x]), uqinfo); + vst1q_u16(&output_ptr[x], tmp.val[0]); + vst1q_u16(&output_ptr[x + 8], tmp.val[1]); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + output_ptr[x] = quantize_qasymm16(input_ptr[x], uqinfo, rounding_policy); + } + }, + input, output); } void CpuQuantizeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) |