diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-05-21 13:32:43 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-06-03 14:51:29 +0000 |
commit | 4c5469b192665c94118a8a558787cb9cec2d0765 (patch) | |
tree | 168aa969de8243bdbb1f25247dd9f54d037ae32c /src/core/NEON/kernels | |
parent | 43a129e94df41f9ac8bc78b702da5a387ada0494 (diff) | |
download | ComputeLibrary-4c5469b192665c94118a8a558787cb9cec2d0765.tar.gz |
COMPMID-2225: Add interface support for new quantized data types.
Add support for:
-QSYMM8, 8-bit quantized symmetric
-QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per channel quantization
Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1236
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
21 files changed, 127 insertions, 126 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index bc6a281353..3f71553926 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -320,15 +319,15 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat Iterator input(_input, win_collapsed); Iterator output(_output, win_collapsed); - const QuantizationInfo qi_in = _input->info()->quantization_info(); - const QuantizationInfo qi_out = _output->info()->quantization_info(); - const qasymm8x16_t va = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset)); - const qasymm8x16_t vb = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset)); - const qasymm8_t a = sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset); - const qasymm8_t b = sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset); - const qasymm8_t const_0 = sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset); - const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); + const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); + const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); + const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); + const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); + const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); // Initialise scale/offset for re-quantization float s = qi_in.scale / qi_out.scale; @@ -415,9 +414,9 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat } else if(act == ActivationFunction::LOGISTIC) { - float tmp_f = scvt_f32_qasymm8(in, qi_in.scale, qi_in.offset); + float tmp_f = dequantize_qasymm8(in, qi_in); tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = sqcvt_qasymm8_f32(tmp_f, qi_out.scale, qi_out.offset); + tmp = quantize_qasymm8(tmp_f, qi_out); } else { diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index ca79a0a419..164026c1ab 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -165,25 +165,26 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor const auto window_end_x = static_cast<int>(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t voffseto = vdupq_n_f32(output_offset); + const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale); + const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale); + const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset); + const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset); + const float32x4_t voffseto = vdupq_n_f32(oq_info.offset); if(is_broadcast_across_x) { - const bool is_broadcast_input_2 = input2_win.x().step() == 0; - Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; - Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; - const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; - const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const bool is_broadcast_input_2 = input2_win.x().step() == 0; + Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; + Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; + const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; + const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); // Clear X Dimension on execution window as we handle manually non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1)); @@ -252,7 +253,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor for(; x < window_end_x; ++x) { const float afs = static_cast<int32_t>(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, broadcast_input, non_broadcast_input, output); @@ -263,9 +264,6 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -328,9 +326,9 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor // Compute left-over elements for(; x < window_end_x; ++x) { - const float afs = static_cast<int32_t>((*(input1_ptr + x)) - input1_qinfo.offset) * input1_qinfo.scale; - const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - input2_qinfo.offset) * input2_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + const float afs = static_cast<int32_t>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; + const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; + *(output_ptr + x) = quantize_qasymm8((afs + bfs), out->info()->quantization_info()); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 45e1562d8d..8874b52e19 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -87,10 +87,14 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape())); Iterator output(out, window); + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates &) { - const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), in1->info()->quantization_info()); - const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), in2->info()->quantization_info()); + const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), iq1_info); + const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), iq2_info); const float32x4x4_t ta3 = { @@ -102,7 +106,7 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 } }; - const uint8x16_t result = vquantize(ta3, out->info()->quantization_info()); + const uint8x16_t result = vquantize(ta3, oq_info); vst1q_u8(reinterpret_cast<qasymm8_t *>(output.ptr()), result); }, diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index b360e9e6be..c9c70d6500 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -53,9 +53,9 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo Iterator input(in, window); Iterator output(out, window); - const DataType dt = in->info()->data_type(); - const QuantizationInfo &input_qinfo = in->info()->quantization_info(); - const QuantizationInfo &output_qinfo = out->info()->quantization_info(); + const DataType dt = in->info()->data_type(); + const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index fdafc2da90..385be04e4a 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -51,8 +51,8 @@ public: static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation) { - const int input_offset = -input->info()->quantization_info().offset; - const int weights_offset = -weights->info()->quantization_info().offset; + const int input_offset = -input->info()->quantization_info().uniform().offset; + const int weights_offset = -weights->info()->quantization_info().uniform().offset; const int input_stride_x = input->info()->strides_in_bytes().x(); const int input_stride_y = input->info()->strides_in_bytes().y(); diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp index 88f8b31a35..53789e2472 100644 --- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp @@ -92,7 +92,7 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window) auto zero = static_cast<T>(0); if(std::is_same<T, uint8_t>::value) { - zero = _input->info()->quantization_info().offset; + zero = _input->info()->quantization_info().uniform().offset; } execute_window_loop(window_out, [&](const Coordinates & id) diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 1520225249..a6dc0977d2 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -97,7 +97,7 @@ inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v) template <typename T> void run_dequantization(const ITensor *input, ITensor *output, const Window &window) { - const QuantizationInfo &qinfo = input->info()->quantization_info(); + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); const int window_step_x = 16; const auto window_start_x = static_cast<int>(window.x().start()); @@ -129,7 +129,7 @@ void run_dequantization(const ITensor *input, ITensor *output, const Window &win for(; x < window_end_x; ++x) { uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast<T>(qinfo.dequantize(val)); + *(out_ptr + x) = static_cast<T>(dequantize_qasymm8(val, qinfo)); } }, in, out); diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 33457e1fca..0fe05d2044 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -142,9 +142,9 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar } template <ArithmeticOperation op> -inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { - return qinfo.quantize(elementwise_arithm_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP); + return quantize_qasymm8(elementwise_arithm_op_scalar<op>(a, b), qinfo); } template <ArithmeticOperation op, typename VectorType> @@ -253,7 +253,7 @@ inline uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputS } template <ComparisonOperation op> -inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { ARM_COMPUTE_UNUSED(qinfo); return elementwise_comp_op_scalar<op>(a, b); @@ -567,7 +567,7 @@ void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const } void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo), + uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo), int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t, float32x4_t, float32x4_t, const bool), int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, @@ -587,12 +587,11 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const auto window_end_x = static_cast<int>(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from zero) - const float32x4_t voffseto = vdupq_n_f32(output_offset + 0.5f); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); + const float32x4_t voffseto = vdupq_n_f32(output_qinfo.offset + 0.5f); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale); if(is_broadcast_across_x) { @@ -603,8 +602,8 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset); const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale); @@ -628,31 +627,30 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o voffset_non_broadcast, vscale_non_broadcast, voffseto, invvscaleo, !is_broadcast_input_2); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, non_broadcast_qinfo.offset); - const float bfs = scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, - out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo); + const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo); + *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, output_qinfo); } }, broadcast_input, non_broadcast_input, output); } else { + const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform(); + // Input1 quantization info - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); + const int32x4_t voffset1 = vdupq_n_s32(input1_qinfo.offset); + const float32x4_t vscale1 = vdupq_n_f32(input1_qinfo.scale); // Input2 quantization info - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); + const int32x4_t voffset2 = vdupq_n_s32(input2_qinfo.offset); + const float32x4_t vscale2 = vdupq_n_f32(input2_qinfo.scale); // Clear X Dimension on execution window as we handle manually input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -667,9 +665,9 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o vscale1, vscale2, voffseto, invvscaleo); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset); - const float bfs = scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo); + const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo); + *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index e699bac556..d45e3ce56a 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -27,12 +27,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/Window.h" #include "support/ToolchainSupport.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp index cba3390641..0e77ead72b 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp @@ -179,8 +179,8 @@ void NEGEMMMatrixVectorMultiplyKernel::matrix_vector_multiply<uint8_t, uint8_t, Iterator in2(_input1, window_w); Iterator out(_output, window_out); - const int input_offset = -_input0->info()->quantization_info().offset; - const int weights_offset = -_input1->info()->quantization_info().offset; + const int input_offset = -_input0->info()->quantization_info().uniform().offset; + const int weights_offset = -_input1->info()->quantization_info().uniform().offset; const int input_w = _input0->info()->dimension(0); const int input_h = _input0->info()->dimension(1); diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index b8e204cfd8..8efab7da33 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 34af0cf3fd..874259bbb7 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -279,7 +279,7 @@ void NEIm2ColKernel::run_im2col(const Window &window) const int pad_top = _conv_info.pad_top(); const int stride_x = _conv_info.stride().first; const int stride_y = _conv_info.stride().second; - const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0; + const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0; Window window_in_out(window); // The first three dimensions of the input and output are increased by the inner loops diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index fa16484cd3..c313b23ad3 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -174,7 +174,7 @@ inline uint16x8_t scale255_U16_U16(uint16x8_t in) } void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale, - const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info) + const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info) { const auto input1 = static_cast<const qasymm8_t *__restrict>(input1_ptr); const auto input2 = static_cast<const qasymm8_t *__restrict>(input2_ptr); @@ -187,7 +187,7 @@ void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, c const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info); const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info); - const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset); + const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset }; const float32x4x4_t out_f32x4x4 = { @@ -660,7 +660,7 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo execute_window_loop(collapsed, [&](const Coordinates &) { (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale, - _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info()); + _input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform()); collapsed.slide_window_slice_3D(slice_input1); collapsed.slide_window_slice_3D(slice_input2); }, diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index ac2ffa1988..62c9ca0d5e 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -562,6 +562,10 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con const int scale_step_x = (pool_stride_x == 1) ? 2 : 1; + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); + const bool have_different_qinfo = input_qinfo != output_qinfo; + execute_window_loop(window, [&](const Coordinates & id) { const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset())); @@ -640,9 +644,7 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con } } - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - if(input_qinfo != output_qinfo) + if(have_different_qinfo) { const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo); lower_res = vget_low_u8(requantized_output); @@ -814,8 +816,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))); const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)); @@ -1598,6 +1600,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates & id) { uint8_t res = 0; @@ -1671,11 +1676,7 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c } // Store result - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - res = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale, - output_qinfo.offset) : - res; + res = (input_qinfo != output_qinfo) ? quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res; *(reinterpret_cast<uint8_t *>(output.ptr())) = res; }, input, output); @@ -1698,9 +1699,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom); - const float32x4_t half_scale_v = vdupq_n_f32(0.5f); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const float32x4_t half_scale_v = vdupq_n_f32(0.5f); + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 4deeb1c7cc..0aa34cd411 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -107,6 +107,7 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio const auto window_start_x = static_cast<int>(window.x().start()); const auto window_end_x = static_cast<int>(window.x().end()); + const UniformQuantizationInfo uqinfo = qinfo.uniform(); #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ @@ -127,12 +128,12 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio int x = window_start_x; for(; x <= (window_end_x - window_step); x += window_step) { - wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), qinfo)); + wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo)); } // Compute left-over elements for(; x < window_end_x; ++x) { - output_ptr[x] = qinfo.quantize(input_ptr[x], rounding_policy); + output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy); } }, input, output); diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index c6e853659c..1bfef27d49 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -542,6 +542,9 @@ struct RedOpX_qasymm8 inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op) { ARM_COMPUTE_UNUSED(out_slice); + + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + auto vec_res_value1 = vdupq_n_u32(static_cast<uint32_t>(0.f)); auto vec_res_value2 = vdupq_n_u32(static_cast<uint32_t>(0.f)); auto vec_res_value3 = vdupq_n_u32(static_cast<uint32_t>(0.f)); @@ -584,8 +587,8 @@ struct RedOpX_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -673,7 +676,7 @@ struct RedOpX_qasymm8 res *= wrapper::vgetlane(carry_res, 3); //re-quantize result - res = sqcvt_qasymm8_f32(res, in_info.quantization_info().scale, in_info.quantization_info().offset); + res = quantize_qasymm8(res, iq_info); *(output.ptr()) = static_cast<uint8_t>(res); break; } @@ -877,6 +880,8 @@ struct RedOpYZW_qasymm8 { ARM_COMPUTE_UNUSED(out_slice); + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + execute_window_loop(in_slice, [&](const Coordinates &) { uint32x4x4_t vec_res_idx{ { 0 } }; @@ -932,8 +937,8 @@ struct RedOpYZW_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -1004,8 +1009,8 @@ struct RedOpYZW_qasymm8 } else if(op == ReductionOperation::PROD) { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(in_info.quantization_info().scale)); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(iq_info.scale)); //re-quantize vec_res_value1_f = vaddq_f32(vmulq_f32(vec_res_value1_f, iscale32x4f_4), offset32x4f_4); diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 36398cf89a..99328deecd 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 003f472486..e99b97bbe5 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -218,7 +218,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset const int input_height = input->info()->dimension(2); T border_value; - if(use_padding && border_mode != BorderMode::REPLICATE ) + if(use_padding && border_mode != BorderMode::REPLICATE) { // configure() sets top border to 0 for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w); @@ -235,9 +235,9 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1; - const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = input->info()->quantization_info(); - const QuantizationInfo oq_info = output->info()->quantization_info(); + const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { @@ -295,11 +295,11 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset //dequantize quantized input if(is_quantized) { - float inp00 = iq_info.dequantize(a00); - float inp01 = iq_info.dequantize(a01); - float inp10 = iq_info.dequantize(a10); - float inp11 = iq_info.dequantize(a11); - res = static_cast<T>(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP)); + float inp00 = dequantize_qasymm8(a00, iq_info); + float inp01 = dequantize_qasymm8(a01, iq_info); + float inp10 = dequantize_qasymm8(a10, iq_info); + float inp11 = dequantize_qasymm8(a11, iq_info); + res = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info)); } else { @@ -651,9 +651,9 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1]; const size_t in_stride = in_stide_in_bytes / _input->info()->element_size(); - const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = _input->info()->quantization_info(); - const QuantizationInfo oq_info = _output->info()->quantization_info(); + const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); switch(_input->info()->data_type()) { diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index e9417ece44..4144a1877b 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -595,7 +595,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons const int start_x = in.info()->valid_region().anchor.x(); const int input_width = in.info()->valid_region().shape.x(); - const float scale_beta = -beta * in.info()->quantization_info().scale; + const float scale_beta = -beta * in.info()->quantization_info().uniform().scale; Iterator in_it(&in, window); Iterator max_it(&max, window); diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index aea6875f20..28f655c529 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo & uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 09a4a11b66..383c2b8b99 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" |