From d66094e37ecd747e85f30130e1a678bdbaf30788 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 15 Apr 2019 15:44:17 +0100 Subject: COMPMID-1995: Fix NEPoolingLayer for quantized 3x3 Quantized 3x3 pooling layer on NEON did not support different quantization information for the input and output. Change-Id: I38f8da6ec91c91ba37a21d9d0e1a14fd5bb99f86 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/992 Reviewed-by: Isabella Gottardi Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 33 +++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'src/core') diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 308fad5ffb..0b90d9f290 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -813,6 +813,9 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); + const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); + const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 1)); const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 2)); @@ -822,6 +825,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con const auto top_data = vld1q_u8(reinterpret_cast(input_top_ptr + input.offset())); const auto middle_data = vld1q_u8(reinterpret_cast(input_middle_ptr + input.offset())); const auto bottom_data = vld1q_u8(reinterpret_cast(input_bottom_ptr + input.offset())); + uint8x8_t fres = {}; + uint8x16_t fqres = {}; if(pooling_type == PoolingType::AVG) { @@ -877,7 +882,7 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con scale_vector_s16x8(exclude_padding, res, id, 0, 1, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y); - vst1_u8(reinterpret_cast(output.ptr()), vmovn_u16(res)); + fres = vmovn_u16(res); } else { @@ -889,8 +894,7 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con scale_vector_s16x8(exclude_padding, final_sum.val[1], id, 8, 1, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y); - const uint8x16_t res = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1])); - vst1q_u8(reinterpret_cast(output.ptr()), res); + fqres = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1])); } } else @@ -904,13 +908,30 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con { const uint8x8x2_t table = { { vget_low_u8(final_max), vget_high_u8(final_max) } }; static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 }; - const uint8x8_t res = vtbl2_u8(table, lookup_val); - vst1_u8(reinterpret_cast(output.ptr()), res); + fres = vtbl2_u8(table, lookup_val); } else { - vst1q_u8(reinterpret_cast(output.ptr()), final_max); + fqres = final_max; + } + } + + // Store result + if(pool_stride_x == 1) + { + if(input_qinfo != output_qinfo) + { + fqres = vquantize(vdequantize(fqres, input_qinfo), output_qinfo); + } + vst1q_u8(reinterpret_cast(output.ptr()), fqres); + } + else + { + if(input_qinfo != output_qinfo) + { + fres = vquantize(vdequantize(fres, input_qinfo), output_qinfo); } + vst1_u8(reinterpret_cast(output.ptr()), fres); } }, input, output); -- cgit v1.2.1