aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-04-15 15:44:17 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-04-15 15:57:47 +0000
commitd66094e37ecd747e85f30130e1a678bdbaf30788 (patch)
tree3587faa39d46fb344db03beee54f663185407678 /src
parent17dae8765ba738c0d68fd3ed6af9eae8ae40798b (diff)
downloadComputeLibrary-d66094e37ecd747e85f30130e1a678bdbaf30788.tar.gz
COMPMID-1995: Fix NEPoolingLayer for quantized 3x3
Quantized 3x3 pooling layer on NEON did not support different quantization information for the input and output. Change-Id: I38f8da6ec91c91ba37a21d9d0e1a14fd5bb99f86 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/992 Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp33
1 files changed, 27 insertions, 6 deletions
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 308fad5ffb..0b90d9f290 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -813,6 +813,9 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
+ const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
+ const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+
const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
@@ -822,6 +825,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
const auto middle_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_middle_ptr + input.offset()));
const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
+ uint8x8_t fres = {};
+ uint8x16_t fqres = {};
if(pooling_type == PoolingType::AVG)
{
@@ -877,7 +882,7 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
scale_vector_s16x8(exclude_padding, res, id, 0, 1,
pool_size, upper_bound_w, upper_bound_h,
pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
- vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), vmovn_u16(res));
+ fres = vmovn_u16(res);
}
else
{
@@ -889,8 +894,7 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
scale_vector_s16x8(exclude_padding, final_sum.val[1], id, 8, 1,
pool_size, upper_bound_w, upper_bound_h,
pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
- const uint8x16_t res = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
- vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
+ fqres = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
}
}
else
@@ -904,13 +908,30 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
{
const uint8x8x2_t table = { { vget_low_u8(final_max), vget_high_u8(final_max) } };
static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
- const uint8x8_t res = vtbl2_u8(table, lookup_val);
- vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
+ fres = vtbl2_u8(table, lookup_val);
}
else
{
- vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), final_max);
+ fqres = final_max;
+ }
+ }
+
+ // Store result
+ if(pool_stride_x == 1)
+ {
+ if(input_qinfo != output_qinfo)
+ {
+ fqres = vquantize(vdequantize(fqres, input_qinfo), output_qinfo);
+ }
+ vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), fqres);
+ }
+ else
+ {
+ if(input_qinfo != output_qinfo)
+ {
+ fres = vquantize(vdequantize(fres, input_qinfo), output_qinfo);
}
+ vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), fres);
}
},
input, output);