From a52e4cf36ec86b63660f5a687073fa0985384dc1 Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Mon, 1 Apr 2019 14:55:18 +0100
Subject: COMPMID-2060: Support different qinfo in PoolingLayer

CL and Neon back ends now support different qinfos

Change-Id: I638d5f258ab2f99b40659601b4c5398d2c34c43b
Signed-off-by: Pablo Tello
Reviewed-on: https://review.mlplatform.org/c/927
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: Michalis Spyrou
Reviewed-by: Gian Marco Iodice
---
 src/core/CL/cl_kernels/pooling_layer_quantized.cl | 42 ++++++++++++++++++++---
 src/core/CL/kernels/CLPoolingLayerKernel.cpp      | 15 ++++++--
 src/core/NEON/kernels/NEPoolingLayerKernel.cpp    | 27 +++++++++++++--
 3 files changed, 73 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/core/CL/cl_kernels/pooling_layer_quantized.cl b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
index 198250bfb3..919b76ed8d 100644
--- a/src/core/CL/cl_kernels/pooling_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,19 @@
  */
 #include "helpers.h"
 
+#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
+#define VEC_FLOAT(VEC_SIZE) \
+    VEC_DATA_TYPE(float, VEC_SIZE)
+#define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_UCHAR(VEC_SIZE) VEC_DATA_TYPE(uchar, VEC_SIZE)
+#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
+#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
+#define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res)                                                                                  \
+    {                                                                                                                                                                 \
+        const VEC_FLOAT(VEC_SIZE) in_f32  = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
+        const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset));                            \
+        res                               = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_UCHAR(VEC_SIZE));                                               \
+    }
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
+
 #if defined(POOL_AVG)
 #define POOL_OP(x, y) ((x) + (y))
 #else /* defined(POOL_AVG) */
@@ -118,8 +131,22 @@ __kernel void pooling_layer_MxN_quantized_nchw(
     res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));
 #endif /* defined(POOL_AVG) */
 
-    // Store result
-    *(__global uchar *)output.ptr = convert_uchar(res);
+    uchar result_u8 = convert_uchar(res);
+
+#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
+
+    const float result_f32   = convert_float(result_u8);
+    const float input_offset = (float)OFFSET_IN1;
+    const float input_scale  = (float)SCALE_IN1;
+    const float scale_out    = (float)SCALE_OUT;
+    const float offset_out   = (float)OFFSET_OUT;
+    const float in_f32       = (result_f32 - input_offset) * input_scale;
+    const float out_f32      = in_f32 / scale_out + offset_out;
+    result_u8                = convert_uchar_sat(convert_int_rte(out_f32));
+
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
+
+    *(__global uchar *)output.ptr = result_u8;
 }
 
 int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
@@ -217,6 +244,11 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
     vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))));
 #endif /* defined(POOL_AVG) */
 
+    uchar8 out_u8 = convert_uchar8(vdata);
+#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
+    REQUANTIZE(8, out_u8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_u8);
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
+
     // Store result
-    vstore8(convert_uchar8(vdata), 0, (__global uchar *)output.ptr);
-}
\ No newline at end of file
+    vstore8(out_u8, 0, (__global uchar *)output.ptr);
+}
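For reference, the REQUANTIZE macro and the scalar block above are the usual dequantize-then-requantize round trip. A minimal scalar C++ sketch of the same arithmetic (the helper name and values below are illustrative only, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Requantize a QASYMM8 value from (in_scale, in_offset) to (out_scale, out_offset):
    // dequantize to float, re-encode with the output parameters, round to nearest and
    // saturate to the uint8 range. This mirrors what REQUANTIZE does per vector lane.
    inline uint8_t requantize_qasymm8(uint8_t in, float in_scale, int in_offset, float out_scale, int out_offset)
    {
        const float real = (static_cast<float>(in) - in_offset) * in_scale; // dequantize with input qinfo
        const float out  = real / out_scale + out_offset;                   // quantize with output qinfo
        const int   q    = static_cast<int>(std::lround(out));              // round to nearest
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));         // saturate
    }

For example, a pooled value of 200 under an input qinfo of scale 0.1 / offset 120 represents (200 - 120) * 0.1 = 8.0; re-encoded with an output qinfo of scale 0.2 / offset 30 it becomes round(8.0 / 0.2 + 30) = 70.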
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 7081688bff..7ccbda9be3 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -78,7 +78,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
         TensorInfo out_info(TensorInfo(compute_pool_shape(*input, pool_info), 1, output->data_type()));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
     }
@@ -201,6 +200,17 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     const int pool_pad_top  = pad_stride_info.pad_top();
     const int pool_pad_left = pad_stride_info.pad_left();
 
+    // Set build options
+    CLBuildOptions build_opts;
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
+    {
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+    }
+
     // Check output dimensions
     auto_init(input->info(), output->info(), pool_info);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info));
@@ -212,8 +222,6 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
 
     const DataType data_type = input->info()->data_type();
 
-    // Set build options
-    CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
     build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type));
    build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x));
@@ -222,6 +230,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     build_opts.add_option("-DPAD_Y=" + support::cpp11::to_string(pool_pad_top));
     build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x));
     build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y));
+    build_opts.add_option_if(data_type == DataType::F16, "-DFP16");
 
     // Create kernel
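With the mismatching-qinfo check removed, the CL back end can now be configured with input and output tensors that carry different quantization parameters. A minimal host-side sketch, assuming the contemporaneous API (CLTensor, CLPoolingLayer, QuantizationInfo(scale, offset)); the shapes and quantization values are made up for illustration and are not taken from the patch or its tests:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Input and output deliberately use different quantization parameters;
        // before this change validate() rejected this combination.
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 10)));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.2f, 30)));

        CLPoolingLayer pool;
        pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(2, 2, 0, 0)));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src, then:
        pool.run();
        CLScheduler::get().sync();
    }

The kernel only defines OFFSET_IN1/OFFSET_OUT/SCALE_IN1/SCALE_OUT when the two qinfos differ, so the requantization code is compiled in (and paid for) only in that case.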
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index d00a4af4fe..308fad5ffb 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -138,7 +138,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
                                     || (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
     }
@@ -640,6 +639,15 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con
             }
         }
 
+        const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
+        const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+        if(input_qinfo != output_qinfo)
+        {
+            const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo);
+            lower_res                     = vget_low_u8(requantized_output);
+            upper_res                     = vget_high_u8(requantized_output);
+        }
+
         // Store result
         if(pool_stride_x == 1)
         {
@@ -1641,6 +1649,11 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c
         }
 
         // Store result
+        const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
+        const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+        res                                  = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale,
+                                                                                                 output_qinfo.offset) :
+                                               res;
         *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
     },
     input, output);
@@ -1663,7 +1676,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c
     const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
     const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
 
-    const float32x4_t half_scale_v = vdupq_n_f32(0.5f);
+    const float32x4_t       half_scale_v = vdupq_n_f32(0.5f);
+    const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
+    const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
@@ -1713,6 +1728,12 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c
                 uint8x8_t res1 = vmovn_u16(vcombine_u16(vmovn_u32(vres1), vmovn_u32(vres2)));
                 uint8x8_t res2 = vmovn_u16(vcombine_u16(vmovn_u32(vres3), vmovn_u32(vres4)));
 
+                if(input_qinfo != output_qinfo)
+                {
+                    const auto requantized_output = vquantize(vdequantize(vcombine_u8(res1, res2), input_qinfo), output_qinfo);
+                    res1                          = vget_low_u8(requantized_output);
+                    res2                          = vget_high_u8(requantized_output);
+                }
 
                 // Store result
                 vst1_u8(output.ptr(), res1);
@@ -1733,7 +1754,7 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c
             }
 
             // Store result
-            vst1q_u8(output.ptr(), vres);
+            vst1q_u8(output.ptr(), (input_qinfo != output_qinfo) ? vquantize(vdequantize(vres, input_qinfo), output_qinfo) : vres);
         }
     },
     input, output);
--
cgit v1.2.1
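The Neon back end can be exercised the same way as the CL example above; it requantizes the pooled QASYMM8 values through vquantize(vdequantize(...)), or through the scalar scvt/sqcvt helpers on the non-vectorized path. A sketch assuming the same era's Tensor/NEPoolingLayer API, again with made-up shapes and quantization parameters:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Input and output carry different qinfos; the kernel requantizes on store.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 10)));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.2f, 30)));

        NEPoolingLayer pool;
        pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src, then:
        pool.run();
    }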