From 4c5469b192665c94118a8a558787cb9cec2d0765 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 21 May 2019 13:32:43 +0100 Subject: COMPMID-2225: Add interface support for new quantized data types. Add support for: -QSYMM8, 8-bit quantized symmetric -QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per channel quantization Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1236 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLActivationLayerKernel.cpp | 35 ++++++------- src/core/CL/kernels/CLComparisonKernel.cpp | 11 +++-- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 11 +++-- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 22 +++++---- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 22 +++++---- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 9 ++-- .../CL/kernels/CLDequantizationLayerKernel.cpp | 6 ++- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 12 +++-- .../CL/kernels/CLElementwiseOperationKernel.cpp | 16 +++--- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 9 ++-- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 11 +++-- src/core/CL/kernels/CLIm2ColKernel.cpp | 11 +++-- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 5 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 16 +++--- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 11 +++-- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 6 ++- src/core/CL/kernels/CLRangeKernel.cpp | 5 +- src/core/CL/kernels/CLScaleKernel.cpp | 5 +- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 16 +++--- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 16 +++--- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 26 ++++++---- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 11 +++-- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 23 +++++---- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 42 ++++++++-------- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 10 ++-- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 6 +-- .../NEDepthwiseConvolutionLayer3x3Kernel.cpp | 4 +- src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp | 2 +- .../NEON/kernels/NEDequantizationLayerKernel.cpp | 4 +- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 46 +++++++++-------- .../kernels/NEFuseBatchNormalizationKernel.cpp | 3 -- .../kernels/NEGEMMMatrixVectorMultiplyKernel.cpp | 4 +- .../kernels/NEHeightConcatenateLayerKernel.cpp | 10 ++-- src/core/NEON/kernels/NEIm2ColKernel.cpp | 2 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 6 +-- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 27 +++++----- .../NEON/kernels/NEQuantizationLayerKernel.cpp | 5 +- .../NEON/kernels/NEReductionOperationKernel.cpp | 19 +++++--- src/core/NEON/kernels/NEReverseKernel.cpp | 1 - src/core/NEON/kernels/NEScaleKernel.cpp | 24 ++++----- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 4 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 10 ++-- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 1 - src/runtime/CL/CLSubTensor.cpp | 7 ++- src/runtime/CL/CLTensor.cpp | 7 ++- src/runtime/CL/CLTensorAllocator.cpp | 57 +++++++++++++++++++++- .../CL/functions/CLDepthwiseConvolutionLayer.cpp | 14 ++++-- .../CL/functions/CLDirectConvolutionLayer.cpp | 4 +- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 19 +++++--- .../CL/functions/CLGEMMConvolutionLayer.cpp | 32 +++++++----- .../CL/functions/CLGEMMDeconvolutionLayer.cpp | 8 ++- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 8 +-- src/runtime/CL/functions/CLPoolingLayer.cpp | 4 +- .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 22 +++++---- .../NEON/functions/NEFullyConnectedLayer.cpp | 16 +++--- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 44 ++++++++--------- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 8 +-- src/runtime/NEON/functions/NEPoolingLayer.cpp | 4 +- .../NEDepthwiseConvolutionAssemblyDispatch.cpp | 6 +-- 59 files changed, 481 insertions(+), 324 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d601dfc20d..65e6561b0a 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -122,42 +122,43 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act int a_const_int = 0; int b_const_int = 0; + const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(dt); // Create quantized version of constants a, b if needed - if(is_data_type_quantized(dt)) + if(is_quantized_asymmetric) { - a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP); - b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + a_const_int = quantize_qasymm8(a_const, iq_info); + b_const_int = quantize_qasymm8(b_const, iq_info); } - const bool is_logistic_activation_quantized = is_data_type_quantized_asymmetric(dt) && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC; + const bool is_logistic_activation_quantized = is_quantized_asymmetric && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC; // Set build options CLBuildOptions build_opts; build_opts.add_option_if(!is_logistic_activation_quantized, "-DACT=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - if(is_data_type_quantized(dt)) + if(is_quantized_asymmetric) { build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); - const int o1 = input->info()->quantization_info().offset; - const float s1 = input->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + // Quantized value of 0 corresponds to the offset o1 - build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(o1))); - build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(s1))); - build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(o1))); + build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(iq_info.offset))); + build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale))); + build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(iq_info.offset))); // Set scale and offset of the input and output if they have different quantization info - if(is_data_type_quantized_asymmetric(dt) && output != nullptr) + if(is_quantized_asymmetric && output != nullptr) { - const float s2 = output->info()->quantization_info().scale; - const int o2 = output->info()->quantization_info().offset; + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); - if(o1 != o2 || s1 != s2) + if(iq_info != oq_info) { - build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(s2))); - build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(o2))); + build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(oq_info.scale))); + build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(oq_info.offset))); } } } @@ -171,7 +172,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act // Create kernel std::string kernel_name = std::string("activation_layer"); - if(is_data_type_quantized_asymmetric(dt)) + if(is_quantized_asymmetric) { kernel_name += is_logistic_activation_quantized ? std::string("_logistic_qa8") : std::string("_qa8"); } diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 4f44851ef8..628f9f18e7 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -134,10 +134,13 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp build_opts.emplace("-DOP_NAME=" + lower_string(operation_name)); if(is_data_type_quantized_asymmetric(input1->info()->data_type())) { - build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + + build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset)); + build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset)); + build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); kernel_name += "_quantized"; } diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 1cae3712dc..5e1bbe944f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -99,10 +99,13 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index cd25bb1e7f..615327a7cc 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -251,30 +251,34 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, if(is_qasymm) { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y)); - build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset)); - build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset)); - build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); + build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset)); + build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); if(act_info.enabled()) { - const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); - const int o1 = output->info()->quantization_info().offset; + const int a_val = quantize_qasymm8(act_info.a(), oq_info); + const int b_val = quantize_qasymm8(act_info.b(), oq_info); + const int o1 = oq_info.offset; build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); - const float s1 = input->info()->quantization_info().scale; + const float s1 = iq_info.scale; build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 758e99b77e..e32faa10df 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -213,30 +213,34 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, if(is_qasymm) { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1))); - build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset)); - build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset)); - build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); + build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset)); + build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); if(act_info.enabled()) { - const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); - const int o1 = output->info()->quantization_info().offset; + const int a_val = quantize_qasymm8(act_info.a(), oq_info); + const int b_val = quantize_qasymm8(act_info.b(), oq_info); + const int o1 = oq_info.offset; build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); - const float s1 = input->info()->quantization_info().scale; + const float s1 = iq_info.scale; build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index 28d4ff2759..0312a57664 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -72,9 +72,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu _input = input; _output = output; - const DataLayout data_layout = input->info()->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const DataLayout data_layout = input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); // Create kernel CLBuildOptions build_opts; @@ -96,7 +97,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout())); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()), - "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), + "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0"); _kernel = static_cast(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts.options())); diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 78cc5596dd..0b066837a9 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -95,10 +95,12 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o } ICLKernel::configure_internal(win); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)); + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 12affa9880..3e158a52ff 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -452,16 +452,20 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL // Set static kernel arguments if(is_data_type_quantized_asymmetric(data_type)) { + const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform(); + int output_multiplier = 0; int output_shift = 0; - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; ARM_COMPUTE_THROW_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1; - _kernel.setArg(idx++, -_input->info()->quantization_info().offset); - _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); - _kernel.setArg(idx++, _output->info()->quantization_info().offset); + _kernel.setArg(idx++, -iqinfo.offset); + _kernel.setArg(idx++, -wqinfo.offset); + _kernel.setArg(idx++, oqinfo.offset); _kernel.setArg(idx++, output_multiplier); _kernel.setArg(idx++, output_shift); } diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 414b040f4c..1d9c71555a 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -134,12 +134,16 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i build_opts.add_option("-DOP=" + operation_string); if(is_data_type_quantized_asymmetric(input1.data_type())) { - build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1.quantization_info().offset)); - build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2.quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output.quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1.quantization_info().scale)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2.quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output.quantization_info().scale)); + const UniformQuantizationInfo iq1info = input1.quantization_info().uniform(); + const UniformQuantizationInfo iq2info = input2.quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output.quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1info.offset)); + build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1info.scale)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } return build_opts; } diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 11a4292270..0ff2f1343a 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -104,9 +104,12 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const // Add static arguments if(is_quantized) { + const UniformQuantizationInfo iq0_info = _input0->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq1_info = _input1->info()->quantization_info().uniform(); + unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor() + num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, -_input0->info()->quantization_info().offset); - _kernel.setArg(idx++, -_input1->info()->quantization_info().offset); + _kernel.setArg(idx++, -iq0_info.offset); + _kernel.setArg(idx++, -iq1_info.offset); } // Configure kernel window diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index e3f2a96281..4da3e245c0 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -133,10 +133,13 @@ void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 8caa927f8b..10d6e68cd9 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -162,10 +162,11 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size const std::pair convolved_dims = scaled_dimensions(input_width, input_height, kernel_dims.width, kernel_dims.height, conv_info, dilation); // Im2Col configuration - std::string kernel_name = "im2col_generic_"; - CLBuildOptions build_opts; - unsigned int num_elems_processed_per_iteration = 1; - bool is_padding_required_nchw = false; + std::string kernel_name = "im2col_generic_"; + CLBuildOptions build_opts; + unsigned int num_elems_processed_per_iteration = 1; + bool is_padding_required_nchw = false; + const UniformQuantizationInfo qinfo = input->quantization_info().uniform(); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->element_size())); @@ -185,7 +186,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x())); build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y())); build_opts.add_option_if(num_groups > 1, "-DNUM_GROUPS=" + support::cpp11::to_string(num_groups)); - build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->quantization_info().offset), "-DPAD_VALUE=0"); + build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0"); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); if(data_layout == DataLayout::NHWC) diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 90330163ea..b255ba346f 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -123,8 +123,9 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso std::string kernel_name = "normalize_planar_yuv_layer_"; if(is_data_type_quantized(dt)) { - build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset))); - build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale))); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset))); + build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(qinfo.scale))); kernel_name += "q8_"; } diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index dda9b16083..050bbb810b 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -181,12 +181,16 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I CLBuildOptions build_opts; if(is_quantized) { - build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset)); + build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(iq1_info.scale)); + build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(iq2_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(oq_info.scale)); kernel_name += "_quantized"; } else diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 7ccbda9be3..8eaf5bf76f 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -205,10 +205,13 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Check output dimensions diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 374b22eab1..22d4e3345f 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -93,10 +93,12 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out } ICLKernel::configure_internal(win); + const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(output->info()->quantization_info().offset)); + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(input_width_x - vec_size_x, 0))); diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index eb8822b957..a22f5cb4cb 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -116,8 +116,9 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step)); if(is_data_type_quantized_asymmetric(output->info()->data_type())) { - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(qinfo.scale)); kernel_name += "_quantized"; } // Create kernel diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index cd89d1c6db..488313fd12 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -206,8 +206,9 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT"); if(call_quantized_kernel) { - build_opts.add_option("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); } std::string interpolation_name = string_from_interpolation_policy(policy); diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index e2d988103c..a9c08703c0 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -233,15 +233,16 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor _output = output; _sum = sum; - const DataType dt = input->info()->data_type(); - const size_t reduction_dim_size = input->info()->dimension(0); + const DataType dt = input->info()->data_type(); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + const size_t reduction_dim_size = input->info()->dimension(0); // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)); build_opts.add_option_if(dt == DataType::F16, "-DUSE_F16"); build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); - build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); + build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); cl::NDRange lws_hint(cl::NullRange); std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") : @@ -338,9 +339,10 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); // Note: output should always have a scale of 1/256 and offset 0 - const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0); - const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32); - const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type(); + const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0); + const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32); + const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type(); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), @@ -357,7 +359,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_options_if(is_quantized_asymmetric, - prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); + prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); // Create kernel std::string kernel_name = is_quantized_asymmetric ? "softmax_layer_norm_quantized" : "softmax_layer_norm"; diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 5f266c5ffa..bd4ff2c735 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -116,12 +116,16 @@ void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info()); if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 54edaafa29..a3ac102564 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -138,16 +138,22 @@ void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info(), input3->info(), input4->info()); if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq3_info = input3->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq4_info = input4->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset)); + build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale)); + build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset)); + build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 6c32cd2371..b577944a03 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -109,10 +109,13 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } // Create kernel diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index bc6a281353..3f71553926 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -320,15 +319,15 @@ typename std::enable_if::value, void>::type NEActivat Iterator input(_input, win_collapsed); Iterator output(_output, win_collapsed); - const QuantizationInfo qi_in = _input->info()->quantization_info(); - const QuantizationInfo qi_out = _output->info()->quantization_info(); - const qasymm8x16_t va = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset)); - const qasymm8x16_t vb = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset)); - const qasymm8_t a = sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset); - const qasymm8_t b = sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset); - const qasymm8_t const_0 = sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset); - const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); + const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); + const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); + const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); + const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); + const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); // Initialise scale/offset for re-quantization float s = qi_in.scale / qi_out.scale; @@ -415,9 +414,9 @@ typename std::enable_if::value, void>::type NEActivat } else if(act == ActivationFunction::LOGISTIC) { - float tmp_f = scvt_f32_qasymm8(in, qi_in.scale, qi_in.offset); + float tmp_f = dequantize_qasymm8(in, qi_in); tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = sqcvt_qasymm8_f32(tmp_f, qi_out.scale, qi_out.offset); + tmp = quantize_qasymm8(tmp_f, qi_out); } else { diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index ca79a0a419..164026c1ab 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -165,25 +165,26 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor const auto window_end_x = static_cast(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t voffseto = vdupq_n_f32(output_offset); + const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale); + const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale); + const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset); + const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset); + const float32x4_t voffseto = vdupq_n_f32(oq_info.offset); if(is_broadcast_across_x) { - const bool is_broadcast_input_2 = input2_win.x().step() == 0; - Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; - Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; - const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; - const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const bool is_broadcast_input_2 = input2_win.x().step() == 0; + Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; + Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; + const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; + const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); // Clear X Dimension on execution window as we handle manually non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1)); @@ -252,7 +253,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor for(; x < window_end_x; ++x) { const float afs = static_cast(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, broadcast_input, non_broadcast_input, output); @@ -263,9 +264,6 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -328,9 +326,9 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor // Compute left-over elements for(; x < window_end_x; ++x) { - const float afs = static_cast((*(input1_ptr + x)) - input1_qinfo.offset) * input1_qinfo.scale; - const float bfs = static_cast((*(input2_ptr + x)) - input2_qinfo.offset) * input2_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + const float afs = static_cast((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; + const float bfs = static_cast((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; + *(output_ptr + x) = quantize_qasymm8((afs + bfs), out->info()->quantization_info()); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 45e1562d8d..8874b52e19 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -87,10 +87,14 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape())); Iterator output(out, window); + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates &) { - const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast(input1.ptr())), in1->info()->quantization_info()); - const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast(input2.ptr())), in2->info()->quantization_info()); + const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast(input1.ptr())), iq1_info); + const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast(input2.ptr())), iq2_info); const float32x4x4_t ta3 = { @@ -102,7 +106,7 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 } }; - const uint8x16_t result = vquantize(ta3, out->info()->quantization_info()); + const uint8x16_t result = vquantize(ta3, oq_info); vst1q_u8(reinterpret_cast(output.ptr()), result); }, diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index b360e9e6be..c9c70d6500 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -53,9 +53,9 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo Iterator input(in, window); Iterator output(out, window); - const DataType dt = in->info()->data_type(); - const QuantizationInfo &input_qinfo = in->info()->quantization_info(); - const QuantizationInfo &output_qinfo = out->info()->quantization_info(); + const DataType dt = in->info()->data_type(); + const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index fdafc2da90..385be04e4a 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -51,8 +51,8 @@ public: static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation) { - const int input_offset = -input->info()->quantization_info().offset; - const int weights_offset = -weights->info()->quantization_info().offset; + const int input_offset = -input->info()->quantization_info().uniform().offset; + const int weights_offset = -weights->info()->quantization_info().uniform().offset; const int input_stride_x = input->info()->strides_in_bytes().x(); const int input_stride_y = input->info()->strides_in_bytes().y(); diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp index 88f8b31a35..53789e2472 100644 --- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp @@ -92,7 +92,7 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window) auto zero = static_cast(0); if(std::is_same::value) { - zero = _input->info()->quantization_info().offset; + zero = _input->info()->quantization_info().uniform().offset; } execute_window_loop(window_out, [&](const Coordinates & id) diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 1520225249..a6dc0977d2 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -97,7 +97,7 @@ inline void store_result(float16_t *ptr, const float32x4x4_t &v) template void run_dequantization(const ITensor *input, ITensor *output, const Window &window) { - const QuantizationInfo &qinfo = input->info()->quantization_info(); + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); @@ -129,7 +129,7 @@ void run_dequantization(const ITensor *input, ITensor *output, const Window &win for(; x < window_end_x; ++x) { uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(qinfo.dequantize(val)); + *(out_ptr + x) = static_cast(dequantize_qasymm8(val, qinfo)); } }, in, out); diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 33457e1fca..0fe05d2044 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -142,9 +142,9 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar } template -inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { - return qinfo.quantize(elementwise_arithm_op_scalar(a, b), RoundingPolicy::TO_NEAREST_UP); + return quantize_qasymm8(elementwise_arithm_op_scalar(a, b), qinfo); } template @@ -253,7 +253,7 @@ inline uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputS } template -inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { ARM_COMPUTE_UNUSED(qinfo); return elementwise_comp_op_scalar(a, b); @@ -567,7 +567,7 @@ void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const } void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo), + uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo), int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t, float32x4_t, float32x4_t, const bool), int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, @@ -587,12 +587,11 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const auto window_end_x = static_cast(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from zero) - const float32x4_t voffseto = vdupq_n_f32(output_offset + 0.5f); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); + const float32x4_t voffseto = vdupq_n_f32(output_qinfo.offset + 0.5f); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale); if(is_broadcast_across_x) { @@ -603,8 +602,8 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset); const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale); @@ -628,31 +627,30 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o voffset_non_broadcast, vscale_non_broadcast, voffseto, invvscaleo, !is_broadcast_input_2); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, non_broadcast_qinfo.offset); - const float bfs = scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, - out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo); + const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo); + *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, output_qinfo); } }, broadcast_input, non_broadcast_input, output); } else { + const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform(); + // Input1 quantization info - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); + const int32x4_t voffset1 = vdupq_n_s32(input1_qinfo.offset); + const float32x4_t vscale1 = vdupq_n_f32(input1_qinfo.scale); // Input2 quantization info - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); + const int32x4_t voffset2 = vdupq_n_s32(input2_qinfo.offset); + const float32x4_t vscale2 = vdupq_n_f32(input2_qinfo.scale); // Clear X Dimension on execution window as we handle manually input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -667,9 +665,9 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o vscale1, vscale2, voffseto, invvscaleo); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset); - const float bfs = scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo); + const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo); + *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index e699bac556..d45e3ce56a 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -27,12 +27,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/Window.h" #include "support/ToolchainSupport.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp index cba3390641..0e77ead72b 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp @@ -179,8 +179,8 @@ void NEGEMMMatrixVectorMultiplyKernel::matrix_vector_multiplyinfo()->quantization_info().offset; - const int weights_offset = -_input1->info()->quantization_info().offset; + const int input_offset = -_input0->info()->quantization_info().uniform().offset; + const int weights_offset = -_input1->info()->quantization_info().uniform().offset; const int input_w = _input0->info()->dimension(0); const int input_h = _input0->info()->dimension(1); diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index b8e204cfd8..8efab7da33 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 34af0cf3fd..874259bbb7 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -279,7 +279,7 @@ void NEIm2ColKernel::run_im2col(const Window &window) const int pad_top = _conv_info.pad_top(); const int stride_x = _conv_info.stride().first; const int stride_y = _conv_info.stride().second; - const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0; + const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0; Window window_in_out(window); // The first three dimensions of the input and output are increased by the inner loops diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index fa16484cd3..c313b23ad3 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -174,7 +174,7 @@ inline uint16x8_t scale255_U16_U16(uint16x8_t in) } void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale, - const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info) + const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info) { const auto input1 = static_cast(input1_ptr); const auto input2 = static_cast(input2_ptr); @@ -187,7 +187,7 @@ void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, c const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info); const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info); - const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset); + const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset }; const float32x4x4_t out_f32x4x4 = { @@ -660,7 +660,7 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo execute_window_loop(collapsed, [&](const Coordinates &) { (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale, - _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info()); + _input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform()); collapsed.slide_window_slice_3D(slice_input1); collapsed.slide_window_slice_3D(slice_input2); }, diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index ac2ffa1988..62c9ca0d5e 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -562,6 +562,10 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con const int scale_step_x = (pool_stride_x == 1) ? 2 : 1; + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); + const bool have_different_qinfo = input_qinfo != output_qinfo; + execute_window_loop(window, [&](const Coordinates & id) { const auto top_data = vld1q_u8(reinterpret_cast(input_top_ptr + input.offset())); @@ -640,9 +644,7 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con } } - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - if(input_qinfo != output_qinfo) + if(have_different_qinfo) { const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo); lower_res = vget_low_u8(requantized_output); @@ -814,8 +816,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 1)); @@ -1598,6 +1600,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates & id) { uint8_t res = 0; @@ -1671,11 +1676,7 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c } // Store result - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - res = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale, - output_qinfo.offset) : - res; + res = (input_qinfo != output_qinfo) ? quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res; *(reinterpret_cast(output.ptr())) = res; }, input, output); @@ -1698,9 +1699,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom); - const float32x4_t half_scale_v = vdupq_n_f32(0.5f); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const float32x4_t half_scale_v = vdupq_n_f32(0.5f); + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 4deeb1c7cc..0aa34cd411 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -107,6 +107,7 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); + const UniformQuantizationInfo uqinfo = qinfo.uniform(); #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ @@ -127,12 +128,12 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio int x = window_start_x; for(; x <= (window_end_x - window_step); x += window_step) { - wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), qinfo)); + wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo)); } // Compute left-over elements for(; x < window_end_x; ++x) { - output_ptr[x] = qinfo.quantize(input_ptr[x], rounding_policy); + output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy); } }, input, output); diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index c6e853659c..1bfef27d49 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -542,6 +542,9 @@ struct RedOpX_qasymm8 inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op) { ARM_COMPUTE_UNUSED(out_slice); + + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + auto vec_res_value1 = vdupq_n_u32(static_cast(0.f)); auto vec_res_value2 = vdupq_n_u32(static_cast(0.f)); auto vec_res_value3 = vdupq_n_u32(static_cast(0.f)); @@ -584,8 +587,8 @@ struct RedOpX_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -673,7 +676,7 @@ struct RedOpX_qasymm8 res *= wrapper::vgetlane(carry_res, 3); //re-quantize result - res = sqcvt_qasymm8_f32(res, in_info.quantization_info().scale, in_info.quantization_info().offset); + res = quantize_qasymm8(res, iq_info); *(output.ptr()) = static_cast(res); break; } @@ -877,6 +880,8 @@ struct RedOpYZW_qasymm8 { ARM_COMPUTE_UNUSED(out_slice); + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + execute_window_loop(in_slice, [&](const Coordinates &) { uint32x4x4_t vec_res_idx{ { 0 } }; @@ -932,8 +937,8 @@ struct RedOpYZW_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -1004,8 +1009,8 @@ struct RedOpYZW_qasymm8 } else if(op == ReductionOperation::PROD) { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(in_info.quantization_info().scale)); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(iq_info.scale)); //re-quantize vec_res_value1_f = vaddq_f32(vmulq_f32(vec_res_value1_f, iscale32x4f_4), offset32x4f_4); diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 36398cf89a..99328deecd 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 003f472486..e99b97bbe5 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -218,7 +218,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset const int input_height = input->info()->dimension(2); T border_value; - if(use_padding && border_mode != BorderMode::REPLICATE ) + if(use_padding && border_mode != BorderMode::REPLICATE) { // configure() sets top border to 0 for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE border_value = *reinterpret_cast(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w); @@ -235,9 +235,9 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1; - const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = input->info()->quantization_info(); - const QuantizationInfo oq_info = output->info()->quantization_info(); + const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { @@ -295,11 +295,11 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset //dequantize quantized input if(is_quantized) { - float inp00 = iq_info.dequantize(a00); - float inp01 = iq_info.dequantize(a01); - float inp10 = iq_info.dequantize(a10); - float inp11 = iq_info.dequantize(a11); - res = static_cast(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP)); + float inp00 = dequantize_qasymm8(a00, iq_info); + float inp01 = dequantize_qasymm8(a01, iq_info); + float inp10 = dequantize_qasymm8(a10, iq_info); + float inp11 = dequantize_qasymm8(a11, iq_info); + res = static_cast(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info)); } else { @@ -651,9 +651,9 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1]; const size_t in_stride = in_stide_in_bytes / _input->info()->element_size(); - const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = _input->info()->quantization_info(); - const QuantizationInfo oq_info = _output->info()->quantization_info(); + const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); switch(_input->info()->data_type()) { diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index e9417ece44..4144a1877b 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -595,7 +595,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons const int start_x = in.info()->valid_region().anchor.x(); const int input_width = in.info()->valid_region().shape.x(); - const float scale_beta = -beta * in.info()->quantization_info().scale; + const float scale_beta = -beta * in.info()->quantization_info().uniform().scale; Iterator in_it(&in, window); Iterator max_it(&max, window); diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index aea6875f20..28f655c529 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo & uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 09a4a11b66..383c2b8b99 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" diff --git a/src/runtime/CL/CLSubTensor.cpp b/src/runtime/CL/CLSubTensor.cpp index d0e7d760ff..0f362507cf 100644 --- a/src/runtime/CL/CLSubTensor.cpp +++ b/src/runtime/CL/CLSubTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,11 @@ const cl::Buffer &CLSubTensor::cl_buffer() const return _parent->cl_buffer(); } +CLQuantization CLSubTensor::quantization() const +{ + return _parent->quantization(); +} + ICLTensor *CLSubTensor::parent() { return _parent; diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp index dd277384c7..732689e7ec 100644 --- a/src/runtime/CL/CLTensor.cpp +++ b/src/runtime/CL/CLTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,11 @@ const cl::Buffer &CLTensor::cl_buffer() const return _allocator.cl_data(); } +CLQuantization CLTensor::quantization() const +{ + return _allocator.quantization(); +} + CLTensorAllocator *CLTensor::allocator() { return &_allocator; diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 101e4f1cd4..63aa1ba9ea 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -34,6 +34,14 @@ const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer(); namespace { +/** Helper function used to allocate the backing memory of a tensor + * + * @param[in] context OpenCL context to use + * @param[in] size Size of the allocation + * @param[in] alignment Alignment of the allocation + * + * @return A wrapped memory region + */ std::unique_ptr allocate_region(const cl::Context &context, size_t size, cl_uint alignment) { // Try fine-grain SVM @@ -54,11 +62,47 @@ std::unique_ptr allocate_region(const cl::Context &context, siz } return region; } +/** Clears quantization arrays + * + * @param[in, out] scale Quantization scale array + * @param[in, out] offset Quantization offset array + */ +void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset) +{ + // Clear arrays + scale = CLFloatArray(); + offset = CLInt32Array(); +} +/** Helper function used to create quantization data arrays + * + * @param[in, out] scale Quantization scale array + * @param[in, out] offset Quantization offset array + * @param[in] qinfo Quantization info + * @param[in] pad_size Pad size to use in case array needs to be padded for computation purposes + * + * @return A pair (scale, offset) containing the respective allocated and filled arrays + */ +void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size) +{ + clear_quantization_arrays(scale, offset); + + // Create scale array + const size_t num_elements = qinfo.scale.size(); + const size_t element_size = sizeof(decltype(qinfo.scale)::value_type); + scale = CLFloatArray(num_elements + pad_size); + scale.resize(num_elements); + CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale.data()); +} } // namespace CLTensorAllocator::CLTensorAllocator(CLTensor *owner) - : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner) + : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner), _scale(), _offset() +{ +} + +CLQuantization CLTensorAllocator::quantization() const { + return { &_scale, &_offset }; } uint8_t *CLTensorAllocator::data() @@ -73,6 +117,7 @@ const cl::Buffer &CLTensorAllocator::cl_data() const void CLTensorAllocator::allocate() { + // Allocate tensor backing memory if(_associated_memory_group == nullptr) { if(_memory.region() != nullptr && _memory.cl_region()->cl_data().get() != nullptr) @@ -91,6 +136,15 @@ void CLTensorAllocator::allocate() { _associated_memory_group->finalize_memory(_owner, _memory, info().total_size()); } + + // Allocate and fill the quantization parameter arrays + if(info().data_type() == DataType::QSYMM8_PER_CHANNEL) + { + const size_t pad_size = 0; + populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size); + } + + // Lock allocator info().set_is_resizable(false); } @@ -98,6 +152,7 @@ void CLTensorAllocator::free() { _mapping = nullptr; _memory.set_region(nullptr); + clear_quantization_arrays(_scale, _offset); info().set_is_resizable(true); } diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index 97b0a01331..e912740d69 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -130,7 +130,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor PixelValue &&zero_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type())) { - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } _border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value); } @@ -288,6 +288,10 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0); const size_t conv_size = conv_w * conv_h; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + // Im2Col configuration TensorShape shape_im2col = input->info()->tensor_shape(); shape_im2col.set(0, patch_size); @@ -319,9 +323,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w // Output staged configuration if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const UniformQuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info; - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = iq_info.scale * wq_info.scale / output_quant_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -334,8 +338,8 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w PixelValue zero_w(static_cast(0)); if(_is_quantized) { - zero_in = PixelValue(static_cast(input->info()->quantization_info().offset)); - zero_w = PixelValue(static_cast(weights->info()->quantization_info().offset)); + zero_in = PixelValue(static_cast(iq_info.offset)); + zero_w = PixelValue(static_cast(wq_info.offset)); } BorderSize border_size = _v2mm_kernel.border_size(); _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in); diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index c451bd4b4c..bfc6ff158c 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig PixelValue &&zero_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type())) { - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } _input_border_handler.configure(input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value); diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 7b9229c4ae..87d4c56a0e 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -41,10 +41,13 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I { if(is_data_type_quantized_asymmetric(input.data_type())) { + const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset); + const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset); + const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset); // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info), @@ -88,8 +91,8 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, nullptr, output); @@ -230,11 +233,15 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w // Configure output stage for asymmetric quantized types if(_is_quantized) { - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset); + _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_output.allocator()->allocate(); } } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 03d516f703..4e518fcfd5 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -115,8 +115,8 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); _mm_gemmlowp.configure(input, weights, biases, output, gemm_info); @@ -151,8 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens std::unique_ptr input_qa = input->clone(); std::unique_ptr weights_qa = weights->clone(); - input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Perform validation step on GEMMLowp return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info); @@ -190,6 +190,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(idx_width); const unsigned int kernel_height = weights->info()->dimension(idx_height); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + _is_prepared = weights_info.retain_internal_weights(); _original_weights = weights; _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -281,9 +285,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * // Configure output stage for quantized case if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const auto output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info; - const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale; + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -298,8 +302,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info); + const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info); min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; @@ -387,6 +391,10 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI // In case of F16, fused bias will be used in GEMM const bool run_addition = (skip_im2col) && (append_bias) && (data_type != DataType::F16); + const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); @@ -468,9 +476,9 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI if(is_quantized) { - const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info(); + const auto output_quant_info = (output->total_size() == 0) ? iq_info : oq_info; - const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale; + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; @@ -486,8 +494,8 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI if(is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info); + const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info); min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp index bcb91e052c..36a120e4ef 100644 --- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp @@ -277,11 +277,15 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor if(_is_quantized) { - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / _gemmlowp_final.info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _gemmlowp_final.info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier(0); int output_shift(0); quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, _gemmlowp_final.info()->quantization_info().offset); + _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_final.allocator()->allocate(); } diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 049db1d461..875e3a2a00 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -77,8 +77,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor _is_prepared = false; _original_b = b; _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - _a_offset = a->info()->quantization_info().offset; - _b_offset = b->info()->quantization_info().offset; + _a_offset = a->info()->quantization_info().uniform().offset; + _b_offset = b->info()->quantization_info().uniform().offset; // Get the GPU target const GPUTarget gpu_target = CLScheduler::get().target(); @@ -213,8 +213,8 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported"); - int32_t a_offset = a->quantization_info().offset; - int32_t b_offset = b->quantization_info().offset; + int32_t a_offset = a->quantization_info().uniform().offset; + int32_t b_offset = b->quantization_info().uniform().offset; const ITensorInfo *matrix_a_info = a; const ITensorInfo *matrix_b_info = b; diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp index cbe1ce3b47..086017a7fd 100644 --- a/src/runtime/CL/functions/CLPoolingLayer.cpp +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin PixelValue pixel_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding()) { - pixel_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + pixel_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } switch(input->info()->data_layout()) { diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 3bb69b1ffc..4bc8439d93 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -72,7 +72,7 @@ void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor _memory_group.manage(&_accumulator); _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info())); _accumulator.info()->set_data_layout(accum_layout); - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } if(!_is_nchw) @@ -109,13 +109,15 @@ void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor // Configure biases accumulation if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform(); - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, output_quant_info.offset); + _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset); _accumulator.allocator()->allocate(); } else if(_has_bias) @@ -459,13 +461,15 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh // Output staged configuration if(_is_quantized) { - const QuantizationInfo output_quant_info = output->info()->quantization_info(); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, output_quant_info.offset); + _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset); _output_reshaped.allocator()->allocate(); } @@ -483,8 +487,8 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh PixelValue zero_w(static_cast(0)); if(_is_quantized) { - zero_in = PixelValue(static_cast(input->info()->quantization_info().offset)); - zero_w = PixelValue(static_cast(weights->info()->quantization_info().offset)); + zero_in = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); + zero_w = PixelValue(static_cast(weights->info()->quantization_info().uniform().offset)); } BorderSize border_size = _v2mm_kernel.border_size(); _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in); diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index e1a17db6d4..7a74a7ea90 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -44,8 +44,8 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset); + const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale, -input.quantization_info().uniform().offset); + const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale, -weights.quantization_info().uniform().offset); // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info), @@ -90,8 +90,8 @@ void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *we const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, nullptr, output); @@ -227,11 +227,15 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh // Configure output stage for asymmetric quantized types if(_is_quantized) { - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset); + _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_output.allocator()->allocate(); } diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index a2c4e8a8b1..c011ddd18f 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -109,15 +109,15 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->info()->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); + const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = weights->info()->quantization_info().uniform(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset)); + weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset)); - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input_quantization_info : output->info()->quantization_info(); + const UniformQuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info().uniform(); - float multiplier = input_quantization_info.scale * weights->info()->quantization_info().scale / output_quant_info.scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -132,10 +132,10 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w }; if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo); + const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo); - min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; + min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; _is_activationlayer_enabled = false; @@ -143,7 +143,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w GEMMLowpOutputStageInfo output_info; output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - output_info.gemmlowp_offset = output_quant_info.offset; + output_info.gemmlowp_offset = oqinfo.offset; output_info.gemmlowp_multiplier = output_multiplier; output_info.gemmlowp_shift = output_shift; output_info.gemmlowp_min_bound = min_activation; @@ -152,8 +152,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w _mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info)); // Revert back QuantizatioInfo as input and weights could be used in other convolution layers - input->info()->set_quantization_info(input_quantization_info); - weights->info()->set_quantization_info(weights_quantization_info); + input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset)); + weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, wqinfo.offset)); } else { @@ -174,17 +174,17 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->quantization_info(); + const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform(); std::unique_ptr input_qa = input->clone(); std::unique_ptr weights_qa = weights->clone(); - input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input_qa->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset)); + weights_qa->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset)); - const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input_quantization_info : output->quantization_info(); + const UniformQuantizationInfo oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info().uniform(); - float multiplier = input_quantization_info.scale * weights->quantization_info().scale / output_quant_info.scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -199,16 +199,16 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens }; if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo); + const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo); - min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; + min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; } GEMMLowpOutputStageInfo output_info; output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - output_info.gemmlowp_offset = output_quant_info.offset; + output_info.gemmlowp_offset = oqinfo.offset; output_info.gemmlowp_multiplier = output_multiplier; output_info.gemmlowp_shift = output_shift; output_info.gemmlowp_min_bound = min_activation; diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 54f49a6707..d8773e37ab 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -61,8 +61,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, _mtx_b_reshape_kernel = nullptr; // Set internal variables - _a_offset = a->info()->quantization_info().offset; - _b_offset = b->info()->quantization_info().offset; + _a_offset = a->info()->quantization_info().uniform().offset; + _b_offset = b->info()->quantization_info().uniform().offset; _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); _is_prepared = false; @@ -224,8 +224,8 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso TensorInfo tmp_b_info{}; TensorInfo mm_result_s32_info{}; - int32_t a_offset = a->quantization_info().offset; - int32_t b_offset = b->quantization_info().offset; + int32_t a_offset = a->quantization_info().uniform().offset; + int32_t b_offset = b->quantization_info().uniform().offset; const bool reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); bool fuse_output_stage = gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE; diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp index cbfd68485f..d92086d08d 100644 --- a/src/runtime/NEON/functions/NEPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,7 +55,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay PixelValue zero_value(0.f); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding()) { - zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + zero_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); } _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value); break; diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index 049bf66689..0499d9930f 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -72,9 +72,9 @@ std::unique_ptr create_convolver(const ITensor // Create quantized convolver if(data_type == DataType::QASYMM8) { - const QuantizationInfo &input_qinfo = input->info()->quantization_info(); - const QuantizationInfo &weights_qinfo = weights->info()->quantization_info(); - const QuantizationInfo &output_qinfo = output->info()->quantization_info(); + const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo weights_qinfo = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform(); // Check that quantization info are in the range [0, 255] ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255); -- cgit v1.2.1