aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-05-21 13:32:43 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-06-03 14:51:29 +0000
commit4c5469b192665c94118a8a558787cb9cec2d0765 (patch)
tree168aa969de8243bdbb1f25247dd9f54d037ae32c /src
parent43a129e94df41f9ac8bc78b702da5a387ada0494 (diff)
downloadComputeLibrary-4c5469b192665c94118a8a558787cb9cec2d0765.tar.gz
COMPMID-2225: Add interface support for new quantized data types.
Add support for: -QSYMM8, 8-bit quantized symmetric -QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per channel quantization Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/1236 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp35
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp22
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp22
-rw-r--r--src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp12
-rw-r--r--src/core/CL/kernels/CLElementwiseOperationKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLQuantizationLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLRangeKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp26
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp11
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp23
-rw-r--r--src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp42
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDequantizationLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEElementwiseOperationKernel.cpp46
-rw-r--r--src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp27
-rw-r--r--src/core/NEON/kernels/NEQuantizationLayerKernel.cpp5
-rw-r--r--src/core/NEON/kernels/NEReductionOperationKernel.cpp19
-rw-r--r--src/core/NEON/kernels/NEReverseKernel.cpp1
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp24
-rw-r--r--src/core/NEON/kernels/NESoftmaxLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEYOLOLayerKernel.cpp1
-rw-r--r--src/runtime/CL/CLSubTensor.cpp7
-rw-r--r--src/runtime/CL/CLTensor.cpp7
-rw-r--r--src/runtime/CL/CLTensorAllocator.cpp57
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp14
-rw-r--r--src/runtime/CL/functions/CLDirectConvolutionLayer.cpp4
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp19
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp32
-rw-r--r--src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp8
-rw-r--r--src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp8
-rw-r--r--src/runtime/CL/functions/CLPoolingLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp22
-rw-r--r--src/runtime/NEON/functions/NEFullyConnectedLayer.cpp16
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp44
-rw-r--r--src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp8
-rw-r--r--src/runtime/NEON/functions/NEPoolingLayer.cpp4
-rw-r--r--src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp6
59 files changed, 481 insertions, 324 deletions
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index d601dfc20d..65e6561b0a 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -122,42 +122,43 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
int a_const_int = 0;
int b_const_int = 0;
+ const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(dt);
// Create quantized version of constants a, b if needed
- if(is_data_type_quantized(dt))
+ if(is_quantized_asymmetric)
{
- a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP);
- b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP);
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ a_const_int = quantize_qasymm8(a_const, iq_info);
+ b_const_int = quantize_qasymm8(b_const, iq_info);
}
- const bool is_logistic_activation_quantized = is_data_type_quantized_asymmetric(dt) && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC;
+ const bool is_logistic_activation_quantized = is_quantized_asymmetric && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC;
// Set build options
CLBuildOptions build_opts;
build_opts.add_option_if(!is_logistic_activation_quantized, "-DACT=" + lower_string(string_from_activation_func(act_info.activation())));
build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- if(is_data_type_quantized(dt))
+ if(is_quantized_asymmetric)
{
build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
- const int o1 = input->info()->quantization_info().offset;
- const float s1 = input->info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+
// Quantized value of 0 corresponds to the offset o1
- build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(o1)));
- build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(s1)));
- build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(o1)));
+ build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(iq_info.offset)));
+ build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale)));
+ build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(iq_info.offset)));
// Set scale and offset of the input and output if they have different quantization info
- if(is_data_type_quantized_asymmetric(dt) && output != nullptr)
+ if(is_quantized_asymmetric && output != nullptr)
{
- const float s2 = output->info()->quantization_info().scale;
- const int o2 = output->info()->quantization_info().offset;
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
- if(o1 != o2 || s1 != s2)
+ if(iq_info != oq_info)
{
- build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(s2)));
- build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(o2)));
+ build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(oq_info.scale)));
+ build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(oq_info.offset)));
}
}
}
@@ -171,7 +172,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
// Create kernel
std::string kernel_name = std::string("activation_layer");
- if(is_data_type_quantized_asymmetric(dt))
+ if(is_quantized_asymmetric)
{
kernel_name += is_logistic_activation_quantized ? std::string("_logistic_qa8") : std::string("_qa8");
}
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index 4f44851ef8..628f9f18e7 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -134,10 +134,13 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp
build_opts.emplace("-DOP_NAME=" + lower_string(operation_name));
if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
{
- build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset));
- build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset));
- build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+
+ build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset));
+ build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset));
+ build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+ build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
kernel_name += "_quantized";
}
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 1cae3712dc..5e1bbe944f 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -99,10 +99,13 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
{
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
}
// Create kernel
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index cd25bb1e7f..615327a7cc 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -251,30 +251,34 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input,
if(is_qasymm)
{
- float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier = 0;
int output_shift = 0;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y));
- build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset));
- build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset));
- build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset));
- build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset));
+ build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset));
+ build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset));
+ build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset));
+ build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset));
build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
if(act_info.enabled())
{
- const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
- const int o1 = output->info()->quantization_info().offset;
+ const int a_val = quantize_qasymm8(act_info.a(), oq_info);
+ const int b_val = quantize_qasymm8(act_info.b(), oq_info);
+ const int o1 = oq_info.offset;
build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
- const float s1 = input->info()->quantization_info().scale;
+ const float s1 = iq_info.scale;
build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
}
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 758e99b77e..e32faa10df 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -213,30 +213,34 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
if(is_qasymm)
{
- float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier = 0;
int output_shift = 0;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1)));
- build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset));
- build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset));
- build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset));
- build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset));
+ build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset));
+ build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset));
+ build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset));
+ build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset));
build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
if(act_info.enabled())
{
- const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
- const int o1 = output->info()->quantization_info().offset;
+ const int a_val = quantize_qasymm8(act_info.a(), oq_info);
+ const int b_val = quantize_qasymm8(act_info.b(), oq_info);
+ const int o1 = oq_info.offset;
build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
- const float s1 = input->info()->quantization_info().scale;
+ const float s1 = iq_info.scale;
build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
}
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index 28d4ff2759..0312a57664 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -72,9 +72,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
_input = input;
_output = output;
- const DataLayout data_layout = input->info()->data_layout();
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const DataLayout data_layout = input->info()->data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
// Create kernel
CLBuildOptions build_opts;
@@ -96,7 +97,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout()));
build_opts.add_option_if(has_bias, "-DHAS_BIAS");
build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()),
- "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset),
+ "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset),
"-DPAD_VALUE=0");
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts.options()));
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 78cc5596dd..0b066837a9 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -95,10 +95,12 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o
}
ICLKernel::configure_internal(win);
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset));
+ build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
+ build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 12affa9880..3e158a52ff 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -452,16 +452,20 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
// Set static kernel arguments
if(is_data_type_quantized_asymmetric(data_type))
{
+ const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform();
+
int output_multiplier = 0;
int output_shift = 0;
- float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+ float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
ARM_COMPUTE_THROW_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1;
- _kernel.setArg(idx++, -_input->info()->quantization_info().offset);
- _kernel.setArg(idx++, -_weights->info()->quantization_info().offset);
- _kernel.setArg(idx++, _output->info()->quantization_info().offset);
+ _kernel.setArg(idx++, -iqinfo.offset);
+ _kernel.setArg(idx++, -wqinfo.offset);
+ _kernel.setArg(idx++, oqinfo.offset);
_kernel.setArg(idx++, output_multiplier);
_kernel.setArg(idx++, output_shift);
}
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index 414b040f4c..1d9c71555a 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -134,12 +134,16 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
build_opts.add_option("-DOP=" + operation_string);
if(is_data_type_quantized_asymmetric(input1.data_type()))
{
- build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1.quantization_info().offset));
- build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2.quantization_info().offset));
- build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output.quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1.quantization_info().scale));
- build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2.quantization_info().scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output.quantization_info().scale));
+ const UniformQuantizationInfo iq1info = input1.quantization_info().uniform();
+ const UniformQuantizationInfo iq2info = input2.quantization_info().uniform();
+ const UniformQuantizationInfo oqinfo = output.quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1info.offset));
+ build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oqinfo.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1info.scale));
+ build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
}
return build_opts;
}
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 11a4292270..0ff2f1343a 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -104,9 +104,12 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
// Add static arguments
if(is_quantized)
{
+ const UniformQuantizationInfo iq0_info = _input0->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq1_info = _input1->info()->quantization_info().uniform();
+
unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor() + num_arguments_per_1D_tensor();
- _kernel.setArg<int>(idx++, -_input0->info()->quantization_info().offset);
- _kernel.setArg<int>(idx++, -_input1->info()->quantization_info().offset);
+ _kernel.setArg<int>(idx++, -iq0_info.offset);
+ _kernel.setArg<int>(idx++, -iq1_info.offset);
}
// Configure kernel window
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index e3f2a96281..4da3e245c0 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -133,10 +133,13 @@ void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
{
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
}
// Create kernel
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 8caa927f8b..10d6e68cd9 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -162,10 +162,11 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size
const std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(input_width, input_height, kernel_dims.width, kernel_dims.height, conv_info, dilation);
// Im2Col configuration
- std::string kernel_name = "im2col_generic_";
- CLBuildOptions build_opts;
- unsigned int num_elems_processed_per_iteration = 1;
- bool is_padding_required_nchw = false;
+ std::string kernel_name = "im2col_generic_";
+ CLBuildOptions build_opts;
+ unsigned int num_elems_processed_per_iteration = 1;
+ bool is_padding_required_nchw = false;
+ const UniformQuantizationInfo qinfo = input->quantization_info().uniform();
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->element_size()));
@@ -185,7 +186,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size
build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x()));
build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y()));
build_opts.add_option_if(num_groups > 1, "-DNUM_GROUPS=" + support::cpp11::to_string(num_groups));
- build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->quantization_info().offset), "-DPAD_VALUE=0");
+ build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0");
build_opts.add_option_if(has_bias, "-DHAS_BIAS");
if(data_layout == DataLayout::NHWC)
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index 90330163ea..b255ba346f 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -123,8 +123,9 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso
std::string kernel_name = "normalize_planar_yuv_layer_";
if(is_data_type_quantized(dt))
{
- build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)));
- build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale)));
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+ build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)));
+ build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(qinfo.scale)));
kernel_name += "q8_";
}
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index dda9b16083..050bbb810b 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -181,12 +181,16 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I
CLBuildOptions build_opts;
if(is_quantized)
{
- build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset));
- build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset));
- build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().scale));
- build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().scale));
- build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset));
+ build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oq_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(iq1_info.scale));
+ build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(iq2_info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(oq_info.scale));
kernel_name += "_quantized";
}
else
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 7ccbda9be3..8eaf5bf76f 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -205,10 +205,13 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
{
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
}
// Check output dimensions
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index 374b22eab1..22d4e3345f 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -93,10 +93,12 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out
}
ICLKernel::configure_internal(win);
+ const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(output->info()->quantization_info().offset));
+ build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
+ build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp
index eb8822b957..a22f5cb4cb 100644
--- a/src/core/CL/kernels/CLRangeKernel.cpp
+++ b/src/core/CL/kernels/CLRangeKernel.cpp
@@ -116,8 +116,9 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float
build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step));
if(is_data_type_quantized_asymmetric(output->info()->data_type()))
{
- build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+ build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(qinfo.scale));
kernel_name += "_quantized";
}
// Create kernel
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index cd89d1c6db..488313fd12 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -206,8 +206,9 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo
build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
if(call_quantized_kernel)
{
- build_opts.add_option("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset));
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+ build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale));
+ build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
}
std::string interpolation_name = string_from_interpolation_policy(policy);
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index e2d988103c..a9c08703c0 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -233,15 +233,16 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
_output = output;
_sum = sum;
- const DataType dt = input->info()->data_type();
- const size_t reduction_dim_size = input->info()->dimension(0);
+ const DataType dt = input->info()->data_type();
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+ const size_t reduction_dim_size = input->info()->dimension(0);
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
build_opts.add_option_if(dt == DataType::F16, "-DUSE_F16");
build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
- build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
+ build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(qinfo.scale, beta).options());
cl::NDRange lws_hint(cl::NullRange);
std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") :
@@ -338,9 +339,10 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su
ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
// Note: output should always have a scale of 1/256 and offset 0
- const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0);
- const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32);
- const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type();
+ const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0);
+ const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32);
+ const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type();
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(),
@@ -357,7 +359,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_options_if(is_quantized_asymmetric,
- prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
+ prepare_quantized_softmax_build_options(qinfo.scale, beta).options());
// Create kernel
std::string kernel_name = is_quantized_asymmetric ? "softmax_layer_norm_quantized" : "softmax_layer_norm";
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
index 5f266c5ffa..bd4ff2c735 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
@@ -116,12 +116,16 @@ void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const
const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info());
if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo)
{
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+ build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
+ build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
}
// Create kernel
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
index 54edaafa29..a3ac102564 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
@@ -138,16 +138,22 @@ void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const
const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info(), input3->info(), input4->info());
if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo)
{
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().scale));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq3_info = input3->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq4_info = input4->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+ build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
+ build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
+ build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset));
+ build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale));
+ build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset));
+ build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
}
// Create kernel
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index 6c32cd2371..b577944a03 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -109,10 +109,13 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
{
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+ const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oqinfo = output->info()->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
}
// Create kernel
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index bc6a281353..3f71553926 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -30,7 +30,6 @@
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/QAsymm8.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
@@ -320,15 +319,15 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
Iterator input(_input, win_collapsed);
Iterator output(_output, win_collapsed);
- const QuantizationInfo qi_in = _input->info()->quantization_info();
- const QuantizationInfo qi_out = _output->info()->quantization_info();
- const qasymm8x16_t va = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset));
- const qasymm8x16_t vb = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset));
- const qasymm8_t a = sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset);
- const qasymm8_t b = sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset);
- const qasymm8_t const_0 = sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset);
- const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+ const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+ const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
+ const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
+ const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
+ const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -415,9 +414,9 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
}
else if(act == ActivationFunction::LOGISTIC)
{
- float tmp_f = scvt_f32_qasymm8(in, qi_in.scale, qi_in.offset);
+ float tmp_f = dequantize_qasymm8(in, qi_in);
tmp_f = 1.f / (1.f + std::exp(-tmp_f));
- tmp = sqcvt_qasymm8_f32(tmp_f, qi_out.scale, qi_out.offset);
+ tmp = quantize_qasymm8(tmp_f, qi_out);
}
else
{
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index ca79a0a419..164026c1ab 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -165,25 +165,26 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
const auto window_end_x = static_cast<int>(window.x().end());
const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
- const float output_scale = out->info()->quantization_info().scale;
- const int output_offset = out->info()->quantization_info().offset;
+ const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform();
- const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
- const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
- const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale);
- const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
- const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
- const float32x4_t voffseto = vdupq_n_f32(output_offset);
+ const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale);
+ const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale);
+ const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale);
+ const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset);
+ const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset);
+ const float32x4_t voffseto = vdupq_n_f32(oq_info.offset);
if(is_broadcast_across_x)
{
- const bool is_broadcast_input_2 = input2_win.x().step() == 0;
- Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
- Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
- const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
- const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
- const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
- const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+ const bool is_broadcast_input_2 = input2_win.x().step() == 0;
+ Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
+ Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
+ const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
+ const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
+ const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform();
+ const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform();
// Clear X Dimension on execution window as we handle manually
non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -252,7 +253,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
for(; x < window_end_x; ++x)
{
const float afs = static_cast<int32_t>(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale;
- *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP);
+ *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info);
}
},
broadcast_input, non_broadcast_input, output);
@@ -263,9 +264,6 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
- const QuantizationInfo input1_qinfo = in1->info()->quantization_info();
- const QuantizationInfo input2_qinfo = in2->info()->quantization_info();
-
Iterator input1(in1, input1_win);
Iterator input2(in2, input2_win);
Iterator output(out, win);
@@ -328,9 +326,9 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- const float afs = static_cast<int32_t>((*(input1_ptr + x)) - input1_qinfo.offset) * input1_qinfo.scale;
- const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - input2_qinfo.offset) * input2_qinfo.scale;
- *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP);
+ const float afs = static_cast<int32_t>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale;
+ const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale;
+            *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info);
}
},
input1, input2, output);
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 45e1562d8d..8874b52e19 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -87,10 +87,14 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2
Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
Iterator output(out, window);
+ const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform();
+
execute_window_loop(window, [&](const Coordinates &)
{
- const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), in1->info()->quantization_info());
- const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), in2->info()->quantization_info());
+ const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), iq1_info);
+ const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), iq2_info);
const float32x4x4_t ta3 =
{
@@ -102,7 +106,7 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2
}
};
- const uint8x16_t result = vquantize(ta3, out->info()->quantization_info());
+ const uint8x16_t result = vquantize(ta3, oq_info);
vst1q_u8(reinterpret_cast<qasymm8_t *>(output.ptr()), result);
},
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
index b360e9e6be..c9c70d6500 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
@@ -53,9 +53,9 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo
Iterator input(in, window);
Iterator output(out, window);
- const DataType dt = in->info()->data_type();
- const QuantizationInfo &input_qinfo = in->info()->quantization_info();
- const QuantizationInfo &output_qinfo = out->info()->quantization_info();
+ const DataType dt = in->info()->data_type();
+ const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform();
+ const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
execute_window_loop(window, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index fdafc2da90..385be04e4a 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -51,8 +51,8 @@ public:
static void convolve(const Window &window, unsigned int num_elems_written_per_iteration,
const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation)
{
- const int input_offset = -input->info()->quantization_info().offset;
- const int weights_offset = -weights->info()->quantization_info().offset;
+ const int input_offset = -input->info()->quantization_info().uniform().offset;
+ const int weights_offset = -weights->info()->quantization_info().uniform().offset;
const int input_stride_x = input->info()->strides_in_bytes().x();
const int input_stride_y = input->info()->strides_in_bytes().y();
diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
index 88f8b31a35..53789e2472 100644
--- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
@@ -92,7 +92,7 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window)
auto zero = static_cast<T>(0);
if(std::is_same<T, uint8_t>::value)
{
- zero = _input->info()->quantization_info().offset;
+ zero = _input->info()->quantization_info().uniform().offset;
}
execute_window_loop(window_out, [&](const Coordinates & id)
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index 1520225249..a6dc0977d2 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -97,7 +97,7 @@ inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v)
template <typename T>
void run_dequantization(const ITensor *input, ITensor *output, const Window &window)
{
- const QuantizationInfo &qinfo = input->info()->quantization_info();
+    const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
@@ -129,7 +129,7 @@ void run_dequantization(const ITensor *input, ITensor *output, const Window &win
for(; x < window_end_x; ++x)
{
uint8_t val = *(in_ptr + x);
- *(out_ptr + x) = static_cast<T>(qinfo.dequantize(val));
+ *(out_ptr + x) = static_cast<T>(dequantize_qasymm8(val, qinfo));
}
},
in, out);
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index 33457e1fca..0fe05d2044 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -142,9 +142,9 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar
}
template <ArithmeticOperation op>
-inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo)
+inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo)
{
- return qinfo.quantize(elementwise_arithm_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+ return quantize_qasymm8(elementwise_arithm_op_scalar<op>(a, b), qinfo);
}
template <ArithmeticOperation op, typename VectorType>
@@ -253,7 +253,7 @@ inline uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputS
}
template <ComparisonOperation op>
-inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo)
+inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo)
{
ARM_COMPUTE_UNUSED(qinfo);
return elementwise_comp_op_scalar<op>(a, b);
@@ -567,7 +567,7 @@ void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const
}
void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
- uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo),
+ uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo),
int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t,
float32x4_t, float32x4_t, const bool),
int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *,
@@ -587,12 +587,11 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o
const auto window_end_x = static_cast<int>(window.x().end());
const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
- const float output_scale = out->info()->quantization_info().scale;
- const int output_offset = out->info()->quantization_info().offset;
+ const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
// Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from zero)
- const float32x4_t voffseto = vdupq_n_f32(output_offset + 0.5f);
- const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale);
+ const float32x4_t voffseto = vdupq_n_f32(output_qinfo.offset + 0.5f);
+ const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale);
if(is_broadcast_across_x)
{
@@ -603,8 +602,8 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o
const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
- const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
- const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+ const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform();
+ const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform();
const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
@@ -628,31 +627,30 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o
voffset_non_broadcast, vscale_non_broadcast, voffseto, invvscaleo, !is_broadcast_input_2);
for(; x < window_end_x; ++x)
{
- const float afs = scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, non_broadcast_qinfo.offset);
- const float bfs = scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
- *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
- out->info()->quantization_info());
+ const float afs = dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+ const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
+ *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, output_qinfo);
}
},
broadcast_input, non_broadcast_input, output);
}
else
{
+ const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform();
+
// Input1 quantization info
- const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
- const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
+ const int32x4_t voffset1 = vdupq_n_s32(input1_qinfo.offset);
+ const float32x4_t vscale1 = vdupq_n_f32(input1_qinfo.scale);
// Input2 quantization info
- const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
- const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
+ const int32x4_t voffset2 = vdupq_n_s32(input2_qinfo.offset);
+ const float32x4_t vscale2 = vdupq_n_f32(input2_qinfo.scale);
// Clear X Dimension on execution window as we handle manually
input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
- const QuantizationInfo input1_qinfo = in1->info()->quantization_info();
- const QuantizationInfo input2_qinfo = in2->info()->quantization_info();
-
Iterator input1(in1, input1_win);
Iterator input2(in2, input2_win);
Iterator output(out, win);
@@ -667,9 +665,9 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o
vscale1, vscale2, voffseto, invvscaleo);
for(; x < window_end_x; ++x)
{
- const float afs = scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
- const float bfs = scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
- *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
+ const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+ const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+ *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo);
}
},
input1, input2, output);
diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
index e699bac556..d45e3ce56a 100644
--- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
@@ -27,12 +27,9 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/Window.h"
#include "support/ToolchainSupport.h"
diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
index cba3390641..0e77ead72b 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
@@ -179,8 +179,8 @@ void NEGEMMMatrixVectorMultiplyKernel::matrix_vector_multiply<uint8_t, uint8_t,
Iterator in2(_input1, window_w);
Iterator out(_output, window_out);
- const int input_offset = -_input0->info()->quantization_info().offset;
- const int weights_offset = -_input1->info()->quantization_info().offset;
+ const int input_offset = -_input0->info()->quantization_info().uniform().offset;
+ const int weights_offset = -_input1->info()->quantization_info().uniform().offset;
const int input_w = _input0->info()->dimension(0);
const int input_h = _input0->info()->dimension(1);
diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
index b8e204cfd8..8efab7da33 100644
--- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
@@ -112,11 +112,11 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo
uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY];
// Create iterators
- Iterator input(_input, window);
- Iterator output(_output, window);
- const DataType dt = _input->info()->data_type();
- const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
- const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+ Iterator input(_input, window);
+ Iterator output(_output, window);
+ const DataType dt = _input->info()->data_type();
+ const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
execute_window_loop(window, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 34af0cf3fd..874259bbb7 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -279,7 +279,7 @@ void NEIm2ColKernel::run_im2col(const Window &window)
const int pad_top = _conv_info.pad_top();
const int stride_x = _conv_info.stride().first;
const int stride_y = _conv_info.stride().second;
- const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0;
+ const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0;
Window window_in_out(window);
// The first three dimensions of the input and output are increased by the inner loops
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index fa16484cd3..c313b23ad3 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -174,7 +174,7 @@ inline uint16x8_t scale255_U16_U16(uint16x8_t in)
}
void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
- const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info)
+ const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info)
{
const auto input1 = static_cast<const qasymm8_t *__restrict>(input1_ptr);
const auto input2 = static_cast<const qasymm8_t *__restrict>(input2_ptr);
@@ -187,7 +187,7 @@ void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, c
const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info);
const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info);
- const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset);
+ const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset };
const float32x4x4_t out_f32x4x4 =
{
@@ -660,7 +660,7 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo
execute_window_loop(collapsed, [&](const Coordinates &)
{
(*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale,
- _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info());
+ _input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform());
collapsed.slide_window_slice_3D(slice_input1);
collapsed.slide_window_slice_3D(slice_input2);
},
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index ac2ffa1988..62c9ca0d5e 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -562,6 +562,10 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con
const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;
+ const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform();
+ const bool have_different_qinfo = input_qinfo != output_qinfo;
+
execute_window_loop(window, [&](const Coordinates & id)
{
const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
@@ -640,9 +644,7 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con
}
}
- const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
- const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
- if(input_qinfo != output_qinfo)
+ if(have_different_qinfo)
{
const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo);
lower_res = vget_low_u8(requantized_output);
@@ -814,8 +816,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
- const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
- const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+ const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
@@ -1598,6 +1600,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c
const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
+ const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
+
execute_window_loop(window, [&](const Coordinates & id)
{
uint8_t res = 0;
@@ -1671,11 +1676,7 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c
}
// Store result
- const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
- const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
- res = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale,
- output_qinfo.offset) :
- res;
+ res = (input_qinfo != output_qinfo) ? quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res;
*(reinterpret_cast<uint8_t *>(output.ptr())) = res;
},
input, output);
@@ -1698,9 +1699,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c
const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
- const float32x4_t half_scale_v = vdupq_n_f32(0.5f);
- const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
- const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+ const float32x4_t half_scale_v = vdupq_n_f32(0.5f);
+ const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform();
execute_window_loop(window, [&](const Coordinates & id)
{
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index 4deeb1c7cc..0aa34cd411 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -107,6 +107,7 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
+ const UniformQuantizationInfo uqinfo = qinfo.uniform();
#ifdef __aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
#else //__aarch64__
@@ -127,12 +128,12 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio
int x = window_start_x;
for(; x <= (window_end_x - window_step); x += window_step)
{
- wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), qinfo));
+ wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- output_ptr[x] = qinfo.quantize(input_ptr[x], rounding_policy);
+ output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy);
}
},
input, output);
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index c6e853659c..1bfef27d49 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -542,6 +542,9 @@ struct RedOpX_qasymm8
inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op)
{
ARM_COMPUTE_UNUSED(out_slice);
+
+ const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform();
+
auto vec_res_value1 = vdupq_n_u32(static_cast<uint32_t>(0.f));
auto vec_res_value2 = vdupq_n_u32(static_cast<uint32_t>(0.f));
auto vec_res_value3 = vdupq_n_u32(static_cast<uint32_t>(0.f));
@@ -584,8 +587,8 @@ struct RedOpX_qasymm8
}
case ReductionOperation::PROD:
{
- const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset);
- const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale);
+ const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset);
+ const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale);
const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements));
const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements));
@@ -673,7 +676,7 @@ struct RedOpX_qasymm8
res *= wrapper::vgetlane(carry_res, 3);
//re-quantize result
- res = sqcvt_qasymm8_f32(res, in_info.quantization_info().scale, in_info.quantization_info().offset);
+ res = quantize_qasymm8(res, iq_info);
*(output.ptr()) = static_cast<uint8_t>(res);
break;
}
@@ -877,6 +880,8 @@ struct RedOpYZW_qasymm8
{
ARM_COMPUTE_UNUSED(out_slice);
+ const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform();
+
execute_window_loop(in_slice, [&](const Coordinates &)
{
uint32x4x4_t vec_res_idx{ { 0 } };
@@ -932,8 +937,8 @@ struct RedOpYZW_qasymm8
}
case ReductionOperation::PROD:
{
- const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset);
- const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale);
+ const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset);
+ const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale);
const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements));
const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements));
@@ -1004,8 +1009,8 @@ struct RedOpYZW_qasymm8
}
else if(op == ReductionOperation::PROD)
{
- const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset);
- const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(in_info.quantization_info().scale));
+ const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset);
+ const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(iq_info.scale));
//re-quantize
vec_res_value1_f = vaddq_f32(vmulq_f32(vec_res_value1_f, iscale32x4f_4), offset32x4f_4);
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index 36398cf89a..99328deecd 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -31,7 +31,6 @@
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/QAsymm8.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 003f472486..e99b97bbe5 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -218,7 +218,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
const int input_height = input->info()->dimension(2);
T border_value;
- if(use_padding && border_mode != BorderMode::REPLICATE )
+ if(use_padding && border_mode != BorderMode::REPLICATE)
{
// configure() sets top border to 0 for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE
border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
@@ -235,9 +235,9 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
- const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8);
- const QuantizationInfo iq_info = input->info()->quantization_info();
- const QuantizationInfo oq_info = output->info()->quantization_info();
+ const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8);
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
execute_window_loop(window, [&](const Coordinates & id)
{
@@ -295,11 +295,11 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
//dequantize quantized input
if(is_quantized)
{
- float inp00 = iq_info.dequantize(a00);
- float inp01 = iq_info.dequantize(a01);
- float inp10 = iq_info.dequantize(a10);
- float inp11 = iq_info.dequantize(a11);
- res = static_cast<T>(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP));
+ float inp00 = dequantize_qasymm8(a00, iq_info);
+ float inp01 = dequantize_qasymm8(a01, iq_info);
+ float inp10 = dequantize_qasymm8(a10, iq_info);
+ float inp11 = dequantize_qasymm8(a11, iq_info);
+ res = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
}
else
{
@@ -651,9 +651,9 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
const size_t in_stride = in_stide_in_bytes / _input->info()->element_size();
- const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
- const QuantizationInfo iq_info = _input->info()->quantization_info();
- const QuantizationInfo oq_info = _output->info()->quantization_info();
+ const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
+ const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
switch(_input->info()->data_type())
{
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index e9417ece44..4144a1877b 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -595,7 +595,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
const int start_x = in.info()->valid_region().anchor.x();
const int input_width = in.info()->valid_region().shape.x();
- const float scale_beta = -beta * in.info()->quantization_info().scale;
+ const float scale_beta = -beta * in.info()->quantization_info().uniform().scale;
Iterator in_it(&in, window);
Iterator max_it(&max, window);
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index aea6875f20..28f655c529 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -112,11 +112,11 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &
uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0];
// Create iterators
- Iterator input(_input, window);
- Iterator output(_output, window);
- const DataType dt = _input->info()->data_type();
- const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
- const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+ Iterator input(_input, window);
+ Iterator output(_output, window);
+ const DataType dt = _input->info()->data_type();
+ const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
execute_window_loop(window, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
index 09a4a11b66..383c2b8b99 100644
--- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
@@ -30,7 +30,6 @@
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h"
-#include "arm_compute/core/QAsymm8.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
diff --git a/src/runtime/CL/CLSubTensor.cpp b/src/runtime/CL/CLSubTensor.cpp
index d0e7d760ff..0f362507cf 100644
--- a/src/runtime/CL/CLSubTensor.cpp
+++ b/src/runtime/CL/CLSubTensor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,11 @@ const cl::Buffer &CLSubTensor::cl_buffer() const
return _parent->cl_buffer();
}
+CLQuantization CLSubTensor::quantization() const
+{
+ return _parent->quantization();
+}
+
ICLTensor *CLSubTensor::parent()
{
return _parent;
diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp
index dd277384c7..732689e7ec 100644
--- a/src/runtime/CL/CLTensor.cpp
+++ b/src/runtime/CL/CLTensor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,6 +47,11 @@ const cl::Buffer &CLTensor::cl_buffer() const
return _allocator.cl_data();
}
+CLQuantization CLTensor::quantization() const
+{
+ return _allocator.quantization();
+}
+
CLTensorAllocator *CLTensor::allocator()
{
return &_allocator;
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 101e4f1cd4..63aa1ba9ea 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -34,6 +34,14 @@ const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
namespace
{
+/** Helper function used to allocate the backing memory of a tensor
+ *
+ * @param[in] context OpenCL context to use
+ * @param[in] size Size of the allocation
+ * @param[in] alignment Alignment of the allocation
+ *
+ * @return A wrapped memory region
+ */
std::unique_ptr<ICLMemoryRegion> allocate_region(const cl::Context &context, size_t size, cl_uint alignment)
{
// Try fine-grain SVM
@@ -54,11 +62,47 @@ std::unique_ptr<ICLMemoryRegion> allocate_region(const cl::Context &context, siz
}
return region;
}
+/** Clears quantization arrays
+ *
+ * @param[in, out] scale Quantization scale array
+ * @param[in, out] offset Quantization offset array
+ */
+void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset)
+{
+ // Clear arrays
+ scale = CLFloatArray();
+ offset = CLInt32Array();
+}
+/** Helper function used to create quantization data arrays
+ *
+ * @param[in, out] scale Quantization scale array, allocated and filled by this function
+ * @param[in, out] offset Quantization offset array, cleared by this function
+ * @param[in] qinfo Quantization info
+ * @param[in] pad_size Pad size to use in case array needs to be padded for computation purposes
+ *
+ * @note Results are written in-place into @p scale and @p offset; the function itself returns nothing
+ */
+void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size)
+{
+ clear_quantization_arrays(scale, offset);
+
+ // Create scale array
+ const size_t num_elements = qinfo.scale.size();
+ const size_t element_size = sizeof(decltype(qinfo.scale)::value_type);
+ scale = CLFloatArray(num_elements + pad_size);
+ scale.resize(num_elements);
+ CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale.data());
+}
} // namespace
CLTensorAllocator::CLTensorAllocator(CLTensor *owner)
- : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner)
+ : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner), _scale(), _offset()
+{
+}
+
+CLQuantization CLTensorAllocator::quantization() const
{
+ return { &_scale, &_offset };
}
uint8_t *CLTensorAllocator::data()
@@ -73,6 +117,7 @@ const cl::Buffer &CLTensorAllocator::cl_data() const
void CLTensorAllocator::allocate()
{
+ // Allocate tensor backing memory
if(_associated_memory_group == nullptr)
{
if(_memory.region() != nullptr && _memory.cl_region()->cl_data().get() != nullptr)
@@ -91,6 +136,15 @@ void CLTensorAllocator::allocate()
{
_associated_memory_group->finalize_memory(_owner, _memory, info().total_size());
}
+
+ // Allocate and fill the quantization parameter arrays
+ if(info().data_type() == DataType::QSYMM8_PER_CHANNEL)
+ {
+ const size_t pad_size = 0;
+ populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size);
+ }
+
+ // Lock allocator
info().set_is_resizable(false);
}
@@ -98,6 +152,7 @@ void CLTensorAllocator::free()
{
_mapping = nullptr;
_memory.set_region(nullptr);
+ clear_quantization_arrays(_scale, _offset);
info().set_is_resizable(true);
}
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 97b0a01331..e912740d69 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -130,7 +130,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor
PixelValue &&zero_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+ zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
_border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
@@ -288,6 +288,10 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0);
const size_t conv_size = conv_w * conv_h;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
// Im2Col configuration
TensorShape shape_im2col = input->info()->tensor_shape();
shape_im2col.set(0, patch_size);
@@ -319,9 +323,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
// Output staged configuration
if(_is_quantized)
{
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+ const UniformQuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info;
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+ float multiplier = iq_info.scale * wq_info.scale / output_quant_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -334,8 +338,8 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
PixelValue zero_w(static_cast<int32_t>(0));
if(_is_quantized)
{
- zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));
- zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));
+ zero_in = PixelValue(static_cast<int32_t>(iq_info.offset));
+ zero_w = PixelValue(static_cast<int32_t>(wq_info.offset));
}
BorderSize border_size = _v2mm_kernel.border_size();
_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index c451bd4b4c..bfc6ff158c 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
PixelValue &&zero_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+ zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
_input_border_handler.configure(input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 7b9229c4ae..87d4c56a0e 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -41,10 +41,13 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
{
if(is_data_type_quantized_asymmetric(input.data_type()))
{
+ const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset);
+ const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
+ const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info),
@@ -88,8 +91,8 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -230,11 +233,15 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
// Configure output stage for asymmetric quantized types
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset);
_gemmlowp_output.allocator()->allocate();
}
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 03d516f703..4e518fcfd5 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -115,8 +115,8 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
_mm_gemmlowp.configure(input, weights, biases, output, gemm_info);
@@ -151,8 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
std::unique_ptr<ITensorInfo> input_qa = input->clone();
std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
- input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Perform validation step on GEMMLowp
return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
@@ -190,6 +190,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
const unsigned int kernel_width = weights->info()->dimension(idx_width);
const unsigned int kernel_height = weights->info()->dimension(idx_height);
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
_is_prepared = weights_info.retain_internal_weights();
_original_weights = weights;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -281,9 +285,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
// Configure output stage for quantized case
if(_is_quantized)
{
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+ const auto output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info;
- const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale;
+ const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
int output_multiplier = 0;
int output_shift = 0;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -298,8 +302,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info);
+ const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
@@ -387,6 +391,10 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
// In case of F16, fused bias will be used in GEMM
const bool run_addition = (skip_im2col) && (append_bias) && (data_type != DataType::F16);
+ const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+
ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
@@ -468,9 +476,9 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
if(is_quantized)
{
- const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info();
+ const auto output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
- const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale;
+ const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
int output_multiplier = 0;
int output_shift = 0;
@@ -486,8 +494,8 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
if(is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info);
+ const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index bcb91e052c..36a120e4ef 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -277,11 +277,15 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / _gemmlowp_final.info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _gemmlowp_final.info()->quantization_info().uniform();
+
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier(0);
int output_shift(0);
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, _gemmlowp_final.info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, oq_info.offset);
_gemmlowp_final.allocator()->allocate();
}
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 049db1d461..875e3a2a00 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -77,8 +77,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
_is_prepared = false;
_original_b = b;
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
// Get the GPU target
const GPUTarget gpu_target = CLScheduler::get().target();
@@ -213,8 +213,8 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
- int32_t a_offset = a->quantization_info().offset;
- int32_t b_offset = b->quantization_info().offset;
+ int32_t a_offset = a->quantization_info().uniform().offset;
+ int32_t b_offset = b->quantization_info().uniform().offset;
const ITensorInfo *matrix_a_info = a;
const ITensorInfo *matrix_b_info = b;
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index cbe1ce3b47..086017a7fd 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,7 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
PixelValue pixel_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
{
- pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+ pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
}
switch(input->info()->data_layout())
{
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 3bb69b1ffc..4bc8439d93 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -72,7 +72,7 @@ void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor
_memory_group.manage(&_accumulator);
_accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
_accumulator.info()->set_data_layout(accum_layout);
- zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+ zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
}
if(!_is_nchw)
@@ -109,13 +109,15 @@ void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor
// Configure biases accumulation
if(_is_quantized)
{
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+ float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, output_quant_info.offset);
+ _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
_accumulator.allocator()->allocate();
}
else if(_has_bias)
@@ -459,13 +461,15 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
// Output staged configuration
if(_is_quantized)
{
- const QuantizationInfo output_quant_info = output->info()->quantization_info();
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+ float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, output_quant_info.offset);
+ _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset);
_output_reshaped.allocator()->allocate();
}
@@ -483,8 +487,8 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
PixelValue zero_w(static_cast<int32_t>(0));
if(_is_quantized)
{
- zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));
- zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));
+ zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().uniform().offset));
+ zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().uniform().offset));
}
BorderSize border_size = _v2mm_kernel.border_size();
_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index e1a17db6d4..7a74a7ea90 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -44,8 +44,8 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
{
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset);
+ const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale, -input.quantization_info().uniform().offset);
+ const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale, -weights.quantization_info().uniform().offset);
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info),
@@ -90,8 +90,8 @@ void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *we
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -227,11 +227,15 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
// Configure output stage for asymmetric quantized types
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset);
_gemmlowp_output.allocator()->allocate();
}
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index a2c4e8a8b1..c011ddd18f 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -109,15 +109,15 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
{
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info = input->info()->quantization_info();
- const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
+ const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wqinfo = weights->info()->quantization_info().uniform();
- input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset));
+ weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset));
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input_quantization_info : output->info()->quantization_info();
+ const UniformQuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info().uniform();
- float multiplier = input_quantization_info.scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+ float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -132,10 +132,10 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
};
if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo);
+ const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo);
- min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
+ min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
_is_activationlayer_enabled = false;
@@ -143,7 +143,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
GEMMLowpOutputStageInfo output_info;
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
- output_info.gemmlowp_offset = output_quant_info.offset;
+ output_info.gemmlowp_offset = oqinfo.offset;
output_info.gemmlowp_multiplier = output_multiplier;
output_info.gemmlowp_shift = output_shift;
output_info.gemmlowp_min_bound = min_activation;
@@ -152,8 +152,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
_mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info));
// Revert back QuantizatioInfo as input and weights could be used in other convolution layers
- input->info()->set_quantization_info(input_quantization_info);
- weights->info()->set_quantization_info(weights_quantization_info);
+ input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset));
+ weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, wqinfo.offset));
}
else
{
@@ -174,17 +174,17 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
{
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info = input->quantization_info();
- const QuantizationInfo weights_quantization_info = weights->quantization_info();
+ const UniformQuantizationInfo iqinfo = input->quantization_info().uniform();
+ const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
std::unique_ptr<ITensorInfo> input_qa = input->clone();
std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
- input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input_qa->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset));
+ weights_qa->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset));
- const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input_quantization_info : output->quantization_info();
+ const UniformQuantizationInfo oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info().uniform();
- float multiplier = input_quantization_info.scale * weights->quantization_info().scale / output_quant_info.scale;
+ float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -199,16 +199,16 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
};
if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo);
+ const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo);
- min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
+ min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
}
GEMMLowpOutputStageInfo output_info;
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
- output_info.gemmlowp_offset = output_quant_info.offset;
+ output_info.gemmlowp_offset = oqinfo.offset;
output_info.gemmlowp_multiplier = output_multiplier;
output_info.gemmlowp_shift = output_shift;
output_info.gemmlowp_min_bound = min_activation;
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 54f49a6707..d8773e37ab 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -61,8 +61,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
_mtx_b_reshape_kernel = nullptr;
// Set internal variables
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
_is_prepared = false;
@@ -224,8 +224,8 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
TensorInfo tmp_b_info{};
TensorInfo mm_result_s32_info{};
- int32_t a_offset = a->quantization_info().offset;
- int32_t b_offset = b->quantization_info().offset;
+ int32_t a_offset = a->quantization_info().uniform().offset;
+ int32_t b_offset = b->quantization_info().uniform().offset;
const bool reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
bool fuse_output_stage = gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index cbfd68485f..d92086d08d 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,7 +55,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
PixelValue zero_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
{
- zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+ zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
}
_border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value);
break;
diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
index 049bf66689..0499d9930f 100644
--- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
@@ -72,9 +72,9 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
// Create quantized convolver
if(data_type == DataType::QASYMM8)
{
- const QuantizationInfo &input_qinfo = input->info()->quantization_info();
- const QuantizationInfo &weights_qinfo = weights->info()->quantization_info();
- const QuantizationInfo &output_qinfo = output->info()->quantization_info();
+ const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo weights_qinfo = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform();
// Check that quantization info are in the range [0, 255]
ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255);