aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-05-21 13:32:43 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-06-03 14:51:29 +0000
commit: 4c5469b192665c94118a8a558787cb9cec2d0765 (patch)
tree: 168aa969de8243bdbb1f25247dd9f54d037ae32c /src/runtime/CL/functions
parent: 43a129e94df41f9ac8bc78b702da5a387ada0494 (diff)
download: ComputeLibrary-4c5469b192665c94118a8a558787cb9cec2d0765.tar.gz
COMPMID-2225: Add interface support for new quantized data types.
Add support for:
 - QSYMM8, 8-bit quantized symmetric
 - QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per channel quantization

Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1236
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/functions')
-rw-r--r-- src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp | 14
-rw-r--r-- src/runtime/CL/functions/CLDirectConvolutionLayer.cpp | 4
-rw-r--r-- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 19
-rw-r--r-- src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 32
-rw-r--r-- src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp | 8
-rw-r--r-- src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 8
-rw-r--r-- src/runtime/CL/functions/CLPoolingLayer.cpp | 4
7 files changed, 56 insertions, 33 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 97b0a01331..e912740d69 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -130,7 +130,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor
PixelValue &&zero_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+ zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
_border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
@@ -288,6 +288,10 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0);
const size_t conv_size = conv_w * conv_h;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
// Im2Col configuration
TensorShape shape_im2col = input->info()->tensor_shape();
shape_im2col.set(0, patch_size);
@@ -319,9 +323,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
// Output staged configuration
if(_is_quantized)
{
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+ const UniformQuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info;
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+ float multiplier = iq_info.scale * wq_info.scale / output_quant_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -334,8 +338,8 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
PixelValue zero_w(static_cast<int32_t>(0));
if(_is_quantized)
{
- zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));
- zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));
+ zero_in = PixelValue(static_cast<int32_t>(iq_info.offset));
+ zero_w = PixelValue(static_cast<int32_t>(wq_info.offset));
}
BorderSize border_size = _v2mm_kernel.border_size();
_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index c451bd4b4c..bfc6ff158c 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
PixelValue &&zero_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+ zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
_input_border_handler.configure(input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 7b9229c4ae..87d4c56a0e 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -41,10 +41,13 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
{
if(is_data_type_quantized_asymmetric(input.data_type()))
{
+ const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset);
+ const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
+ const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info),
@@ -88,8 +91,8 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -230,11 +233,15 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
// Configure output stage for asymmetric quantized types
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset);
_gemmlowp_output.allocator()->allocate();
}
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 03d516f703..4e518fcfd5 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -115,8 +115,8 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
_mm_gemmlowp.configure(input, weights, biases, output, gemm_info);
@@ -151,8 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
std::unique_ptr<ITensorInfo> input_qa = input->clone();
std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
- input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Perform validation step on GEMMLowp
return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
@@ -190,6 +190,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
const unsigned int kernel_width = weights->info()->dimension(idx_width);
const unsigned int kernel_height = weights->info()->dimension(idx_height);
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
_is_prepared = weights_info.retain_internal_weights();
_original_weights = weights;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -281,9 +285,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
// Configure output stage for quantized case
if(_is_quantized)
{
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+ const auto output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info;
- const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale;
+ const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
int output_multiplier = 0;
int output_shift = 0;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -298,8 +302,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info);
+ const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
@@ -387,6 +391,10 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
// In case of F16, fused bias will be used in GEMM
const bool run_addition = (skip_im2col) && (append_bias) && (data_type != DataType::F16);
+ const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+
ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
@@ -468,9 +476,9 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
if(is_quantized)
{
- const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info();
+ const auto output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
- const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale;
+ const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
int output_multiplier = 0;
int output_shift = 0;
@@ -486,8 +494,8 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
if(is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
- const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+ const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info);
+ const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index bcb91e052c..36a120e4ef 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -277,11 +277,15 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / _gemmlowp_final.info()->quantization_info().scale;
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _gemmlowp_final.info()->quantization_info().uniform();
+
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier(0);
int output_shift(0);
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, _gemmlowp_final.info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, oq_info.offset);
_gemmlowp_final.allocator()->allocate();
}
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 049db1d461..875e3a2a00 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -77,8 +77,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
_is_prepared = false;
_original_b = b;
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
// Get the GPU target
const GPUTarget gpu_target = CLScheduler::get().target();
@@ -213,8 +213,8 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
- int32_t a_offset = a->quantization_info().offset;
- int32_t b_offset = b->quantization_info().offset;
+ int32_t a_offset = a->quantization_info().uniform().offset;
+ int32_t b_offset = b->quantization_info().uniform().offset;
const ITensorInfo *matrix_a_info = a;
const ITensorInfo *matrix_b_info = b;
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index cbe1ce3b47..086017a7fd 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,7 +45,7 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
PixelValue pixel_value(0.f);
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
{
- pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+ pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
}
switch(input->info()->data_layout())
{