From f29d1b7d8bf2d1619554eb3443556b44d4aa1a4c Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 29 Oct 2019 10:58:13 +0000 Subject: COMPMID-2608: Enable quantization with multiplier greater than 1 on NEON Change-Id: Ib2b0c9ac88fc2b645f478c9981f71ee28f2c77fd Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2425 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 12 +---------- .../NEON/functions/NEDirectConvolutionLayer.cpp | 5 +++-- .../NEON/functions/NEFullyConnectedLayer.cpp | 23 ++++++++++++---------- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 4 ++-- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 13 ++++++++++++ .../NEDepthwiseConvolutionAssemblyDispatch.cpp | 10 ++++++++++ 6 files changed, 42 insertions(+), 25 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index ca4fe732a7..ddcc71f466 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -60,16 +60,6 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo const bool is_quantized = (!is_data_type_quantized_per_channel(weights->data_type())) && is_data_type_quantized_asymmetric(input->data_type()); - if(is_quantized) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; - ARM_COMPUTE_UNUSED(multiplier); - ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f); - } if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation)) { TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); @@ -205,7 +195,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal:: float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int32_t output_multiplier; int32_t output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset); _accumulator.allocator()->allocate(); } diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index 322bb2c425..65538848df 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -31,8 +31,8 @@ #include #include -using namespace arm_compute; - +namespace arm_compute +{ NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_activationlayer_enabled(false), _dim_split(Window::DimZ) @@ -118,3 +118,4 @@ void NEDirectConvolutionLayer::run() _activationlayer_function.run(); } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index b3b90f8599..01746eb3db 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -33,7 +33,8 @@ #include #include -using namespace arm_compute; +namespace arm_compute +{ using namespace arm_compute::misc::shape_calculator; namespace @@ -258,7 +259,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; int32_t output_multiplier; int32_t output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_output.allocator()->allocate(); } @@ -352,13 +353,14 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn // Validate output stage for asymmetric quantized types if(is_quantized) { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - const float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; - - ARM_COMPUTE_UNUSED(multiplier); - ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f); + const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; + int output_multiplier; + int output_shift; + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(&gemmlowp_output, biases, output)); } @@ -475,4 +477,5 @@ void NEFullyConnectedLayer::prepare() _is_prepared = true; } -} \ No newline at end of file +} +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 0507c6b2bd..920917a58b 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -154,7 +154,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w output_info.gemmlowp_min_bound = min_activation; output_info.gemmlowp_max_bound = max_activation; output_info.is_quantized_per_channel = (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL); - quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info); + quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info); _mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info)); @@ -217,7 +217,7 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens output_info.gemmlowp_min_bound = min_activation; output_info.gemmlowp_max_bound = max_activation; output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL); - ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info)); + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info)); // Perform validation step on GEMMLowp std::unique_ptr input_qa = input->clone(); diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index e36cb3d399..440f043527 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -344,6 +344,19 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso { run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info)); run_optimised_requantized = run_optimised; + + const UniformQuantizationInfo a_qinfo = a_to_use->quantization_info().uniform(); + const QuantizationInfo b_qinfo = b->quantization_info(); + const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform(); + for(auto const s : b_qinfo.scale()) + { + const float fmultipler = a_qinfo.scale * s / output_qinfo.scale; + if(fmultipler > 1.f) + { + run_optimised_requantized = false; + break; + } + } } else { diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index 3235eee19a..142f873ef4 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -437,6 +437,16 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } + // The uniform quantization case will only have 1 scale value in the weights quantization info + const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform(); + const QuantizationInfo weights_qinfo = weights->quantization_info(); + const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform(); + for(auto const s : weights_qinfo.scale()) + { + const float fmultipler = input_qinfo.scale * s / output_qinfo.scale; + ARM_COMPUTE_RETURN_ERROR_ON(fmultipler > 1.f); + } + return Status{}; } -- cgit v1.2.1