From f72f9367d1eddee91f15a64952b99ee6b80b821d Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 12 Jan 2018 16:29:45 +0000 Subject: COMPMID-791: Adds support of QASYMM8 in NEDepthwiseConvolution3x3 Change-Id: I1a9ed6c3420ddf8978aeaad48d9915333b006b49 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116374 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 48 +++++++++++++++++----- 1 file changed, 38 insertions(+), 10 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 298101a09d..2d08b45210 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,28 +26,56 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "support/ToolchainSupport.h" using namespace arm_compute; NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3() - : _kernel(), _output_stage_kernel(), _border_handler(), _has_bias(false) + : _kernel(), _output_stage_kernel(), _border_handler(), _accumulator(), _has_bias(false), _is_quantized(false) { } void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - // Configure kernels - _kernel.configure(input, weights, output, conv_info); - _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); - if(biases != nullptr) + PixelValue zero_value(0.f); + + _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); + _has_bias = biases != nullptr; + + // Allocate the intermediate accumulator tensor in case of fixed point input + if(_is_quantized) + { + _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::S32)); + _accumulator.info()->set_quantization_info(input->info()->quantization_info()); + zero_value = PixelValue(static_cast(input->info()->quantization_info().offset)); + } + + // Configure depthwise convolution kernel + _kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info); + + // Configure border handler + _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value); + + // Configure biases accumulation + if(_has_bias || _is_quantized) { - _output_stage_kernel.configure(output, biases); - _has_bias = true; + if(_is_quantized) + { + float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale; + int output_multiplier, output_shift; + quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + _output_stage_kernel.configure(&_accumulator, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset); + _accumulator.allocator()->allocate(); + } + else + { + _output_stage_kernel.configure(output, biases); + } } } @@ -55,7 +83,7 @@ void NEDepthwiseConvolutionLayer3x3::run() { NEScheduler::get().schedule(&_border_handler, Window::DimX); NEScheduler::get().schedule(&_kernel, Window::DimX); - if(_has_bias) + if(_has_bias || _is_quantized) { NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX); } -- cgit v1.2.1