diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-01-12 16:29:45 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:43:42 +0000 |
commit | f72f9367d1eddee91f15a64952b99ee6b80b821d (patch) | |
tree | 0d3296219ca7919c263b3701ab22b5468df86354 /src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | |
parent | a026e981c607272181292b044c91f73a27d2bcd9 (diff) | |
download | ComputeLibrary-f72f9367d1eddee91f15a64952b99ee6b80b821d.tar.gz |
COMPMID-791: Adds support of QASYMM8 in NEDepthwiseConvolution3x3
Change-Id: I1a9ed6c3420ddf8978aeaad48d9915333b006b49
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116374
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 48 |
1 files changed, 38 insertions, 10 deletions
```diff
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 298101a09d..2d08b45210 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,28 +26,56 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
 
 NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
-    : _kernel(), _output_stage_kernel(), _border_handler(), _has_bias(false)
+    : _kernel(), _output_stage_kernel(), _border_handler(), _accumulator(), _has_bias(false), _is_quantized(false)
 {
 }
 
 void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
 
-    // Configure kernels
-    _kernel.configure(input, weights, output, conv_info);
-    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
-    if(biases != nullptr)
+    PixelValue zero_value(0.f);
+
+    _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _has_bias     = biases != nullptr;
+
+    // Allocate the intermediate accumulator tensor in case of fixed point input
+    if(_is_quantized)
+    {
+        _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::S32));
+        _accumulator.info()->set_quantization_info(input->info()->quantization_info());
+        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+    }
+
+    // Configure depthwise convolution kernel
+    _kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info);
+
+    // Configure border handler
+    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
+
+    // Configure biases accumulation
+    if(_has_bias || _is_quantized)
     {
-        _output_stage_kernel.configure(output, biases);
-        _has_bias = true;
+        if(_is_quantized)
+        {
+            float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+            int   output_multiplier, output_shift;
+            quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+            _output_stage_kernel.configure(&_accumulator, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+            _accumulator.allocator()->allocate();
+        }
+        else
+        {
+            _output_stage_kernel.configure(output, biases);
+        }
     }
 }
 
@@ -55,7 +83,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
 {
     NEScheduler::get().schedule(&_border_handler, Window::DimX);
     NEScheduler::get().schedule(&_kernel, Window::DimX);
-    if(_has_bias)
+    if(_has_bias || _is_quantized)
     {
         NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
     }
```