about summary refs log tree commit diff
path: root/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-01-12 16:29:45 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:43:42 +0000
commit f72f9367d1eddee91f15a64952b99ee6b80b821d (patch)
tree 0d3296219ca7919c263b3701ab22b5468df86354 /src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
parent a026e981c607272181292b044c91f73a27d2bcd9 (diff)
download ComputeLibrary-f72f9367d1eddee91f15a64952b99ee6b80b821d.tar.gz
COMPMID-791: Adds support of QASYMM8 in NEDepthwiseConvolution3x3
Change-Id: I1a9ed6c3420ddf8978aeaad48d9915333b006b49
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116374
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp 48
1 files changed, 38 insertions, 10 deletions
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 298101a09d..2d08b45210 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,28 +26,56 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
- : _kernel(), _output_stage_kernel(), _border_handler(), _has_bias(false)
+ : _kernel(), _output_stage_kernel(), _border_handler(), _accumulator(), _has_bias(false), _is_quantized(false)
{
}
void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- // Configure kernels
- _kernel.configure(input, weights, output, conv_info);
- _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
- if(biases != nullptr)
+ PixelValue zero_value(0.f);
+
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _has_bias = biases != nullptr;
+
+ // Allocate the intermediate accumulator tensor in case of fixed point input
+ if(_is_quantized)
+ {
+ _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::S32));
+ _accumulator.info()->set_quantization_info(input->info()->quantization_info());
+ zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+ }
+
+ // Configure depthwise convolution kernel
+ _kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info);
+
+ // Configure border handler
+ _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
+
+ // Configure biases accumulation
+ if(_has_bias || _is_quantized)
{
- _output_stage_kernel.configure(output, biases);
- _has_bias = true;
+ if(_is_quantized)
+ {
+ float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ int output_multiplier, output_shift;
+ quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+ _output_stage_kernel.configure(&_accumulator, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _accumulator.allocator()->allocate();
+ }
+ else
+ {
+ _output_stage_kernel.configure(output, biases);
+ }
}
}
@@ -55,7 +83,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
{
NEScheduler::get().schedule(&_border_handler, Window::DimX);
NEScheduler::get().schedule(&_kernel, Window::DimX);
- if(_has_bias)
+ if(_has_bias || _is_quantized)
{
NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
}