COMPMID-791: Adds support for QASYMM8 in NEDepthwiseConvolution3x3

Change-Id: I1a9ed6c3420ddf8978aeaad48d9915333b006b49
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116374
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-01-12 16:29:45 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:43:42 +0000
commit: f72f9367d1eddee91f15a64952b99ee6b80b821d (patch)
tree: 0d3296219ca7919c263b3701ab22b5468df86354 /src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
parent: a026e981c607272181292b044c91f73a27d2bcd9 (diff)
download: ComputeLibrary-f72f9367d1eddee91f15a64952b99ee6b80b821d.tar.gz
1 files changed, 38 insertions, 10 deletions
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 298101a09d..2d08b45210 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,28 +26,56 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
 
 NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
-    : _kernel(), _output_stage_kernel(), _border_handler(), _has_bias(false)
+    : _kernel(), _output_stage_kernel(), _border_handler(), _accumulator(), _has_bias(false), _is_quantized(false)
 {
 }
 
 void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
 
-    // Configure kernels
-    _kernel.configure(input, weights, output, conv_info);
-    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
-    if(biases != nullptr)
+    PixelValue zero_value(0.f);
+
+    _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _has_bias     = biases != nullptr;
+
+    // Initialize the intermediate S32 accumulator tensor in case of quantized input
+    if(_is_quantized)
+    {
+        _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::S32));
+        _accumulator.info()->set_quantization_info(input->info()->quantization_info());
+        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+    }
+
+    // Configure depthwise convolution kernel
+    _kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info);
+
+    // Configure border handler
+    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
+
+    // Configure the output stage (bias accumulation and, for quantized input, S32 accumulator to output conversion)
+    if(_has_bias || _is_quantized)
     {
-        _output_stage_kernel.configure(output, biases);
-        _has_bias = true;
+        if(_is_quantized)
+        {
+            float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+            int   output_multiplier, output_shift;
+            quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+            _output_stage_kernel.configure(&_accumulator, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+            _accumulator.allocator()->allocate();
+        }
+        else
+        {
+            _output_stage_kernel.configure(output, biases);
+        }
     }
 }
 
@@ -55,7 +83,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
 {
     NEScheduler::get().schedule(&_border_handler, Window::DimX);
     NEScheduler::get().schedule(&_kernel, Window::DimX);
-    if(_has_bias)
+    if(_has_bias || _is_quantized)
     {
         NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
     }
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-01-12 16:29:45 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:43:42 +0000
commit	f72f9367d1eddee91f15a64952b99ee6b80b821d (patch)
tree	0d3296219ca7919c263b3701ab22b5468df86354 /src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
parent	a026e981c607272181292b044c91f73a27d2bcd9 (diff)
download	ComputeLibrary-f72f9367d1eddee91f15a64952b99ee6b80b821d.tar.gz