diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-01-22 16:29:17 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:45:00 +0000 |
commit | d05dce46a14a7b67f322328ecd95bf96bdd30bae (patch) | |
tree | 6e001f539969a1a669241a72e78ff5a62998a984 /src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | |
parent | 5d9d019b2c7ca3dc59bfbb44b3169ee5cd71dc79 (diff) | |
download | ComputeLibrary-d05dce46a14a7b67f322328ecd95bf96bdd30bae.tar.gz |
COMPMID-791: Generic Depthwise Convolution Layer NEON QASYMM8
Change-Id: I33cf54e68f6c097ac58b6f16c3f9a720978f09cd
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117289
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 70 |
1 files changed, 54 insertions, 16 deletions
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 2d08b45210..1af0b18933 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -26,11 +26,13 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
+using namespace arm_compute::misc;
 
 NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
     : _kernel(), _output_stage_kernel(), _border_handler(), _accumulator(), _has_bias(false), _is_quantized(false)
@@ -90,13 +92,14 @@ void NEDepthwiseConvolutionLayer3x3::run()
 }
 
 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
-    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _input_reshaped(), _weights_reshaped(), _v2mm_output()
+    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(),
+      _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_quantized(false)
 {
 }
 
 void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
 
@@ -104,14 +107,20 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
     const size_t weights_h = weights->info()->dimension(1);
     const size_t weights_z = weights->info()->dimension(2);
 
-    bool has_bias = (biases != nullptr);
+    _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+
+    // Should bias be appended ?
+    bool append_bias = (biases != nullptr) && !_is_quantized;
 
-    unsigned int conv_w = 0;
-    unsigned int conv_h = 0;
-    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
+    // Calculate output shape
+    TensorShape dwc_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info);
+
+    // Output width and height
+    const unsigned int conv_w = dwc_output_shape.x();
+    const unsigned int conv_h = dwc_output_shape.y();
 
     // Set up intermediate tensors
-    const size_t patch_size = weights_w * weights_h + ((has_bias) ? 1 : 0);
+    const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);
     const size_t conv_size = conv_w * conv_h;
 
     // Im2Col configuration
@@ -119,25 +128,48 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
     shape_im2col.set(0, patch_size);
     shape_im2col.set(1, conv_size);
     shape_im2col.set(2, weights_z);
-    const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    _input_reshaped.allocator()->init(info_im2col);
-    _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias);
+    _input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col));
+    _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias);
 
     // Weights reshape configuration
     const TensorShape shape_weights_reshape(patch_size, weights_z);
-    const TensorInfo info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position());
-    _weights_reshaped.allocator()->init(info_weights_reshape);
-    _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases);
+    _weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape));
+    _weights_reshape_kernel.configure(weights, &_weights_reshaped, append_bias ? biases : nullptr);
 
     // GEMV configuration
+    DataType v2mm_dt = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();
     TensorShape shape_v2mm_out = input->info()->tensor_shape();
     shape_v2mm_out.set(0, conv_size * weights_z);
     shape_v2mm_out.set(1, 1);
     shape_v2mm_out.set(2, 1);
-    const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    _v2mm_output.allocator()->init(info_v2mm_out);
+    _v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out));
     _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
-    _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h);
+    _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(dwc_output_shape));
+    _vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output, conv_w, conv_h);
+
+    // Output staged configuration
+    if(_is_quantized)
+    {
+        float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+        int output_multiplier, output_shift;
+        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+        _output_stage_kernel.configure(&_output_reshaped, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+        _output_reshaped.allocator()->allocate();
+    }
+
+    // Fill borders on inputs
+    PixelValue zero_in(0);
+    PixelValue zero_w(0);
+    if(_is_quantized)
+    {
+        zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));
+        zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));
+    }
+    BorderSize border_size = _v2mm_kernel.border_size();
+    _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);
+
+    border_size.bottom = 0;
+    _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);
 
     // Allocate intermediate tensors
     _input_reshaped.allocator()->allocate();
@@ -149,6 +181,12 @@ void NEDepthwiseConvolutionLayer::run()
 {
     NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);
     NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
+    NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);
+    NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
     NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);
     NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);
+    if(_is_quantized)
+    {
+        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
+    }
 }
\ No newline at end of file |