From a799ce0ad775829862891dd98d1232638ec8761e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 12 Sep 2018 20:11:34 +0100 Subject: COMPMID-1564: Add NEDepthwiseConvolution3x3 for QASYMM8 Change-Id: I1f55508af6f220e5f41df7b56daffb4761ed0591 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148253 Tested-by: bsgcomp Reviewed-by: Isabella Gottardi --- .../kernels/convolution/depthwise/depthwise.hpp | 17 ++++++++++++-- .../kernels/convolution/depthwise/impl_base.hpp | 26 +++++++++++++++++----- 2 files changed, 35 insertions(+), 8 deletions(-) (limited to 'arm_compute/core/NEON/kernels/convolution/depthwise') diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp index 4ca68116db..472c44f97a 100644 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp +++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp @@ -33,6 +33,7 @@ class IDepthwiseConvolution virtual ~IDepthwiseConvolution() = default; virtual int output_size(const int dim_size, const bool padding_same) const = 0; virtual unsigned int get_window(void) const = 0; + virtual void set_offsets(int input_offset, int weights_offset) = 0; virtual void run(const unsigned int start, const unsigned int stop) = 0; }; @@ -179,6 +180,13 @@ class DepthwiseConvolution : public IDepthwiseConvolution >::get_output_size(dim_size, padding_same); } + /** Sets quantization offsets + * + * @param[in] input_offset Input offset + * @param[in] weights_offset Weights offset + */ + void set_offsets(int input_offset, int weights_offset) override; + /** Get the window of work to be performed by an instance of the operator. */ unsigned int get_window(void) const override; @@ -212,7 +220,9 @@ class DepthwiseConvolution : public IDepthwiseConvolution const int row_pad_out_bottom, const int n_tiles, const int n_input_cols, - const int n_output_cols + const int n_output_cols, + const int input_offset, + const int weights_offset ); // Determine the maximum (and minimum) padding values which can be applied @@ -272,7 +282,9 @@ class DepthwiseConvolution : public IDepthwiseConvolution const int _in_pad_bottom, const int _in_pad_right, const int _out_pad_bottom, - const int _out_pad_right + const int _out_pad_right, + const int _input_offset, + const int _weights_offset ); /* Arrays of methods to process tensor tiles. @@ -300,6 +312,7 @@ class DepthwiseConvolution : public IDepthwiseConvolution const int _weight_col_stride, _weight_row_stride; const int _input_col_stride, _input_row_stride, _input_batch_stride; const int _output_col_stride, _output_row_stride, _output_batch_stride; + int _input_offset, _weights_offset; }; } // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp index 17889849db..e262817a3c 100644 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp +++ b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp @@ -82,7 +82,8 @@ DepthwiseConvolution::DepthwiseConvolution( _input_batch_stride(input_batch_stride ? input_batch_stride : _n_input_rows * _input_row_stride), _output_col_stride(output_col_stride ? output_col_stride : _n_channels), _output_row_stride(output_row_stride ? output_row_stride : _n_output_cols * _output_col_stride), - _output_batch_stride(output_batch_stride ? output_batch_stride : _n_output_rows * _output_row_stride) + _output_batch_stride(output_batch_stride ? output_batch_stride : _n_output_rows * _output_row_stride), + _input_offset(0), _weights_offset(0) { } @@ -94,6 +95,12 @@ unsigned int DepthwiseConvolution::get_wind return iceildiv(_n_channels, CHANNEL_BLOCK); } +template +void DepthwiseConvolution::set_offsets(int input_offset, int weights_offset) +{ + _input_offset = input_offset; + _weights_offset = weights_offset; +} template void DepthwiseConvolution::run( @@ -145,7 +152,8 @@ void DepthwiseConvolution::run( outptr_row + start_channel, _output_row_stride, _output_col_stride, input_row_pad_top, input_pad_left, input_row_pad_bottom, output_row_pad_bottom, - _n_tile_cols, _n_input_cols, _n_output_cols + _n_tile_cols, _n_input_cols, _n_output_cols, + _input_offset, _weights_offset ); } } @@ -170,7 +178,9 @@ void DepthwiseConvolution::process_tile_row const int row_pad_out_bottom, const int n_tiles, const int n_input_cols, - const int n_output_cols + const int n_output_cols, + const int input_offset, + const int weights_offset ) { constexpr int tile_overlap = kernel_cols - stride_cols; @@ -242,7 +252,7 @@ void DepthwiseConvolution::process_tile_row inptr_col, in_row_stride, in_col_stride, outptr_col, out_row_stride, out_col_stride, row_pad_in_top, t_pad_in_left, row_pad_in_bottom, t_pad_in_right, - row_pad_out_bottom, t_pad_out_right + row_pad_out_bottom, t_pad_out_right, input_offset, weights_offset ); } } @@ -313,7 +323,9 @@ struct DepthwiseConvolutionImpl : public DepthwiseConvolution< const int in_pad_bottom=0, const int in_pad_right=0, const int out_pad_bottom=0, - const int out_pad_right=0 + const int out_pad_right=0, + const int input_offset=0, + const int weights_offset=0 ); }; @@ -340,7 +352,9 @@ void DepthwiseConvolutionImpl::process_tile const int _in_pad_bottom, const int _in_pad_right, const int _out_pad_bottom, - const int _out_pad_right + const int _out_pad_right, + const int _input_offset, + const int _weights_offset ) { constexpr auto inner_tile_rows = DWC::inner_tile_rows; -- cgit v1.2.1