about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-09 16:11:51 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-16 15:12:03 +0000
commit: 1a57ad1edf755bd284c8a387976c292913616081 (patch)
tree: 98ecdca026ec1c2cb4600eed84b3aa34a0342c08
parent: 728d6f718dcfc177747fe69a5ce14471cb4c7d8d (diff)
download: ComputeLibrary-1a57ad1edf755bd284c8a387976c292913616081.tar.gz
COMPMID-1710: Add explicit padding arguments to depthwise convolution
Change-Id: I3011640f4d4d80b7f4e488ec8df47454d3220c5d
Reviewed-on: https://review.mlplatform.org/484
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp 301
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp 77
2 files changed, 289 insertions, 89 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
index 472c44f97a..6d9cb18f44 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,6 +32,12 @@ class IDepthwiseConvolution
public:
virtual ~IDepthwiseConvolution() = default;
virtual int output_size(const int dim_size, const bool padding_same) const = 0;
+ virtual int output_size(
+ int dim_size,
+ unsigned int padding_before,
+ unsigned int padding_after
+ ) const = 0;
+
virtual unsigned int get_window(void) const = 0;
virtual void set_offsets(int input_offset, int weights_offset) = 0;
virtual void run(const unsigned int start, const unsigned int stop) = 0;
@@ -65,18 +71,18 @@ class DepthwiseConvolution : public IDepthwiseConvolution
/** Create a new depthwise convolution engine.
*
- * @param[in] n_batches Number of batches tensors.
- * @param[in] n_input_rows Number of rows in input tensor.
- * @param[in] n_input_cols Number of columns in input tensor.
- * @param[in] n_channels Number of channels in input and output tensors.
- * @param[in] padding_same True if padding is SAME, else VALID.
- * @param[in] weights Pointer to Height x Width x Channel ordered weights.
- * @param[in] input Pointer to NHWC ordered input tensor.
- * @param[output] output Pointer to NHWC ordered output tensor.
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ * @param[in] padding_same True if padding is SAME, else VALID.
+ * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+ * @param[in] input Pointer to NHWC ordered input tensor.
+ * @param[out] output Pointer to NHWC ordered output tensor.
*/
DepthwiseConvolution(
- const int n_batches, const int n_input_rows, const int n_input_cols,
- const int n_channels, const bool padding_same,
+ int n_batches, int n_input_rows, int n_input_cols,
+ int n_channels, bool padding_same,
const TIn* const weights,
const TIn* const input,
TOut* const output
@@ -87,21 +93,53 @@ class DepthwiseConvolution : public IDepthwiseConvolution
{
}
+ /** Create a new depthwise convolution engine.
+ *
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ * @param[in] padding_top Padding to apply to top of input.
+ * @param[in] padding_left Padding to apply to left of input.
+ * @param[in] padding_bottom Padding to apply to bottom of input.
+ * @param[in] padding_right Padding to apply to right of input.
+ * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+ * @param[in] input Pointer to NHWC ordered input tensor.
+ * @param[out] output Pointer to NHWC ordered output tensor.
+ */
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols,
+ int n_channels,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right,
+ const TIn* const weights,
+ const TIn* const input,
+ TOut* const output
+ ) : DepthwiseConvolution(
+ n_batches, n_input_rows, n_input_cols, n_channels,
+ padding_top, padding_left, padding_bottom, padding_right,
+ weights, input, output, 0 /* column stride = default */
+ )
+ {
+ }
+
/** Create a new depthwise convolution engine with a specified column stride.
*
- * @param[in] n_batches Number of batches tensors.
- * @param[in] n_input_rows Number of rows in input tensor.
- * @param[in] n_input_cols Number of columns in input tensor.
- * @param[in] n_channels Number of channels in input and output tensors.
- * @param[in] padding_same True if padding is SAME, else VALID.
- * @param[in] weights Pointer to Height x Width x Channel ordered weights.
- * @param[in] input Pointer to NHWC ordered input tensor.
- * @param[output] output Pointer to NHWC ordered output tensor.
- * @param[in] col_stride Stride between columns of the weights, inputs and output tensors.
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ * @param[in] padding_same True if padding is SAME, else VALID.
+ * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+ * @param[in] input Pointer to NHWC ordered input tensor.
+ * @param[out] output Pointer to NHWC ordered output tensor.
+ * @param[in] col_stride Stride between columns of the weights, inputs and output tensors.
*/
DepthwiseConvolution(
- const int n_batches, const int n_input_rows, const int n_input_cols,
- const int n_channels, const bool padding_same,
+ int n_batches, int n_input_rows, int n_input_cols,
+ int n_channels, bool padding_same,
const TIn* const weights,
const TIn* const input,
TOut* const output,
@@ -116,39 +154,118 @@ class DepthwiseConvolution : public IDepthwiseConvolution
{
}
+ /** Create a new depthwise convolution engine with a specified column stride.
+ *
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ * @param[in] padding_top Padding to apply to top of input.
+ * @param[in] padding_left Padding to apply to left of input.
+ * @param[in] padding_bottom Padding to apply to bottom of input.
+ * @param[in] padding_right Padding to apply to right of input.
+ * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+ * @param[in] input Pointer to NHWC ordered input tensor.
+ * @param[out] output Pointer to NHWC ordered output tensor.
+ * @param[in] col_stride Stride between columns of the weights, inputs and output tensors.
+ */
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols,
+ int n_channels,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right,
+ const TIn* const weights,
+ const TIn* const input,
+ TOut* const output,
+ const int col_stride
+ ) : DepthwiseConvolution(
+ n_batches, n_input_rows, n_input_cols, n_channels,
+ padding_top, padding_left, padding_bottom, padding_right,
+ weights, input, output,
+ col_stride, 0, /* Weight row stride = default */
+ col_stride, 0, 0, /* Input row stride, batch stride = default */
+ col_stride, 0, 0 /* Output row stride, batch stride = default */
+ )
+ {
+ }
+
/** Create a new depthwise convolution engine.
*
- * @param[in] n_batches Number of batches tensors.
- * @param[in] n_input_rows Number of rows in input tensor.
- * @param[in] n_input_cols Number of columns in input tensor.
- * @param[in] n_channels Number of channels in input and output tensors.
- * @param[in] padding_same True if padding is SAME, else VALID.
- * @param[in] weights Pointer to Height x Width x Channel ordered weights.
- * @param[in] input Pointer to NHWC ordered input tensor.
- * @param[output] output Pointer to NHWC ordered output tensor.
- * @param[in] weight_col_stride Stride between columns of the weights (if 0, defaults appropriately).
- * @param[in] weight_row_stride Stride between rows of the weights (if 0, defaults appropriately).
- * @param[in] input_col_stride Stride between columns of the input tensor (if 0, defaults appropriately).
- * @param[in] input_row_stride Stride between rows of the input tensor (if 0, defaults appropriately).
- * @param[in] input_batch_stride Stride between batches of the input tensor (if 0, defaults appropriately).
- * @param[in] output_col_stride Stride between columns of the output tensor (if 0, defaults appropriately).
- * @param[in] output_row_stride Stride between rows of the output tensor (if 0, defaults appropriately).
- * @param[in] output_batch_stride Stride between batches of the output tensor (if 0, defaults appropriately).
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ * @param[in] padding_same True if padding is SAME, else VALID.
+ * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+ * @param[in] input Pointer to NHWC ordered input tensor.
+ * @param[out] output Pointer to NHWC ordered output tensor.
+ * @param[in] weight_col_stride Stride between columns of the weights (if 0, defaults appropriately).
+ * @param[in] weight_row_stride Stride between rows of the weights (if 0, defaults appropriately).
+ * @param[in] input_col_stride Stride between columns of the input tensor (if 0, defaults appropriately).
+ * @param[in] input_row_stride Stride between rows of the input tensor (if 0, defaults appropriately).
+ * @param[in] input_batch_stride Stride between batches of the input tensor (if 0, defaults appropriately).
+ * @param[in] output_col_stride Stride between columns of the output tensor (if 0, defaults appropriately).
+ * @param[in] output_row_stride Stride between rows of the output tensor (if 0, defaults appropriately).
+ * @param[in] output_batch_stride Stride between batches of the output tensor (if 0, defaults appropriately).
*/
DepthwiseConvolution(
- const int n_batches, const int n_input_rows, const int n_input_cols,
- const int n_channels, const bool padding_same,
+ int n_batches, int n_input_rows, int n_input_cols,
+ int n_channels, bool padding_same,
const TIn* const weights,
const TIn* const input,
TOut* const output,
- const int weight_col_stride,
- const int weight_row_stride,
- const int input_col_stride,
- const int input_row_stride,
- const int input_batch_stride,
- const int output_col_stride,
- const int output_row_stride,
- const int output_batch_stride
+ int weight_col_stride,
+ int weight_row_stride,
+ int input_col_stride,
+ int input_row_stride,
+ int input_batch_stride,
+ int output_col_stride,
+ int output_row_stride,
+ int output_batch_stride
+ );
+
+ /** Create a new depthwise convolution engine.
+ *
+ * @param[in] n_batches Number of batches tensors.
+ * @param[in] n_input_rows Number of rows in input tensor.
+ * @param[in] n_input_cols Number of columns in input tensor.
+ * @param[in] n_channels Number of channels in input and output tensors.
+ * @param[in] padding_top Padding to apply to top of input.
+ * @param[in] padding_left Padding to apply to left of input.
+ * @param[in] padding_bottom Padding to apply to bottom of input.
+ * @param[in] padding_right Padding to apply to right of input.
+ * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+ * @param[in] input Pointer to NHWC ordered input tensor.
+ * @param[out] output Pointer to NHWC ordered output tensor.
+ * @param[in] weight_col_stride Stride between columns of the weights (if 0, defaults appropriately).
+ * @param[in] weight_row_stride Stride between rows of the weights (if 0, defaults appropriately).
+ * @param[in] input_col_stride Stride between columns of the input tensor (if 0, defaults appropriately).
+ * @param[in] input_row_stride Stride between rows of the input tensor (if 0, defaults appropriately).
+ * @param[in] input_batch_stride Stride between batches of the input tensor (if 0, defaults appropriately).
+ * @param[in] output_col_stride Stride between columns of the output tensor (if 0, defaults appropriately).
+ * @param[in] output_row_stride Stride between rows of the output tensor (if 0, defaults appropriately).
+ * @param[in] output_batch_stride Stride between batches of the output tensor (if 0, defaults appropriately).
+ */
+ DepthwiseConvolution(
+ int n_batches, int n_input_rows, int n_input_cols,
+ int n_channels,
+ unsigned int padding_top,
+ unsigned int padding_left,
+ unsigned int padding_bottom,
+ unsigned int padding_right,
+ const TIn* const weights,
+ const TIn* const input,
+ TOut* const output,
+ int weight_col_stride,
+ int weight_row_stride,
+ int input_col_stride,
+ int input_row_stride,
+ int input_batch_stride,
+ int output_col_stride,
+ int output_row_stride,
+ int output_batch_stride
);
// Cannot copy or move a DepthwiseConvolution.
@@ -160,14 +277,19 @@ class DepthwiseConvolution : public IDepthwiseConvolution
* @param[in] dim_size Number of elements in the dimension (rows/columns)
* @param[in] same_padding True if the padding is SAME, otherwise false.
*/
- static int get_output_size(const int dim_size, const bool padding_same);
+ static int get_output_size(int dim_size, bool padding_same);
+ static int get_output_size(
+ int dim_size,
+ unsigned int padding_before,
+ unsigned int padding_after
+ );
/** Get the number of output rows/columns.
*
* @param[in] dim_size Number of elements in the dimension (rows/columns)
* @param[in] same_padding True if the padding is SAME, otherwise false.
*/
- int output_size(const int dim_size, const bool padding_same) const override
+ int output_size(int dim_size, bool padding_same) const override
{
return DepthwiseConvolution<
OutputTileRows,
@@ -180,6 +302,23 @@ class DepthwiseConvolution : public IDepthwiseConvolution
>::get_output_size(dim_size, padding_same);
}
+ int output_size(
+ int dim_size,
+ unsigned int padding_before,
+ unsigned int padding_after
+ ) const override
+ {
+ return DepthwiseConvolution<
+ OutputTileRows,
+ OutputTileCols,
+ KernelRows,
+ KernelCols,
+ StrideRows,
+ StrideCols,
+ TIn, TOut
+ >::get_output_size(dim_size, padding_before, padding_after);
+ }
+
/** Sets quantization offsets
*
* @param[in] input_offset Input offset
@@ -198,31 +337,31 @@ class DepthwiseConvolution : public IDepthwiseConvolution
* @param[in] start Start of the window of work to perform.
* @param[in] stop End of the work to perform.
*/
- void run(const unsigned int start, const unsigned int stop) override;
+ void run(unsigned int start, unsigned int stop) override;
protected:
/** Process a tile-row of the tensors.
*/
static void process_tile_row(
- const int n_channels,
+ int n_channels,
const TIn* const weights,
const int weight_row_stride,
const int weight_col_stride,
const TIn* const inptr,
- const int in_row_stride,
- const int in_col_stride,
+ int in_row_stride,
+ int in_col_stride,
TOut* const outptr,
- const int out_row_stride,
- const int out_col_stride,
- const int row_pad_in_top,
- const int row_pad_in_left,
- const int row_pad_in_bottom,
- const int row_pad_out_bottom,
- const int n_tiles,
- const int n_input_cols,
- const int n_output_cols,
- const int input_offset,
- const int weights_offset
+ int out_row_stride,
+ int out_col_stride,
+ int row_pad_in_top,
+ int row_pad_in_left,
+ int row_pad_in_bottom,
+ int row_pad_out_bottom,
+ int n_tiles,
+ int n_input_cols,
+ int n_output_cols,
+ int input_offset,
+ int weights_offset
);
// Determine the maximum (and minimum) padding values which can be applied
@@ -267,24 +406,24 @@ class DepthwiseConvolution : public IDepthwiseConvolution
* @param[in] _out_pad_right Null cells at right of output tile.
*/
typedef void (*TileFn)(
- const int n_channels,
+ int n_channels,
const TIn* const weights,
- const int weight_row_stride,
- const int weight_col_stride,
+ int weight_row_stride,
+ int weight_col_stride,
const TIn* const inptr,
- const int in_row_stride,
- const int in_col_stride,
+ int in_row_stride,
+ int in_col_stride,
TOut* const outptr,
- const int out_row_stride,
- const int out_col_stride,
- const int _in_pad_top,
- const int _in_pad_left,
- const int _in_pad_bottom,
- const int _in_pad_right,
- const int _out_pad_bottom,
- const int _out_pad_right,
- const int _input_offset,
- const int _weights_offset
+ int out_row_stride,
+ int out_col_stride,
+ int _in_pad_top,
+ int _in_pad_left,
+ int _in_pad_bottom,
+ int _in_pad_right,
+ int _out_pad_bottom,
+ int _out_pad_right,
+ int _input_offset,
+ int _weights_offset
);
/* Arrays of methods to process tensor tiles.
@@ -306,7 +445,7 @@ class DepthwiseConvolution : public IDepthwiseConvolution
TOut* const _output;
const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
_n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
- const bool _padding_same;
+ const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right;
// Stride information for a convolution instance
const int _weight_col_stride, _weight_row_stride;
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
index e262817a3c..b33f2768ad 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,6 +41,24 @@ namespace depthwise
const unsigned int CHANNEL_BLOCK = 16;
+namespace
+{
+ inline int pad_along_dim(
+ const bool padding_same,
+ const int kernel_dim,
+ const int stride_dim,
+ const int input_dim
+ )
+ {
+ if (!padding_same)
+ return 0;
+ if (input_dim % stride_dim)
+ return std::max(kernel_dim - (input_dim % stride_dim), 0);
+ else
+ return std::max(kernel_dim - stride_dim, 0);
+ }
+} // namespace
+
template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_output_size(
const int dim_size, const bool same_padding
@@ -49,6 +67,13 @@ int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_output_size(
return iceildiv(dim_size - (same_padding ? 0 : (KC - 1)), SR);
}
+template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
+int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_output_size(
+ const int dim_size, const unsigned int padding_before, const unsigned int padding_after
+)
+{
+ return iceildiv(dim_size + padding_before + padding_after - KR + 1, SR);
+}
template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
@@ -65,16 +90,54 @@ DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
const int output_col_stride,
const int output_row_stride,
const int output_batch_stride
+) : DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>(
+ n_batches, n_input_rows, n_input_cols,
+ n_channels,
+ pad_along_dim(padding_same, KR, SR, n_input_rows) / 2, /* top padding */
+ pad_along_dim(padding_same, KC, SC, n_input_cols) / 2, /* left padding */
+ iceildiv(pad_along_dim(padding_same, KR, SR, n_input_rows), 2), /* bottom padding */
+ iceildiv(pad_along_dim(padding_same, KC, SC, n_input_cols), 2), /* right padding */
+ weights, input, output,
+ weight_col_stride, weight_row_stride,
+ input_col_stride, input_row_stride, input_batch_stride,
+ output_col_stride, output_row_stride, output_batch_stride
+)
+{
+}
+
+
+template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
+DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
+ const int n_batches, const int n_input_rows, const int n_input_cols,
+ const int n_channels,
+ const unsigned int padding_top,
+ const unsigned int padding_left,
+ const unsigned int padding_bottom,
+ const unsigned int padding_right,
+ const TIn* const weights,
+ const TIn* const input,
+ TOut* const output,
+ const int weight_col_stride,
+ const int weight_row_stride,
+ const int input_col_stride,
+ const int input_row_stride,
+ const int input_batch_stride,
+ const int output_col_stride,
+ const int output_row_stride,
+ const int output_batch_stride
) : _weights(weights), _input(input), _output(output),
_n_batches(n_batches),
_n_input_rows(n_input_rows),
_n_input_cols(n_input_cols),
_n_channels(n_channels),
- _n_output_rows(get_output_size(n_input_rows, padding_same)),
- _n_output_cols(get_output_size(n_input_cols, padding_same)),
+ _n_output_rows(get_output_size(n_input_rows, padding_top, padding_bottom)),
+ _n_output_cols(get_output_size(n_input_cols, padding_left, padding_right)),
_n_tile_rows(iceildiv(_n_output_rows, output_tile_rows)),
_n_tile_cols(iceildiv(_n_output_cols, output_tile_cols)),
- _padding_same(padding_same),
+ _padding_top(padding_top),
+ _padding_left(padding_left),
+ _padding_bottom(padding_bottom),
+ _padding_right(padding_right),
_weight_col_stride(weight_col_stride ? weight_col_stride : _n_channels),
_weight_row_stride(weight_row_stride ? weight_row_stride : KC * _weight_col_stride),
_input_col_stride(input_col_stride ? input_col_stride : _n_channels),
@@ -113,10 +176,8 @@ void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::run(
const auto stop_channel = std::min<unsigned int>(_n_channels, CHANNEL_BLOCK * stop);
// Compute top and bottom padding for input and output
- const int input_pad_top = _padding_same ?
- ((_n_output_rows - 1)*stride_rows + kernel_rows - _n_input_rows) / 2 : 0;
- const int input_pad_left = _padding_same ?
- ((_n_output_cols - 1)*stride_cols + kernel_cols - _n_input_cols) / 2 : 0;
+ const int input_pad_top = _padding_top;
+ const int input_pad_left = _padding_left;
constexpr int tile_overlap = kernel_rows - stride_rows;
// Perform the convolution by calling `process_tile_row` for each tile row in