about summary refs log tree commit diff
path: root/arm_compute/core/NEON/kernels/winograd
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2018-01-23 09:36:04 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:45:00 +0000
commitd6ca478a7e410f8f529c2e505305b46d9fe21a9b (patch)
tree5c50c06e07f812890f127b1c4933996987f74f17 /arm_compute/core/NEON/kernels/winograd
parentd05dce46a14a7b67f322328ecd95bf96bdd30bae (diff)
downloadComputeLibrary-d6ca478a7e410f8f529c2e505305b46d9fe21a9b.tar.gz
COMPMID-784: Added support for biases in WinogradLayer.
1) Updated to the latest code from the RSH repo. 2) Moved winograd transforms into kernels. 3) Added support for biases Change-Id: I7f39f34a599b49d7d9b549cc10a4f4d4a8007ab8 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117474 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/winograd')
-rw-r--r--arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp1
-rw-r--r--arm_compute/core/NEON/kernels/winograd/transforms/input.hpp2
-rw-r--r--arm_compute/core/NEON/kernels/winograd/transforms/output.hpp21
-rw-r--r--arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp12
-rw-r--r--arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp89
5 files changed, 54 insertions, 71 deletions
diff --git a/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp b/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp
index 725f6cab65..6a9984a24a 100644
--- a/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp
@@ -29,6 +29,7 @@
void direct_convolution(
const Tensor4D<Tensor4DShape, float>& input,
const Tensor4D<KernelShape, float>& kernel,
+ const Tensor4D<Tensor4DShape, float>& biases,
Tensor4D<Tensor4DShape, float>& output,
const PaddingType padding
);
diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp
index 39b444184e..075765a513 100644
--- a/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp
@@ -71,7 +71,7 @@ namespace winograd
const int row_offset = (tile_i == 0) ?
0 : ((padding_type == PADDING_VALID) ? 0 : 1);
const T* const input_base_row = (
- input_base_batch + ((inner_tile_rows - 2)*tile_i - row_offset)*input_row_stride
+ input_base_batch + ((inner_tile_rows - (kernel_rows - 1))*tile_i - row_offset)*input_row_stride
);
T* const outptr_base_row = outptr_base_batch + tile_i*output_row_stride;
diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp b/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp
index 7fa5ee9617..0dd719751b 100644
--- a/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp
@@ -35,6 +35,7 @@ namespace winograd
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output
)
{
@@ -69,8 +70,9 @@ namespace winograd
// Process the row
process_tile_row(
tile_N, output_shape.n_channels, matrix_tile_row, matrix_stride,
- matrix_row_stride, outptr_row, output_row_stride,
- output_col_stride, row_pad_bottom, pad_right
+ matrix_row_stride, biases,
+ outptr_row, output_row_stride, output_col_stride, row_pad_bottom,
+ pad_right
);
}
}
@@ -85,6 +87,7 @@ namespace winograd
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output,
const int output_row_stride,
const int output_col_stride,
@@ -102,7 +105,7 @@ namespace winograd
// Perform the output transformation
tile_fns[row_pad_bottom][tile_pad_right](
- n_channels, matrix_row, matrix_stride,
+ n_channels, matrix_row, matrix_stride, biases,
outptr, output_row_stride, output_col_stride
);
}
@@ -131,14 +134,17 @@ namespace winograd
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output,
const int n_batches,
const int n_rows,
const int n_cols,
const int n_channels
- ) : _matrix_base(matrix_base), _matrix_stride(matrix_stride), _matrix_row_stride(matrix_row_stride),
- _outptr(output), _n_batches(n_batches), _n_rows(n_rows), _n_cols(n_cols), _n_channels(n_channels),
- _tile_M(iceildiv(n_rows, output_tile_rows)), _tile_N(iceildiv(n_cols, output_tile_cols))
+ ) : _matrix_base(matrix_base), _biases(biases),
+ _matrix_stride(matrix_stride), _matrix_row_stride(matrix_row_stride),
+ _outptr(output), _n_batches(n_batches), _n_rows(n_rows), _n_cols(n_cols),
+ _n_channels(n_channels), _tile_M(iceildiv(n_rows, output_tile_rows)),
+ _tile_N(iceildiv(n_cols, output_tile_cols))
{
}
@@ -168,7 +174,8 @@ namespace winograd
_n_batches, _n_rows, _n_cols, _n_channels, NHWC
};
execute(
- output_shape, _matrix_base, _matrix_stride, _matrix_row_stride, _outptr
+ output_shape, _matrix_base, _matrix_stride, _matrix_row_stride, _biases,
+ _outptr
);
}
} // namespace winograd
diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp b/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp
index adca48a6d6..2ea70f182b 100644
--- a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp
@@ -183,7 +183,7 @@ class WinogradGEMM
const int row_pad_top,
const int row_pad_left,
const int row_pad_bottom,
- const int row_pad_right
+ const int n_cols
);
static constexpr int max_pad_bottom = inner_tile_rows - 1;
@@ -225,6 +225,7 @@ class WinogradGEMM
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output
);
@@ -236,6 +237,7 @@ class WinogradGEMM
const T* const matrix_base, /** Pointer to base of matrices. */
const int matrix_stride, /** Stride between matrices. */
const int matrix_row_stride, /** Stride within a matrix. */
+ const T* const biases, /** Pointer to biases vector. */
T* const output, /** Pointer to output tensor. */
const int n_batches, /** Number of batches in output tensor. */
const int n_rows, /** Number of rows in output tensor. */
@@ -257,6 +259,7 @@ class WinogradGEMM
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output,
const int output_row_stride,
const int output_col_stride,
@@ -270,14 +273,15 @@ class WinogradGEMM
/** Prepare a single tile of the output tensor. */
template <int pad_bottom, int pad_right>
- static void process_tile(int, const T*, int, T*, int, int);
+ static void process_tile(int, const T*, int, const T*, T*, int, int);
// Array of methods to produce tiles of output tensor.
- typedef void (*TileFn)(int, const T*, int, T*, int, int);
+ typedef void (*TileFn)(int, const T*, int, const T*, T*, int, int);
static const TileFn tile_fns[max_pad_bottom][max_pad_right];
/** Member constants for instances of the transform. */
const T* const _matrix_base;
+ const T* const _biases;
const int _matrix_stride, _matrix_row_stride;
T* const _outptr;
const int _n_batches, _n_rows, _n_cols, _n_channels, _tile_M, _tile_N;
@@ -328,6 +332,7 @@ class WinogradGEMM
void execute(
TOut* const output,
const TIn* const input,
+ const TOut* const biases,
void* working_space=NULL,
const int n_threads=1
);
@@ -336,6 +341,7 @@ class WinogradGEMM
void execute(
TOut* const output,
const TIn* const input,
+ const TOut* const biases,
const int n_threads
);
diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp b/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp
index 4559312df4..1db63d750b 100644
--- a/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp
@@ -74,87 +74,56 @@ class WinogradConvolutionLayer
/** Determine how much memory (in units of TIn) to allocate for the
* transformed weights.
- *
- * @param[in] n_output_channels Number of output feature maps.
- * @param[in] n_input_channels Number of input feature maps.
*/
static unsigned int get_weight_storage_size(
- const int n_output_channels,
- const int n_input_channels
+ const int n_output_channels, /** Number of output feature maps. */
+ const int n_input_channels /** Number of input feature maps. */
);
/** Determine how much memory (in units of TIn) to allocate for the
* transformed input.
- *
- * @param[in] n_batches Number of batches in the input tensor.
- * @param[in] n_channels Number of feature maps in the input tensor.
- * @param[in] n_rows Number of rows in each feature map.
- * @param[in] n_cols Number of columns in each feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
*/
static unsigned int get_input_storage_size(
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- const bool same_padding
+ const int n_batches, /** Number of batches in the input tensor. */
+ const int n_channels, /** Number of feature maps in the input tensor. */
+ const int n_rows, /** Number of rows in each feature map. */
+ const int n_cols, /** Number of columns in each feature map. */
+ const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
);
/** Determine how much memory (in units of TOut) to allocate for the
* (Winograd domain) output.
- *
- * @param[in] n_batches Number of batches in the output tensor.
- * @param[in] n_rows Number of rows in each feature map of the input tensor.
- * @param[in] n_cols Number of columns in each feature map of the input tensor.
- * @param[in] n_output_channels Number of feature maps in the output tensor.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
*/
static unsigned int get_output_storage_size(
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_output_channels,
- const bool same_padding
+ const int n_batches, /** Number of batches in the output tensor. */
+ const int n_rows, /** Number of rows in each feature map of the input tensor. */
+ const int n_cols, /** Number of columns in each feature map of the input tensor. */
+ const int n_output_channels, /** Number of feature maps in the output tensor. */
+ const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
);
- /** Get the shape (rows, cols) of a feature map of the output tensor.
- *
- * @param[in] n_input_rows Number of rows in the input feature map.
- * @param[in] n_input_cols Number of columns in the input feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- */
+ /** Get the shape (rows, cols) of a feature map of the output tensor. */
static std::pair<int, int> get_output_feature_map_shape(
- const int n_input_rows,
- const int n_input_cols,
- const bool same_padding
+ const int n_input_rows, /** Number of rows in the input feature map. */
+ const int n_input_cols, /** Number of columns in the input feature map. */
+ const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
);
/** Create a new Winograd convolution layer.
- * @param[in] n_batches Number of batches in the input and output tensors.
- * @param[in] n_input_channels Number of feature maps in a batch of the input tensor.
- * @param[in] n_input_rows Number of rows in a feature map of the input tensor.
- * @param[in] n_input_cols Number of columns in a feature map of the input tensor.
- * @param[in] n_output_channels Number of feature maps in the output tensor.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- * @param[in] weights Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps.
- * @param[out] weights_storage Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size
- * @param[in] input Pointer to NHWC ordered input tensor, in the spatial domain.
- * @param[out] winograd_input Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`.
- * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
- * @param[out] winograd_output Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`.
*/
WinogradConvolutionLayer(
- const int n_batches,
- const int n_input_channels,
- const int n_input_rows,
- const int n_input_cols,
- const int n_output_channels,
- const bool same_padding,
- const TIn* const weights,
- TIn* const weights_storage,
- const TIn* const input,
- TIn* const winograd_input,
- TOut* const output,
- TOut* const winograd_output
+ const int n_batches, /** Number of batches in the input and output tensors. */
+ const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */
+ const int n_input_rows, /** Number of rows in a feature map of the input tensor. */
+ const int n_input_cols, /** Number of columns in a feature map of the input tensor. */
+ const int n_output_channels, /** Number of feature maps in the output tensor. */
+ const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */
+ const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. */
+ TIn* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */
+ const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */
+ TIn* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */
+ const TOut* const biases, /** Pointer to biases vector. */
+ TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */
+ TOut* const winograd_output /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */
);
};