aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
diff options
context:
space:
mode:
author: Pablo Tello <pablo.tello@arm.com> 2018-08-22 11:40:33 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: bda6e4b51bc4045c97100bb9d562164ba7c6c28f (patch)
tree: 8924bbae251b34dc35a4ffc9a9ece79d28c4415b /src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
parent: 238c97cd8bfdb6dfce5c4eefed6aac4d9bb59457 (diff)
download: ComputeLibrary-bda6e4b51bc4045c97100bb9d562164ba7c6c28f.tar.gz
COMPMID-1247: Integrate kernel size 1x3 & 3x1 support in NEWinogradLayer.
Change-Id: I6fe198881230e49864c841a3b2366ccf2a9247f9 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145210 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp 74
1 files changed, 54 insertions, 20 deletions
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
index 3d7a16dd45..8f990712e8 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
@@ -40,19 +40,27 @@ namespace arm_compute
namespace
{
+/** Tell whether a kernel of the given dimensions is accepted by the Winograd validation helpers in this file.
+ *
+ * @param[in] size Kernel dimensions; the callers in this file build it as Size2D(width, height).
+ *
+ * @return true if @p size compares equal to one of Size2D(1, 3), Size2D(3, 1), Size2D(5, 5) or Size2D(3, 3),
+ *         false otherwise.
+ */
+inline bool is_kernel_size_supported(Size2D size)
+{
+    const std::array<Size2D, 4> allowed_kernels = { { Size2D(1, 3), Size2D(3, 1), Size2D(5, 5), Size2D(3, 3) } };
+    // Linear scan: the table only holds four entries
+    for(const Size2D &candidate : allowed_kernels)
+    {
+        if(candidate == size)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
Status validate_arguments_winograd_weight_trans(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != 3 && input->dimension(idx_width) != 5);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != input->dimension(idx_height));
+ const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+ const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
+ const auto input_width = input->dimension(idx_width);
+ const auto input_height = input->dimension(idx_height);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(input_width, input_height)), "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported");
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
const Size2D &output_tile = winograd_info.output_tile_size;
- ARM_COMPUTE_RETURN_ERROR_ON(output_tile != Size2D(2U, 2U) && output_tile != Size2D(4U, 4U));
+ const std::array<Size2D, 4> supported_tile_sizes = { { Size2D(2U, 2U), Size2D(4U, 4U), Size2D(1U, 6U), Size2D(6U, 1U) } };
+ ARM_COMPUTE_RETURN_ERROR_ON(std::end(supported_tile_sizes) == std::find(std::begin(supported_tile_sizes), std::end(supported_tile_sizes), output_tile));
// Checks performed when output is configured
if(output->total_size() != 0)
@@ -98,8 +106,8 @@ Status validate_arguments_winograd_input_trans(const ITensorInfo *input, const I
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1, "Winograd input transform only supports unit strides");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((kernel_dims.width != 3U && kernel_dims.width != 5U), "Winograd input transform only supports 3x3 and 5x5 kernels");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((kernel_dims.width != kernel_dims.height), "Winograd input transform only supports 3x3 and 5x5 kernels");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(kernel_dims.width, kernel_dims.height)),
+ "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported");
// Validate configured output
if(output->total_size() != 0)
@@ -151,9 +159,11 @@ Status validate_arguments_winograd_output_trans(const ITensorInfo *input, const
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != num_tiles.area());
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((kernel_dims.width != 3U && kernel_dims.width != 5U), "Winograd output transform only supports 3x3 and 5x5 kernels");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((kernel_dims.width != kernel_dims.height), "Winograd output transform only supports 3x3 and 5x5 kernels");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((input->dimension(2) != size_t(16U)) && (input->dimension(2) != size_t(36U))), "Only 2x2 and 4x4 output tile is supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(kernel_dims.width, kernel_dims.height)),
+ "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported");
+
+ const std::array<unsigned int, 3> supported_gemm_sizes = { { 8U, 16U, 36U } };
+ ARM_COMPUTE_RETURN_ERROR_ON(std::end(supported_gemm_sizes) == std::find(std::begin(supported_gemm_sizes), std::end(supported_gemm_sizes), input->dimension(2)));
ARM_COMPUTE_UNUSED(kernel_dims);
if(bias != nullptr)
{
@@ -201,7 +211,21 @@ std::pair<Status, Window> validate_and_configure_window_winograd_output_trans(IT
}
} // namespace
-// Weights transform
+/** Static validation of the tensors handed to the Winograd weights transform.
+ *
+ * The checks run in the order written below; each ARM_COMPUTE_RETURN_ERROR_* macro
+ * returns early from this function when its condition is hit.
+ *
+ * @param[in] input   Tensor info checked against (1 channel, DataType::F32); its data layout
+ *                    selects which dimensions are read as width and height.
+ * @param[in] weights Weights tensor info; checked for data type agreement with @p input,
+ *                    for a supported kernel size and for having at most 4 dimensions.
+ *
+ * @return An empty Status when none of the checks triggers.
+ */
+template <typename T>
+Status INEWinogradLayerTransformWeightsKernel<T>::validate(const ITensorInfo *input, const ITensorInfo *weights)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+
+    // The width/height indices come from the layout of input and are then applied to weights
+    const DataLayout   layout = input->data_layout();
+    const unsigned int w_dim  = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH);
+    const unsigned int h_dim  = get_data_layout_dimension_index(layout, DataLayoutDimension::HEIGHT);
+
+    const Size2D kernel_size(weights->dimension(w_dim), weights->dimension(h_dim));
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(kernel_size), "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported");
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
+
+    return Status{};
+}
+
+template class INEWinogradLayerTransformWeightsKernel<float>;
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
unsigned int NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_weight_storage_size(int num_output_channels, int num_input_channels) const
@@ -278,6 +302,8 @@ Status NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols,
template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
template class NEWinogradLayerTransformWeightsKernel<float, 4, 4, 3, 3>;
template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>;
+template class NEWinogradLayerTransformWeightsKernel<float, 1, 6, 1, 3>;
+template class NEWinogradLayerTransformWeightsKernel<float, 6, 1, 3, 1>;
// Input transform
@@ -343,14 +369,15 @@ void NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Kern
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- const int element_size_in_bytes = _input_nhwc->info()->element_size();
- const int input_col_stride = _input_nhwc->info()->strides_in_bytes().y() / element_size_in_bytes;
- const int input_row_stride = _input_nhwc->info()->strides_in_bytes().z() / element_size_in_bytes;
- const int input_batch_stride = _input_nhwc->info()->strides_in_bytes()[3] / element_size_in_bytes;
-
- InputTransform input_transform(reinterpret_cast<const T *>(_input_nhwc->buffer() + _input_nhwc->info()->offset_first_element_in_bytes()),
+ const int element_size_in_bytes = _input_nhwc->info()->element_size();
+ const int input_col_stride = _input_nhwc->info()->strides_in_bytes().y() / element_size_in_bytes;
+ const int input_row_stride = _input_nhwc->info()->strides_in_bytes().z() / element_size_in_bytes;
+ const int input_batch_stride = _input_nhwc->info()->strides_in_bytes()[3] / element_size_in_bytes;
+ const auto input_nhwc_ptr = reinterpret_cast<const T *>(_input_nhwc->buffer() + _input_nhwc->info()->offset_first_element_in_bytes());
+ auto output_ptr = reinterpret_cast<T *>(_output->buffer() + _output->info()->offset_first_element_in_bytes());
+ InputTransform input_transform(input_nhwc_ptr,
_num_batches, _num_rows, _num_cols, _num_channels, _padding,
- reinterpret_cast<T *>(_output->buffer() + _output->info()->offset_first_element_in_bytes()),
+ output_ptr,
_matrix_stride, _num_channels, input_batch_stride, input_row_stride, input_col_stride);
// The code below cannot be moved to configure because biases hasn't been allocated at that point
@@ -371,6 +398,8 @@ Status NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Ke
template class NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
template class NEWinogradLayerTransformInputKernel<float, 4, 4, 3, 3>;
template class NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>;
+template class NEWinogradLayerTransformInputKernel<float, 1, 6, 1, 3>;
+template class NEWinogradLayerTransformInputKernel<float, 6, 1, 3, 1>;
// Output transform
@@ -438,7 +467,6 @@ void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, Ker
Window win;
auto win_last = output_transform.get_window();
win.set(Window::DimX, Window::Dimension(0, win_last, 1));
-
_output_nhwc->info()->set_valid_region(ValidRegion(Coordinates(), _output_nhwc->info()->tensor_shape()));
INEKernel::configure(win);
@@ -452,10 +480,14 @@ void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, Ker
ARM_COMPUTE_ERROR_ON_NULLPTR(_output_workspace);
ARM_COMPUTE_ERROR_ON_NULLPTR(_output_nhwc);
+ const int out_batch_stride = 0;
+ const int out_row_stride = _output_nhwc->info()->strides_in_bytes()[2] / sizeof(T);
+ const int out_col_stride = _output_nhwc->info()->strides_in_bytes()[1] / sizeof(T);
+
OutputTransform output_transform(reinterpret_cast<T *>(_output_workspace->buffer()), _matrix_stride, _matrix_row_stride,
(_biases ? reinterpret_cast<T *>(_biases->buffer() + _biases->info()->offset_first_element_in_bytes()) : nullptr),
reinterpret_cast<T *>(_output_nhwc->buffer() + _output_nhwc->info()->offset_first_element_in_bytes()),
- _num_batches, _num_rows, _num_cols, _num_channels, 0, _output_nhwc->info()->strides_in_bytes()[2] / sizeof(T), _output_nhwc->info()->strides_in_bytes()[1] / sizeof(T));
+ _num_batches, _num_rows, _num_cols, _num_channels, out_batch_stride, out_row_stride, out_col_stride);
// The code below cannot be moved to configure because biases hasn't been allocated at that point
const size_t fst = window.x().start();
@@ -478,5 +510,7 @@ Status NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, K
template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
template class NEWinogradLayerTransformOutputKernel<float, 4, 4, 3, 3>;
template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>;
+template class NEWinogradLayerTransformOutputKernel<float, 1, 6, 1, 3>;
+template class NEWinogradLayerTransformOutputKernel<float, 6, 1, 3, 1>;
} // namespace arm_compute