diff options
author | Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com> | 2018-05-11 16:23:53 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:52:35 +0000 |
commit | cb0010b02281245c66d5c996fa9ef8b22f036a2d (patch) | |
tree | 35240d40501fd41bd41faf7240ed42b07150f2ca /src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp | |
parent | a8a4d9e943f4874500edc0e41b37fd794c02149f (diff) | |
download | ComputeLibrary-cb0010b02281245c66d5c996fa9ef8b22f036a2d.tar.gz |
COMPMID-1102: Enable the use of 4x4 output tile sizes in the NEON implementation of Winograd convolution.
Change-Id: Ibd2f2c6680b647a066255ea77d4a2a172ef76aa3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130418
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
index 7e82dc4ecd..672684d14f 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
@@ -173,8 +173,10 @@ Status validate_arguments_winograd_output_trans(const ITensorInfo *input, const
     const Size2D kernel_dims = winograd_info.kernel_size;
 
     // Number of tiles along the X and Y direction
-    const unsigned int num_tiles_x = std::ceil((winograd_info.input_dimensions.x() - (kernel_dims.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
-    const unsigned int num_tiles_y = std::ceil((winograd_info.input_dimensions.y() - (kernel_dims.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
+    const unsigned int num_tiles_x = std::ceil((winograd_info.input_dimensions.x() - (kernel_dims.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>
+        (winograd_info.output_tile_size.width));
+    const unsigned int num_tiles_y = std::ceil((winograd_info.input_dimensions.y() - (kernel_dims.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>
+        (winograd_info.output_tile_size.height));
     const Size2D num_tiles = Size2D(num_tiles_x, num_tiles_y);
 
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
@@ -301,6 +303,7 @@ Status NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCol
 }
 
 template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>;
+template class NEWinogradLayerBatchedGEMMKernel<float, float, 4, 4, 3, 3>;
 template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>;
 
 // Weights transform
@@ -369,6 +372,7 @@ Status NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols,
 }
 
 template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
+template class NEWinogradLayerTransformWeightsKernel<float, 4, 4, 3, 3>;
 template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>;
 
 // Input transform
@@ -442,6 +446,7 @@ Status NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Ke
 }
 
 template class NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
+template class NEWinogradLayerTransformInputKernel<float, 4, 4, 3, 3>;
 template class NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>;
 
 // Output transform
@@ -544,6 +549,7 @@ Status NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, K
 }
 
 template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
+template class NEWinogradLayerTransformOutputKernel<float, 4, 4, 3, 3>;
 template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>;
 
 } // namespace arm_compute
```