From cb0010b02281245c66d5c996fa9ef8b22f036a2d Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Fri, 11 May 2018 16:23:53 +0100 Subject: COMPMID-1102 : Enable the use of 4x4 tile sizes in neon implementation of winograd conv. Change-Id: Ibd2f2c6680b647a066255ea77d4a2a172ef76aa3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130418 Reviewed-by: Gian Marco Iodice Tested-by: Jenkins --- src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp index 7e82dc4ecd..672684d14f 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp @@ -173,8 +173,10 @@ Status validate_arguments_winograd_output_trans(const ITensorInfo *input, const const Size2D kernel_dims = winograd_info.kernel_size; // Number of tiles along the X and Y direction - const unsigned int num_tiles_x = std::ceil((winograd_info.input_dimensions.x() - (kernel_dims.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f); - const unsigned int num_tiles_y = std::ceil((winograd_info.input_dimensions.y() - (kernel_dims.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f); + const unsigned int num_tiles_x = std::ceil((winograd_info.input_dimensions.x() - (kernel_dims.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float> + (winograd_info.output_tile_size.width)); + const unsigned int num_tiles_y = std::ceil((winograd_info.input_dimensions.y() - (kernel_dims.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float> + (winograd_info.output_tile_size.height)); const Size2D num_tiles = Size2D(num_tiles_x, num_tiles_y); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); @@ -301,6 +303,7 @@ Status NEWinogradLayerBatchedGEMMKernel; +template class 
NEWinogradLayerBatchedGEMMKernel; template class NEWinogradLayerBatchedGEMMKernel; // Weights transform @@ -369,6 +372,7 @@ Status NEWinogradLayerTransformWeightsKernel; +template class NEWinogradLayerTransformWeightsKernel; template class NEWinogradLayerTransformWeightsKernel; // Input transform @@ -442,6 +446,7 @@ Status NEWinogradLayerTransformInputKernel; +template class NEWinogradLayerTransformInputKernel; template class NEWinogradLayerTransformInputKernel; // Output transform @@ -544,6 +549,7 @@ Status NEWinogradLayerTransformOutputKernel; +template class NEWinogradLayerTransformOutputKernel; template class NEWinogradLayerTransformOutputKernel; } // namespace arm_compute -- cgit v1.2.1