aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
diff options
context:
space:
mode:
authorVidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>2018-05-11 16:23:53 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:52:35 +0000
commitcb0010b02281245c66d5c996fa9ef8b22f036a2d (patch)
tree35240d40501fd41bd41faf7240ed42b07150f2ca /src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
parenta8a4d9e943f4874500edc0e41b37fd794c02149f (diff)
downloadComputeLibrary-cb0010b02281245c66d5c996fa9ef8b22f036a2d.tar.gz
COMPMID-1102 : Enable the use of 4x4 tile sizes in neon implementation of winograd conv.
Change-Id: Ibd2f2c6680b647a066255ea77d4a2a172ef76aa3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130418 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp10
1 files changed, 8 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
index 7e82dc4ecd..672684d14f 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
@@ -173,8 +173,10 @@ Status validate_arguments_winograd_output_trans(const ITensorInfo *input, const
const Size2D kernel_dims = winograd_info.kernel_size;
// Number of tiles along the X and Y direction
- const unsigned int num_tiles_x = std::ceil((winograd_info.input_dimensions.x() - (kernel_dims.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / 2.f);
- const unsigned int num_tiles_y = std::ceil((winograd_info.input_dimensions.y() - (kernel_dims.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / 2.f);
+ const unsigned int num_tiles_x = std::ceil((winograd_info.input_dimensions.x() - (kernel_dims.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>
+ (winograd_info.output_tile_size.width));
+ const unsigned int num_tiles_y = std::ceil((winograd_info.input_dimensions.y() - (kernel_dims.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>
+ (winograd_info.output_tile_size.height));
const Size2D num_tiles = Size2D(num_tiles_x, num_tiles_y);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
@@ -301,6 +303,7 @@ Status NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCol
}
template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>;
+template class NEWinogradLayerBatchedGEMMKernel<float, float, 4, 4, 3, 3>;
template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>;
// Weights transform
@@ -369,6 +372,7 @@ Status NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols,
}
template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
+template class NEWinogradLayerTransformWeightsKernel<float, 4, 4, 3, 3>;
template class NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>;
// Input transform
@@ -442,6 +446,7 @@ Status NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Ke
}
template class NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
+template class NEWinogradLayerTransformInputKernel<float, 4, 4, 3, 3>;
template class NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>;
// Output transform
@@ -544,6 +549,7 @@ Status NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, K
}
template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
+template class NEWinogradLayerTransformOutputKernel<float, 4, 4, 3, 3>;
template class NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>;
} // namespace arm_compute