From eb84d6b82ba744295f5249d2171ee6537e09751a Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 27 Jul 2018 18:28:10 +0100 Subject: COMPMID-1434: Fix NEWinograd for NHWC and sub-tensors Apply offsets and strides to winograd transform functions in NEON. Change-Id: Ia4f44d22244203a5f9d93d2fed73570396b0d28c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141803 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp index b295a0c685..3d7a16dd45 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp @@ -343,8 +343,15 @@ void NEWinogradLayerTransformInputKernel(_input_nhwc->buffer()), _num_batches, _num_rows, _num_cols, _num_channels, _padding, reinterpret_cast(_output->buffer()), - _matrix_stride, _num_channels); + const int element_size_in_bytes = _input_nhwc->info()->element_size(); + const int input_col_stride = _input_nhwc->info()->strides_in_bytes().y() / element_size_in_bytes; + const int input_row_stride = _input_nhwc->info()->strides_in_bytes().z() / element_size_in_bytes; + const int input_batch_stride = _input_nhwc->info()->strides_in_bytes()[3] / element_size_in_bytes; + + InputTransform input_transform(reinterpret_cast(_input_nhwc->buffer() + _input_nhwc->info()->offset_first_element_in_bytes()), + _num_batches, _num_rows, _num_cols, _num_channels, _padding, + reinterpret_cast(_output->buffer() + _output->info()->offset_first_element_in_bytes()), + _matrix_stride, _num_channels, input_batch_stride, input_row_stride, input_col_stride); // The code below cannot be moved to configure because biases hasn't been allocated at that point const size_t fst = window.x().start(); @@ -446,7 +453,8 @@ void NEWinogradLayerTransformOutputKernel(_output_workspace->buffer()), _matrix_stride, _matrix_row_stride, - (_biases ? reinterpret_cast(_biases->buffer()) : nullptr), reinterpret_cast(_output_nhwc->buffer()), + (_biases ? reinterpret_cast(_biases->buffer() + _biases->info()->offset_first_element_in_bytes()) : nullptr), + reinterpret_cast(_output_nhwc->buffer() + _output_nhwc->info()->offset_first_element_in_bytes()), _num_batches, _num_rows, _num_cols, _num_channels, 0, _output_nhwc->info()->strides_in_bytes()[2] / sizeof(T), _output_nhwc->info()->strides_in_bytes()[1] / sizeof(T)); // The code below cannot be moved to configure because biases hasn't been allocated at that point -- cgit v1.2.1