From 5264b7d5555ec980f9c52c719122479d0d676af8 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Mon, 21 Oct 2019 14:25:41 +0100 Subject: COMPMID-2576: Fuse activation in Winograd output transform. Change-Id: I26dd1307847adeaaefae0a7374b9858c07d71372 Signed-off-by: Pablo Tello Reviewed-on: https://review.mlplatform.org/c/2172 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice --- .../kernels/NEWinogradConvolutionLayerKernel.cpp | 69 ++++++++++++---------- 1 file changed, 39 insertions(+), 30 deletions(-) (limited to 'src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp index 263ded0b84..fda384bc62 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" @@ -233,7 +234,7 @@ unsigned int NEWinogradLayerTransformWeightsKernel( // WinogradConv returns the size in bytes, we divide by `sizeof(T)` to express that in units of T - WinogradConv::get_kernel_storage_size(shape) / sizeof(T)); + WinogradConv::get_kernel_storage_size(num_input_channels, num_output_channels) / sizeof(T)); } template @@ -243,9 +244,9 @@ NEWinogradLayerTransformWeightsKernel -int NEWinogradLayerTransformWeightsKernel::get_matrix_stride(const KernelShape &kernel_shape) const +int NEWinogradLayerTransformWeightsKernel::get_matrix_stride(int num_output_channels, int num_input_channels) const { - return WinogradConv::get_kernel_matrix_stride(kernel_shape); + return WinogradConv::get_kernel_matrix_stride(num_input_channels, num_output_channels); } #ifndef DOXYGEN_SKIP_THIS @@ -325,9 +326,8 @@ unsigned int NEWinogradLayerTransformInputKernel(WinogradConv::get_input_storage_size(kern_shape, input_shape, padding) / sizeof(T)); + return static_cast(WinogradConv::get_input_storage_size(num_batches, num_rows, num_cols, num_channels, same_padding) / sizeof(T)); } template @@ -338,9 +338,13 @@ unsigned int NEWinogradLayerTransformInputKernel int NEWinogradLayerTransformInputKernel::get_matrix_stride( - const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const + int num_batches, /* Number of batches in the input tensor. */ + int num_channels, /* Number of feature maps in the input tensor. */ + int num_rows, /* Number of rows in each feature map. */ + int num_cols, /* Number of columns in each feature map. */ + bool same_padding /* Use "SAME" padding, otherwise use "VALID". */) const { - return WinogradConv::get_input_matrix_stride(kernel_shape, input_shape, padding_type); + return WinogradConv::get_input_matrix_stride(num_batches, num_rows, num_cols, num_channels, same_padding); } template @@ -446,21 +450,18 @@ template class NEWinogradLayerTransformInputKernel; template unsigned int NEWinogradLayerTransformOutputKernel::get_output_storage_size( - int num_batches, /* Number of batches in the output tensor. */ - int num_rows, /* Number of rows in each feature map of the input tensor. */ - int num_cols, /* Number of columns in each feature map of the input tensor. */ - int num_output_channels, /* Number of feature maps in the output tensor. */ - bool same_padding /* Use "SAME" padding, otherwise use "VALID". */ + int num_batches, /* Number of batches in the output tensor. */ + int num_rows, /* Number of rows in each feature map of the input tensor. */ + int num_cols, /* Number of columns in each feature map of the input tensor. */ + int num_output_channels /* Number of feature maps in the output tensor. */ ) const { // Construct shapes for the input and kernel tensors. const Tensor4DShape input_shape(num_batches, num_rows, num_cols, 1); const KernelShape kern_shape(num_output_channels, KernelRows, KernelCols, 1); - const PaddingType padding = (same_padding) ? PADDING_SAME : PADDING_VALID; - // Return the size, converted into units of TOut return static_cast( - WinogradConv::get_output_storage_size(kern_shape, input_shape, padding) / sizeof(T)); + WinogradConv::get_output_storage_size(num_batches, num_rows, num_cols, num_output_channels) / sizeof(T)); } template @@ -478,28 +479,36 @@ unsigned int NEWinogradLayerTransformOutputKernel int NEWinogradLayerTransformOutputKernel::get_matrix_stride( - const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const + int num_batches, /* Number of batches in the output tensor. */ + int num_rows, /* Number of rows in each feature map of the input tensor. */ + int num_cols, /* Number of columns in each feature map of the input tensor. */ + int num_output_channels /* Number of feature maps in the output tensor. */ +) const { - return WinogradConv::get_output_matrix_stride(kernel_shape, input_shape, padding_type); + return WinogradConv::get_output_matrix_stride(num_batches, num_rows, num_cols, num_output_channels); } + template -Tensor4DShape NEWinogradLayerTransformOutputKernel::get_output_shape( - const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const +std::pair NEWinogradLayerTransformOutputKernel::get_output_shape( + int num_rows, /* Number of rows in each feature map of the input tensor. */ + int num_cols, /* Number of columns in each feature map of the input tensor. */ + bool padding_same) const { - return WinogradConv::get_output_shape(kernel_shape, in_shape, padding); + return WinogradConv::get_output_shape(std::make_pair(num_rows, num_cols), padding_same); } template void NEWinogradLayerTransformOutputKernel::configure( - const ITensor *biases, - const ITensor *transformed_output, - const int matrix_stride, - ITensor *output_nhwc, - const int num_batches, - const int num_rows, - const int num_cols, - const int num_channels, - ITensor *workspace) + const ITensor *biases, + const ITensor *transformed_output, + const int matrix_stride, + ITensor *output_nhwc, + const int num_batches, + const int num_rows, + const int num_cols, + const int num_channels, + ITensor *workspace, + const arm_gemm::Activation &activation) { _biases = biases; _workspace = workspace; @@ -512,7 +521,7 @@ void NEWinogradLayerTransformOutputKernel(num_batches, num_rows, num_cols, num_channels); + _transform = arm_compute::support::cpp14::make_unique(num_batches, num_rows, num_cols, num_channels, activation); Window win; auto win_last = _transform->get_window(); win.set(Window::DimX, Window::Dimension(0, win_last, 1)); -- cgit v1.2.1