From 5ce897f80a1a6ade8a07d61c7aaaf70d2aa5ee02 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 29 Apr 2020 11:44:10 +0100 Subject: COMPMID-3108: Add Winograd 3x3,4x4 FP16 support for NEON Change-Id: I20680dc74a3d709297539e2132417308a7aecc9d Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3159 Reviewed-by: Michele Di Giorgio Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- .../kernels/NEWinogradConvolutionLayerKernel.cpp | 50 +++++++++++++++------- 1 file changed, 35 insertions(+), 15 deletions(-) (limited to 'src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp index 68064ee7c7..3100bf7308 100644 --- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp @@ -41,23 +41,34 @@ namespace arm_compute namespace { -inline bool is_kernel_size_supported(Size2D size) +inline bool is_kernel_size_supported(DataType data_type, Size2D size) { - const std::array supported_input_sizes = { { Size2D(1, 3), Size2D(3, 1), Size2D(5, 5), Size2D(3, 3), Size2D(1, 5), Size2D(5, 1), Size2D(7, 1), Size2D(1, 7) } }; - return std::end(supported_input_sizes) != std::find(std::begin(supported_input_sizes), std::end(supported_input_sizes), size); + const std::array f32_support = { { Size2D(1, 3), Size2D(3, 1), Size2D(5, 5), Size2D(3, 3), Size2D(1, 5), Size2D(5, 1), Size2D(7, 1), Size2D(1, 7) } }; + const std::array f16_support = { { Size2D(3, 3) } }; + + switch(data_type) + { + case DataType::F16: + return std::end(f16_support) != std::find(std::begin(f16_support), std::end(f16_support), size); + case DataType::F32: + return std::end(f32_support) != std::find(std::begin(f32_support), std::end(f32_support), size); + default: + return false; + } } Status validate_arguments_winograd_weight_trans(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); const auto input_width = input->dimension(idx_width); const auto input_height = input->dimension(idx_height); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(input_width, input_height)), "Only 1x3, 3x1, 1x5, 5x1, 7x1, 1x7, 3x3 and 5x5 kernels are supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(input->data_type(), Size2D(input_width, input_height)), + "Only 1x3, 3x1, 1x5, 5x1, 7x1, 1x7, 3x3 and 5x5 kernels are supported"); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); const Size2D &output_tile = winograd_info.output_tile_size; const std::array supported_tile_sizes = { { Size2D(2U, 2U), Size2D(4U, 4U), Size2D(1U, 6U), Size2D(6U, 1U), Size2D(4, 1), Size2D(1, 4), Size2D(2, 1), Size2D(1, 2) } }; @@ -89,9 +100,9 @@ Status validate_arguments_winograd_input_trans(const ITensorInfo *input, const I const PadStrideInfo &conv_info = winograd_info.convolution_info; ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1, "Winograd input transform only supports unit strides"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(kernel_dims.width, kernel_dims.height)), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(input->data_type(), Size2D(kernel_dims.width, kernel_dims.height)), "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported"); // Validate configured output @@ -128,9 +139,9 @@ Status validate_arguments_winograd_output_trans(const ITensorInfo *input, const ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != num_tiles.area()); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(kernel_dims.width, kernel_dims.height)), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(input->data_type(), Size2D(kernel_dims.width, kernel_dims.height)), "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported"); const std::array supported_gemm_sizes = { { 8U, 16U, 36U } }; @@ -162,22 +173,19 @@ std::pair validate_and_configure_window_winograd_output_trans(IT } } // namespace -template -Status INEWinogradLayerTransformWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *weights) +Status INEWinogradLayerTransformWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *weights) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); const DataLayout data_layout = input->data_layout(); const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(Size2D(weights->dimension(width_idx), weights->dimension(height_idx))), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_kernel_size_supported(input->data_type(), Size2D(weights->dimension(width_idx), weights->dimension(height_idx))), "Only 1x3, 3x1, 3x3 and 5x5 kernels are supported"); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); return Status{}; } -template class INEWinogradLayerTransformWeightsKernel; - template unsigned int NEWinogradLayerTransformWeightsKernel::get_weight_storage_size(int num_output_channels, int num_input_channels) const { @@ -262,6 +270,11 @@ template class NEWinogradLayerTransformWeightsKernel; template class NEWinogradLayerTransformWeightsKernel; template class NEWinogradLayerTransformWeightsKernel; template class NEWinogradLayerTransformWeightsKernel; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template class NEWinogradLayerTransformWeightsKernel<__fp16, 4, 4, 3, 3>; +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + // Input transform template @@ -396,6 +409,10 @@ template class NEWinogradLayerTransformInputKernel; template class NEWinogradLayerTransformInputKernel; template class NEWinogradLayerTransformInputKernel; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template class NEWinogradLayerTransformInputKernel<__fp16, 4, 4, 3, 3>; +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + // Output transform template @@ -524,4 +541,7 @@ template class NEWinogradLayerTransformOutputKernel; template class NEWinogradLayerTransformOutputKernel; template class NEWinogradLayerTransformOutputKernel; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template class NEWinogradLayerTransformOutputKernel<__fp16, 4, 4, 3, 3>; +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC } // namespace arm_compute -- cgit v1.2.1