From 5ce897f80a1a6ade8a07d61c7aaaf70d2aa5ee02 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas <gpinitas@arm.com> Date: Wed, 29 Apr 2020 11:44:10 +0100 Subject: COMPMID-3108: Add Winograd 3x3,4x4 FP16 support for NEON Change-Id: I20680dc74a3d709297539e2132417308a7aecc9d Signed-off-by: Georgios Pinitas <gpinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3159 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> --- .../kernels/NEWinogradConvolutionLayerKernel.h | 22 ++++++++++------------ .../core/NEON/kernels/convolution/common/utils.hpp | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index e2e83319e1..1740df0312 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -31,10 +31,10 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the NEON kernel to perform Winograd input transform. */ -template <typename T> class INEWinogradLayerTransformInputKernel : public INEKernel { public: @@ -97,7 +97,7 @@ public: /** NEON kernel to perform Winograd input transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T> +class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel { public: /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -168,7 +168,7 @@ public: /** Configure the output transform kernel. * - * @param[in] input_nhwc Input tensor. Data types supported: F32. Layout supported NHWC. + * @param[in] input_nhwc Input tensor. 
Data types supported: F16/F32. Layout supported NHWC. * @param[in] num_batches Number of batches in input tensor. * @param[in] num_rows Number of rows in input tensor. * @param[in] num_cols Number of columns in input tensor. @@ -199,7 +199,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel * - * @param[in] input First tensor input info. Data types supported: F32. + * @param[in] input First tensor input info. Data types supported: F16/F32. * @param[in] output Output tensor info. Data types supported: same as @p input. * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo * @@ -227,7 +227,6 @@ private: }; /** Interface for the NEON kernel to perform Winograd output transform. */ -template <typename T> class INEWinogradLayerTransformOutputKernel : public INEKernel { public: @@ -312,7 +311,7 @@ public: /** NEON kernel to perform Winograd output transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T> +class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel { public: const char *name() const override @@ -410,7 +409,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel * - * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32. + * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32. * @param[in] bias Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input * @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. 
Data type supported: same as @p input * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo * @@ -438,7 +437,6 @@ private: }; /** Interface for the NEON kernel to perform Winograd weights transform. */ -template <typename T> class INEWinogradLayerTransformWeightsKernel : public INEKernel { public: @@ -488,7 +486,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel * - * @param[in] input First tensor input info. Data types supported: F32. + * @param[in] input First tensor input info. Data types supported: F16/F32. * @param[in] weights Weights tensor info. Data types supported: same as @p input. * * @return a status */ @@ -498,7 +496,7 @@ public: /** NEON kernel to perform Winograd weights transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T> +class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel { public: /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -522,7 +520,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel * * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). - * kernel_x must be 3 and equal to kernel_y. Data types supported: F32. + * kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32. * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. 
Data type supported: same as @p input * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo * diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp index 25bfa332fb..99b2282f7e 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp @@ -24,6 +24,8 @@ #pragma once +#include <limits> + void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); constexpr inline int iceildiv(const int a, const int b) @@ -36,3 +38,23 @@ inline T roundup(const T a, const T b) { return b * iceildiv(a, b); } + +template <typename T> +struct TypeBounds +{ + static constexpr T lower() noexcept { return std::numeric_limits<T>::has_infinity + ? -std::numeric_limits<T>::infinity() + : std::numeric_limits<T>::lowest(); }; + static constexpr T upper() noexcept { return std::numeric_limits<T>::has_infinity + ? std::numeric_limits<T>::infinity() + : std::numeric_limits<T>::max(); }; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template<> +struct TypeBounds<__fp16> +{ + static constexpr __fp16 lower() noexcept { return -std::numeric_limits<float>::infinity(); }; + static constexpr __fp16 upper() noexcept { return std::numeric_limits<float>::infinity(); } +}; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -- cgit v1.2.1