From 5ce897f80a1a6ade8a07d61c7aaaf70d2aa5ee02 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas <gpinitas@arm.com> Date: Wed, 29 Apr 2020 11:44:10 +0100 Subject: COMPMID-3108: Add Winograd 3x3,4x4 FP16 support for NEON Change-Id: I20680dc74a3d709297539e2132417308a7aecc9d Signed-off-by: Georgios Pinitas <gpinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3159 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> --- .../kernels/NEWinogradConvolutionLayerKernel.h | 22 ++++++++++------------ .../core/NEON/kernels/convolution/common/utils.hpp | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index e2e83319e1..1740df0312 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -31,10 +31,10 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the NEON kernel to perform Winograd input transform. */ -template <typename T> class INEWinogradLayerTransformInputKernel : public INEKernel { public: @@ -97,7 +97,7 @@ public: /** NEON kernel to perform Winograd input transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T> +class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel { public: /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -168,7 +168,7 @@ public: /** Configure the output transform kernel. * - * @param[in] input_nhwc Input tensor. Data types supported: F32. Layout supported NHWC. + * @param[in] input_nhwc Input tensor. 
Data types supported: F16/F32. Layout supported NHWC. * @param[in] num_batches Number of batches in input tensor. * @param[in] num_rows Number of rows in input tensor. * @param[in] num_cols Number of columns in input tensor. @@ -199,7 +199,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel * - * @param[in] input First tensor input info. Data types supported: F32. + * @param[in] input First tensor input info. Data types supported: F16/F32. * @param[in] output Output tensor info. Data types supported: same as @p input. * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo * @@ -227,7 +227,6 @@ private: }; /** Interface for the NEON kernel to perform Winograd output transform. */ -template <typename T> class INEWinogradLayerTransformOutputKernel : public INEKernel { public: @@ -312,7 +311,7 @@ public: /** NEON kernel to perform Winograd output transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T> +class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel { public: const char *name() const override @@ -410,7 +409,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel * - * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32. + * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32. * @param[in] bias Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input * @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. 
Data type supported: same as @p input * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo * @@ -438,7 +437,6 @@ private: }; /** Interface for the NEON kernel to perform Winograd weights transform. */ -template <typename T> class INEWinogradLayerTransformWeightsKernel : public INEKernel { public: @@ -488,7 +486,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel * - * @param[in] input First tensor input info. Data types supported: F32. + * @param[in] input First tensor input info. Data types supported: F16/F32. * @param[in] weights Weights tensor info. Data types supported: same as @p input. * * @return a status */ @@ -498,7 +496,7 @@ public: /** NEON kernel to perform Winograd weights transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T> +class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel { public: /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -522,7 +520,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel * * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). - * kernel_x must be 3 and equal to kernel_y. Data types supported: F32. + * kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32. * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. 
Data type supported: same as @p input * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo * diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp index 25bfa332fb..99b2282f7e 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp @@ -24,6 +24,8 @@ #pragma once +#include <limits> + void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); constexpr inline int iceildiv(const int a, const int b) @@ -36,3 +38,23 @@ inline T roundup(const T a, const T b) { return b * iceildiv(a, b); } + +template <typename T> +struct TypeBounds +{ + static constexpr T lower() noexcept { return std::numeric_limits<T>::has_infinity + ? -std::numeric_limits<T>::infinity() + : std::numeric_limits<T>::lowest(); }; + static constexpr T upper() noexcept { return std::numeric_limits<T>::has_infinity + ? std::numeric_limits<T>::infinity() + : std::numeric_limits<T>::max(); }; +}; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template<> +struct TypeBounds<__fp16> +{ + static constexpr __fp16 lower() noexcept { return -std::numeric_limits<float>::infinity(); }; + static constexpr __fp16 upper() noexcept { return std::numeric_limits<float>::infinity(); } +}; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -- cgit v1.2.1