From 8cffcd6b6e4e95f97767f2a25ccc8826dd69c358 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 16 Nov 2018 17:11:50 +0000 Subject: COMPMID-1644: NEDepthwiseConvolution for FP16 NHWC Change-Id: I6e7dee8bd615a5eff01c523f208a218574ee5eab --- arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h | 6 +++--- arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h | 4 ++-- arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h | 8 +++++--- arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h | 4 ++-- arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 6 +++--- 5 files changed, 15 insertions(+), 13 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h index 0d61d3ea38..de671361d6 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h @@ -55,7 +55,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, * while every dimension above 3 represents a batch. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). @@ -68,7 +68,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs.
Data types supported: QASYMM8, F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 * @param[in] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, * while every dimension above 3 represents a batch. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). @@ -105,5 +105,5 @@ private: bool _has_bias; unsigned int _depth_multiplier; }; -} // arm_compute +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h index 00977a91b4..25af7a29cc 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h @@ -56,7 +56,7 @@ public: NEDepthwiseVectorToTensorKernel &operator=(NEDepthwiseVectorToTensorKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F32. + * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32. * @param[out] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input. * @param[in] conv_w The converted tensor's width. * @param[in] conv_h The converted tensor's height. @@ -64,7 +64,7 @@ public: void configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseVectorToTensorKernel * - * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F32. + * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32. * @param[in] output The output tensor.
3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input. * @param[in] conv_w The converted tensor's width. * @param[in] conv_h The converted tensor's height. diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h index b78684f993..dcf52442a9 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h @@ -53,7 +53,8 @@ public: NEDepthwiseWeightsReshapeKernel &operator=(NEDepthwiseWeightsReshapeKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: QASYMM8, F32. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. + * Data type supported: QASYMM8/F16/F32. * @param[out] output The output tensor. Data type supported: same as @p input. * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. */ @@ -61,7 +62,8 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseWeightsReshapeKernel * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: QASYMM8, F32. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. + * Data type supported: QASYMM8/F16/F32. * @param[in] output The output tensor. Data type supported: same as @p input. * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
* @@ -81,5 +83,5 @@ private: ITensor *_output; const ITensor *_biases; }; -} // arm_compute +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h index 7dddaca3a0..c355875c24 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h @@ -50,7 +50,7 @@ public: NEGEMMMatrixVectorMultiplyKernel &operator=(NEGEMMMatrixVectorMultiplyKernel &&) = default; /** Initialise the kernel's input and output. * - * @param[in] input0 First Input tensor. Data types supported: QASYMM8/F32 + * @param[in] input0 First Input tensor. Data types supported: QASYMM8/F16/F32 * @param[in] input1 Second Input tensor. Data types supported: same as @p input. * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input, S32 for QASYMM8 input. */ @@ -58,7 +58,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixVectorMultiplyKernel * - * @param[in] input0 First Input tensor. Data types supported: QASYMM8/F32 + * @param[in] input0 First Input tensor. Data types supported: QASYMM8/F16/F32 * @param[in] input1 Second Input tensor. Data types supported: same as @p input. * @param[in] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input, S32 for QASYMM8 input.
* diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index 288d5136d2..e2fe11ea7f 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -132,7 +132,7 @@ public: NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. @@ -146,7 +146,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer * - * @param[in] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). * @param[in] output Destination tensor. Data type supported: same as @p input. * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. @@ -189,5 +189,5 @@ private: bool _is_activationlayer_enabled; const ITensor *_original_weights; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file -- cgit v1.2.1