From d93e263e70e3101422402c95946e520fef34c4c7 Mon Sep 17 00:00:00 2001
From: Giorgio Arena <giorgio.arena@arm.com>
Date: Tue, 15 Oct 2019 11:09:33 +0100
Subject: COMPMID-2708 NEDepthwiseConvolution Generic: support for
 QUANT8_PER_CHANNEL_SYMM

COMPMID-2470 Implement a new and generic depthwise convolution for NEON QASYMM8 NHWC
COMPMID-2477 Enable FP16 data type for the new generic convolution on NEON for NHWC
COMPMID-2625 Remove old implementation files for the generic NEDepthwiseConvolution

Change-Id: I8f6deda4fc69dd7e472fba3228b1ed5dad172f3e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2094
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/NEON/NEKernels.h                  |   3 -
 .../NEDepthwiseConvolutionLayerNativeKernel.h      |  20 +++-
 .../core/NEON/kernels/NEDepthwiseIm2ColKernel.h    | 114 ---------------------
 .../NEON/kernels/NEDepthwiseVectorToTensorKernel.h |  97 ------------------
 .../NEON/kernels/NEDepthwiseWeightsReshapeKernel.h |  87 ----------------
 .../NEON/functions/NEDepthwiseConvolutionLayer.h   |  58 +++--------
 6 files changed, 31 insertions(+), 348 deletions(-)
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h

(limited to 'arm_compute')

diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 5eaf8ad445..33a640fa05 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -55,9 +55,6 @@
 #include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
index 5db79f8bf7..a0205f1ea6 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
@@ -26,6 +26,10 @@
 
 #include "arm_compute/core/NEON/INEKernel.h"
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_neon.h>
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
 namespace arm_compute
 {
 // Forward declarations
@@ -53,7 +57,7 @@ public:
      *
      * @note Supported data layouts: NHWC
      *
-     * @param[in]  input            Source tensor. DataType supported: F32.
+     * @param[in]  input            Source tensor. DataType supported: QASYMM8/F16/F32.
      * @param[in]  weights          Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input.
      * @param[in]  biases           Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input.
      * @param[out] output           Destination tensor. Data type supported: Same as @p input.
@@ -68,7 +72,7 @@ public:
      *
      * @note Supported data layouts: NHWC
      *
-     * @param[in] input            Source tensor info. DataType supported: F32.
+     * @param[in] input            Source tensor info. DataType supported: QASYMM8/F16/F32.
      * @param[in] weights          Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input.
      * @param[in] biases           Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input.
      * @param[in] output           Destination tensor info. Data type supported: Same as @p input.
@@ -86,7 +90,15 @@ public:
     BorderSize border_size() const override;
 
 private:
-    template <typename T, int S, bool has_biases>
+    template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if < std::is_same<T, float>::value
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                                                                                                               || std::is_same<T, float16_t>::value
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                                                                                                               ,
+                                                                                                               int >::type = 0 >
+    void run_depthwise(const Window &window);
+
+    template <typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type = 0>
     void run_depthwise(const Window &window);
 
     /** Common signature for all the specialised depthwise convolution native functions
@@ -104,6 +116,8 @@ private:
     PadStrideInfo        _conv_info;
     unsigned int         _depth_multiplier;
     Size2D               _dilation;
+    std::vector<int>     _output_multiplier;
+    std::vector<int>     _output_shift;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
deleted file mode 100644
index 3e123b4839..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depthwise im2col reshape kernel.
- *  This kernel reshape the input low 3 dimensions to a new 3D shape  where the output's first dimension is
- *  the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches in per image and third dimension unchanged .
- **/
-class NEDepthwiseIm2ColKernel : public INEKernel
-{
-public:
-    const char *name() const override
-    {
-        return "NEDepthwiseIm2ColKernel";
-    }
-    /** Default constructor */
-    NEDepthwiseIm2ColKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseIm2ColKernel(const NEDepthwiseIm2ColKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseIm2ColKernel &operator=(const NEDepthwiseIm2ColKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEDepthwiseIm2ColKernel(NEDepthwiseIm2ColKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEDepthwiseIm2ColKernel &operator=(NEDepthwiseIm2ColKernel &&) = default;
-    /** Set the input and output of the kernel.
-     *
-     * @param[in]  input            The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                              while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
-     * @param[out] output           The output tensor. First 3 lower dimensions represent a transform of each 3D input,
-     *                              while every dimension above 3 represents a batch. Data types supported: Same as @p input
-     * @param[in]  kernel_dims      The kernel dimensions (width and height).
-     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in]  has_bias         Boolean that specifies if the depthwise convolution has bias.
-     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     */
-    void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1,
-                   const Size2D &dilation = Size2D(1U, 1U));
-
-    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseIm2ColKernel
-     *
-     * @param[in] input            The input tensor info to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                             while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
-     * @param[in] output           The output tensor info. First 3 lower dimensions represent a transform of each 3D input,
-     *                             while every dimension above 3 represents a batch. Data types supported: Same as @p input
-     * @param[in] kernel_dims      The kernel dimensions (width and height).
-     * @param[in] conv_info        Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in] has_bias         Boolean that specifies if the depthwise convolution has bias.
-     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1,
-                           const Size2D &dilation = Size2D(1U, 1U));
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    /** Template function to run the im2col used for the depthwise convolution layer case
-     *
-     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
-     */
-    template <typename T>
-    void run_generic(const Window &window);
-    /** Common signature for all the specialised depthwise im2col functions
-     *
-     * @param[in] window Region on which to execute the kernel.
-     */
-    using DepthwiseIm2ColFunctionPtr = void (NEDepthwiseIm2ColKernel::*)(const Window &window);
-
-private:
-    DepthwiseIm2ColFunctionPtr _func;
-    const ITensor             *_input;
-    ITensor                   *_output;
-    Size2D                     _kernel_dims;
-    PadStrideInfo              _conv_info;
-    bool                       _has_bias;
-    unsigned int               _depth_multiplier;
-    Size2D                     _dilation;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
deleted file mode 100644
index 25af7a29cc..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISEVECTORTOTENSORKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHWISEVECTORTOTENSORKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depthwise vector to tensor kernel.
- *
- *  This kernel takes the 1D tensor that's been produced by the MatrixVectorMultiply
- *  kernel and reshapes it to given width and height (previously calculated, based
- *  on input/weights dimensions and convolution strides and padding).
- *
- **/
-class NEDepthwiseVectorToTensorKernel : public INEKernel
-{
-public:
-    const char *name() const override
-    {
-        return "NEDepthwiseVectorToTensorKernel";
-    }
-    /** Default constructor */
-    NEDepthwiseVectorToTensorKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseVectorToTensorKernel(const NEDepthwiseVectorToTensorKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseVectorToTensorKernel &operator=(const NEDepthwiseVectorToTensorKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEDepthwiseVectorToTensorKernel(NEDepthwiseVectorToTensorKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEDepthwiseVectorToTensorKernel &operator=(NEDepthwiseVectorToTensorKernel &&) = default;
-    /** Set the input and output of the kernel.
-     *
-     * @param[in]  input  The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
-     * @param[out] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
-     * @param[in]  conv_w The converted tensor's width.
-     * @param[in]  conv_h The converted tensor's height.
-     */
-    void configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseVectorToTensorKernel
-     *
-     * @param[in] input  The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
-     * @param[in] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
-     * @param[in] conv_w The converted tensor's width.
-     * @param[in] conv_h The converted tensor's height.
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    /** Template function to run the vector to tensor reshape used for the depthwise convolution layer case
-     *
-     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
-     */
-    template <typename T>
-    void vector_to_tensor(const Window &window);
-    /** Common signature for all the specialised depthwise vector to tensor functions
-     *
-     * @param[in] window Region on which to execute the kernel.
-     */
-    using DepthwiseVectorToTensorFunctionPtr = void (NEDepthwiseVectorToTensorKernel::*)(const Window &window);
-
-private:
-    DepthwiseVectorToTensorFunctionPtr _func;
-    const ITensor                     *_input;
-    ITensor                           *_output;
-    std::pair<size_t, size_t> _conv_dims;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHWISEVECTORTOTENSORKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
deleted file mode 100644
index dcf52442a9..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depthwise weights reshape kernel.
- *  This kernel reshape original weights' low 2D dimensions into a single col and
- *  have the second dimension as the original depth size.
- **/
-class NEDepthwiseWeightsReshapeKernel : public INEKernel
-{
-public:
-    const char *name() const override
-    {
-        return "NEDepthwiseWeightsReshapeKernel";
-    }
-    /** Default constructor */
-    NEDepthwiseWeightsReshapeKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseWeightsReshapeKernel(const NEDepthwiseWeightsReshapeKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseWeightsReshapeKernel &operator=(const NEDepthwiseWeightsReshapeKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEDepthwiseWeightsReshapeKernel(NEDepthwiseWeightsReshapeKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEDepthwiseWeightsReshapeKernel &operator=(NEDepthwiseWeightsReshapeKernel &&) = default;
-    /** Set the input and output of the kernel.
-     *
-     * @param[in]  input  The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
-     *                    Data type supported: QASYMM8/F16/F32.
-     * @param[out] output The output tensor. Data type supported: same as @p input.
-     * @param[in]  biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output, const ITensor *biases);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseWeightsReshapeKernel
-     *
-     * @param[in] input  The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
-     *                   Data type supported: QASYMM8/F16/F32.
-     * @param[in] output The output tensor. Data type supported: same as @p input.
-     * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    using DepthwiseWeightsReshapeFunction = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window);
-
-private:
-    DepthwiseWeightsReshapeFunction *_func;
-    const ITensor                   *_input;
-    ITensor                         *_output;
-    const ITensor                   *_biases;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 87405fdb14..ea3ef9bf38 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -26,21 +26,11 @@
 
 #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Macros.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
 #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
@@ -279,17 +269,10 @@ private:
     bool                                      _is_prepared;
 };
 
-/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
+/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
  *
- * If data type is F32 and data layout is NHWC:
  * -# @ref NEDepthwiseConvolutionLayerNativeKernel
  *
- * Otherwise:
- * -# @ref NEDepthwiseIm2ColKernel
- * -# @ref NEDepthwiseWeightsReshapeKernel
- * -# @ref NEGEMMMatrixVectorMultiplyKernel
- * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
- *
  */
 class NEDepthwiseConvolutionLayer : public IFunction
 {
@@ -341,32 +324,19 @@ public:
     void prepare() override;
 
 private:
-    NEDepthwiseIm2ColKernel                   _im2col_kernel;
-    NEDepthwiseWeightsReshapeKernel           _weights_reshape_kernel;
-    NEGEMMMatrixVectorMultiplyKernel          _v2mm_kernel;
-    NEDepthwiseConvolutionLayerNativeKernel   _depthwise_conv_kernel;
-    NEDepthwiseVectorToTensorKernel           _vector_to_tensor_kernel;
-    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
-    NEFillBorderKernel                        _fill_border;
-    NEFillBorderKernel                        _v2mm_input_fill_border;
-    NEFillBorderKernel                        _v2mm_weights_fill_border;
-    NEPermute                                 _permute_input;
-    NEPermute                                 _permute_weights;
-    NEPermute                                 _permute_output;
-    NEActivationLayer                         _activationlayer_function;
-    Tensor                                    _input_reshaped;
-    Tensor                                    _weights_reshaped;
-    Tensor                                    _v2mm_output;
-    Tensor                                    _output_reshaped;
-    Tensor                                    _permuted_input;
-    Tensor                                    _permuted_weights;
-    Tensor                                    _permuted_output;
-    bool                                      _is_prepared;
-    bool                                      _is_quantized;
-    bool                                      _is_nhwc;
-    bool                                      _is_activationlayer_enabled;
-    bool                                      _is_optimized;
-    const ITensor                            *_original_weights;
+    NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
+    NEFillBorderKernel                      _fill_border;
+    NEPermute                               _permute_input;
+    NEPermute                               _permute_weights;
+    NEPermute                               _permute_output;
+    NEActivationLayer                       _activationlayer_function;
+    Tensor                                  _permuted_input;
+    Tensor                                  _permuted_weights;
+    Tensor                                  _permuted_output;
+    bool                                    _is_prepared;
+    bool                                    _is_nchw;
+    bool                                    _is_activationlayer_enabled;
+    const ITensor                          *_original_weights;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */
\ No newline at end of file
-- 
cgit v1.2.1