aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2019-10-15 11:09:33 +0100
committer: Giorgio Arena <giorgio.arena@arm.com> 2019-10-21 10:14:20 +0000
commit: d93e263e70e3101422402c95946e520fef34c4c7 (patch)
tree: f79d3b325ed6881fb9252cb7ee0b7573739e00be /arm_compute
parent: ab5b1a279284bed350d3bb75f3d9d3aec6edca0e (diff)
download: ComputeLibrary-d93e263e70e3101422402c95946e520fef34c4c7.tar.gz
COMPMID-2708 NEDepthwiseConvolution Generic: support for QUANT8_PER_CHANNEL_SYMM
COMPMID-2470 Implement a new and generic depthwise convolution for NEON QASYMM8 NHWC
COMPMID-2477 Enable FP16 data type for the new generic convolution on NEON for NHWC
COMPMID-2625 Remove old implementation files for the generic NEDepthwiseConvolution

Change-Id: I8f6deda4fc69dd7e472fba3228b1ed5dad172f3e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2094
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/NEON/NEKernels.h | 3
-rw-r--r-- arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h | 20
-rw-r--r-- arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h | 114
-rw-r--r-- arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h | 97
-rw-r--r-- arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h | 87
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 58
6 files changed, 31 insertions, 348 deletions
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 5eaf8ad445..33a640fa05 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -55,9 +55,6 @@
#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
index 5db79f8bf7..a0205f1ea6 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
@@ -26,6 +26,10 @@
#include "arm_compute/core/NEON/INEKernel.h"
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_neon.h>
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
namespace arm_compute
{
// Forward declarations
@@ -53,7 +57,7 @@ public:
*
* @note Supported data layouts: NHWC
*
- * @param[in] input Source tensor. DataType supported: F32.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
@@ -68,7 +72,7 @@ public:
*
* @note Supported data layouts: NHWC
*
- * @param[in] input Source tensor info. DataType supported: F32.
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input.
* @param[in] output Destination tensor info. Data type supported: Same as @p input.
@@ -86,7 +90,15 @@ public:
BorderSize border_size() const override;
private:
- template <typename T, int S, bool has_biases>
+ template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if < std::is_same<T, float>::value
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ || std::is_same<T, float16_t>::value
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ ,
+ int >::type = 0 >
+ void run_depthwise(const Window &window);
+
+ template <typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type = 0>
void run_depthwise(const Window &window);
/** Common signature for all the specialised depthwise convolution native functions
@@ -104,6 +116,8 @@ private:
PadStrideInfo _conv_info;
unsigned int _depth_multiplier;
Size2D _dilation;
+ std::vector<int> _output_multiplier;
+ std::vector<int> _output_shift;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
deleted file mode 100644
index 3e123b4839..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depthwise im2col reshape kernel.
- * This kernel reshape the input low 3 dimensions to a new 3D shape where the output's first dimension is
- * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches in per image and third dimension unchanged .
- **/
-class NEDepthwiseIm2ColKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseIm2ColKernel";
- }
- /** Default constructor */
- NEDepthwiseIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseIm2ColKernel(const NEDepthwiseIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseIm2ColKernel &operator=(const NEDepthwiseIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthwiseIm2ColKernel(NEDepthwiseIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthwiseIm2ColKernel &operator=(NEDepthwiseIm2ColKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
- * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
- * while every dimension above 3 represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseIm2ColKernel
- *
- * @param[in] input The input tensor info to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
- * @param[in] output The output tensor info. First 3 lower dimensions represent a transform of each 3D input,
- * while every dimension above 3 represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the im2col used for the depthwise convolution layer case
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_generic(const Window &window);
- /** Common signature for all the specialised depthwise im2col functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseIm2ColFunctionPtr = void (NEDepthwiseIm2ColKernel::*)(const Window &window);
-
-private:
- DepthwiseIm2ColFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _kernel_dims;
- PadStrideInfo _conv_info;
- bool _has_bias;
- unsigned int _depth_multiplier;
- Size2D _dilation;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
deleted file mode 100644
index 25af7a29cc..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISEVECTORTOTENSORKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHWISEVECTORTOTENSORKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depthwise vector to tensor kernel.
- *
- * This kernel takes the 1D tensor that's been produced by the MatrixVectorMultiply
- * kernel and reshapes it to given width and height (previously calculated, based
- * on input/weights dimensions and convolution strides and padding).
- *
- **/
-class NEDepthwiseVectorToTensorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseVectorToTensorKernel";
- }
- /** Default constructor */
- NEDepthwiseVectorToTensorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseVectorToTensorKernel(const NEDepthwiseVectorToTensorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseVectorToTensorKernel &operator=(const NEDepthwiseVectorToTensorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthwiseVectorToTensorKernel(NEDepthwiseVectorToTensorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthwiseVectorToTensorKernel &operator=(NEDepthwiseVectorToTensorKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
- * @param[out] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
- * @param[in] conv_w The converted tensor's width.
- * @param[in] conv_h The converted tensor's height.
- */
- void configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseVectorToTensorKernel
- *
- * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
- * @param[in] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
- * @param[in] conv_w The converted tensor's width.
- * @param[in] conv_h The converted tensor's height.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the vector to tensor reshape used for the depthwise convolution layer case
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void vector_to_tensor(const Window &window);
- /** Common signature for all the specialised depthwise vector to tensor functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseVectorToTensorFunctionPtr = void (NEDepthwiseVectorToTensorKernel::*)(const Window &window);
-
-private:
- DepthwiseVectorToTensorFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- std::pair<size_t, size_t> _conv_dims;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHWISEVECTORTOTENSORKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
deleted file mode 100644
index dcf52442a9..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depthwise weights reshape kernel.
- * This kernel reshape original weights' low 2D dimensions into a single col and
- * have the second dimension as the original depth size.
- **/
-class NEDepthwiseWeightsReshapeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseWeightsReshapeKernel";
- }
- /** Default constructor */
- NEDepthwiseWeightsReshapeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseWeightsReshapeKernel(const NEDepthwiseWeightsReshapeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseWeightsReshapeKernel &operator=(const NEDepthwiseWeightsReshapeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthwiseWeightsReshapeKernel(NEDepthwiseWeightsReshapeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthwiseWeightsReshapeKernel &operator=(NEDepthwiseWeightsReshapeKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
- * Data type supported: QASYMM8/F16/F32.
- * @param[out] output The output tensor. Data type supported: same as @p input.
- * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *biases);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseWeightsReshapeKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
- * Data type supported: QASYMM8/F16/F32.
- * @param[in] output The output tensor. Data type supported: same as @p input.
- * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using DepthwiseWeightsReshapeFunction = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window);
-
-private:
- DepthwiseWeightsReshapeFunction *_func;
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_biases;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 87405fdb14..ea3ef9bf38 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -26,21 +26,11 @@
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Macros.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
@@ -279,17 +269,10 @@ private:
bool _is_prepared;
};
-/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
+/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
*
- * If data type is F32 and data layout is NHWC:
* -# @ref NEDepthwiseConvolutionLayerNativeKernel
*
- * Otherwise:
- * -# @ref NEDepthwiseIm2ColKernel
- * -# @ref NEDepthwiseWeightsReshapeKernel
- * -# @ref NEGEMMMatrixVectorMultiplyKernel
- * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
- *
*/
class NEDepthwiseConvolutionLayer : public IFunction
{
@@ -341,32 +324,19 @@ public:
void prepare() override;
private:
- NEDepthwiseIm2ColKernel _im2col_kernel;
- NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel;
- NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
- NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
- NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEFillBorderKernel _fill_border;
- NEFillBorderKernel _v2mm_input_fill_border;
- NEFillBorderKernel _v2mm_weights_fill_border;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _input_reshaped;
- Tensor _weights_reshaped;
- Tensor _v2mm_output;
- Tensor _output_reshaped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_quantized;
- bool _is_nhwc;
- bool _is_activationlayer_enabled;
- bool _is_optimized;
- const ITensor *_original_weights;
+ NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
+ NEFillBorderKernel _fill_border;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ NEActivationLayer _activationlayer_function;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ bool _is_prepared;
+ bool _is_nchw;
+ bool _is_activationlayer_enabled;
+ const ITensor *_original_weights;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */
\ No newline at end of file