author    Michele Di Giorgio <michele.digiorgio@arm.com>  2019-10-08 09:36:26 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com>  2019-10-15 10:27:18 +0000
commit    a046e164b96a8441b2fa14ef578f7db46a0e97da (patch)
tree      9fa2b7e003342b608acd3ed627f47f9d027ef72c /arm_compute
parent    76c996f3b240eb1f60a566e5b0a5e61fe363685a (diff)
COMPMID-2600: Implement a new and generic depthwise convolution for CL QASYMM8 NHWC
The NCHW case is supported at function level by permuting the inputs/outputs to NHWC.
This patch also removes CLDirectConvolutionLayerOutputStageKernel, which is deprecated, and some kernels that were only used in the generic case of depthwise convolution.

Change-Id: I91e0f02d0a2f4a4a352e08c248e648944137fe68
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2056
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
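Not part of the commit itself, but as orientation for reviewers: a minimal sketch of how the QASYMM8/NHWC path enabled here might be driven from user code through CLDepthwiseConvolutionLayer. Tensor shapes, quantization parameters, padding and the exact configure() overload used below are illustrative assumptions, not prescribed by this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Illustrative configuration: 16 channels, 32x32 spatial, 3x3 depthwise filter,
    // depth multiplier 1. In NHWC the TensorShape ordering is [C, W, H].
    const QuantizationInfo qinfo(0.0078125f, 128); // assumed scale/offset

    CLTensor input, weights, biases, output;

    TensorInfo input_info(TensorShape(16U, 32U, 32U), 1, DataType::QASYMM8, qinfo);
    input_info.set_data_layout(DataLayout::NHWC);
    TensorInfo weights_info(TensorShape(16U, 3U, 3U), 1, DataType::QASYMM8, qinfo);
    weights_info.set_data_layout(DataLayout::NHWC);
    TensorInfo output_info(TensorShape(16U, 32U, 32U), 1, DataType::QASYMM8, qinfo);
    output_info.set_data_layout(DataLayout::NHWC);

    input.allocator()->init(input_info);
    weights.allocator()->init(weights_info);
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32)); // QASYMM8 biases are S32
    output.allocator()->init(output_info);

    // Stride 1 with 1-pixel padding keeps the spatial size for a 3x3 filter.
    CLDepthwiseConvolutionLayer dwc;
    dwc.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1), 1U);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input/weights/biases with quantized data ...

    dwc.run();
    CLScheduler::get().sync();
    return 0;
}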
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/CL/CLKernels.h                                                        4
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h                  4
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h  79
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h                                  90
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h                          79
-rw-r--r--  arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h                91
-rw-r--r--  arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h                         47
7 files changed, 21 insertions, 373 deletions
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 3d9b2c81cd..c3c485db7c 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -56,15 +56,11 @@
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index f8c841ab6a..31ec871123 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -49,7 +49,7 @@ public:
CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
/** Initialize the function's source, destination and parameters
*
- * @param[in] input Source tensor. Data type supported: FP32/FP16. Data layout supported: NHWC
+ * @param[in] input Source tensor. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
* @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input.
@@ -64,7 +64,7 @@ public:
const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U));
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
*
- * @param[in] input Source tensor info. Data type supported: FP32/FP16. Data layout supported: NHWC
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
* @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input.
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h
deleted file mode 100644
index 3f969957e1..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSGENERICKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSGENERICKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depthwise weights reshape kernel.
- * This kernel reshape original weights' low 2D dimensions into a single row and
- * have the second dimension as the original depth size.
- *
- **/
-class CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel(const CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel(CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
- * Data type supported: QASYMM8/F16/F32.
- * @param[out] output The output tensor. Data type supported: same as @p input.
- * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *biases = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
- * Data type supported: QASYMM8/F32.
- * @param[in] output The output tensor. Data type supported: same as @p input.
- * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_biases;
- ICLTensor *_output;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSGENERICKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
deleted file mode 100644
index 15798471a8..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISEIM2COLKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHWISEIM2COLKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depthwise im2col reshape kernel.
- * This kernel reshape the input low 3 dimensions to a new 3D shape where the output's first dimension is
- * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches per image and third dimension unchanged .
- **/
-class CLDepthwiseIm2ColKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthwiseIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseIm2ColKernel(const CLDepthwiseIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseIm2ColKernel &operator=(const CLDepthwiseIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthwiseIm2ColKernel(CLDepthwiseIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthwiseIm2ColKernel &operator=(CLDepthwiseIm2ColKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
- * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
- * while every dimension above 3 represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseIm2ColKernel
- *
- * @param[in] input The input tensor info to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32
- * @param[in] output The output tensor info. First 3 lower dimensions represent a transform of each 3D input,
- * while every dimension above 3 represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHWISEIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h
deleted file mode 100644
index c9ec8e13bf..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISEVECTORTOTENSORKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHWISEVECTORTOTENSORKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depthwise vector to tensor kernel.
- *
- * This kernel takes the 1D tensor that's been produced by the MatrixVectorMultiply
- * kernel and reshapes it to given width and height (previously calculated, based
- * on input/weights dimensions and convolution strides and padding).
- *
- **/
-class CLDepthwiseVectorToTensorKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthwiseVectorToTensorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseVectorToTensorKernel(const CLDepthwiseVectorToTensorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseVectorToTensorKernel &operator=(const CLDepthwiseVectorToTensorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthwiseVectorToTensorKernel(CLDepthwiseVectorToTensorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthwiseVectorToTensorKernel &operator=(CLDepthwiseVectorToTensorKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
- * @param[out] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
- * @param[in] conv_w The converted tensor's width.
- * @param[in] conv_h The converted tensor's height.
- */
- void configure(const ICLTensor *input, ICLTensor *output, size_t conv_w, size_t conv_h);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseVectorToTensorKernel
- *
- * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
- * @param[in] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
- * @param[in] conv_w The converted tensor's width.
- * @param[in] conv_h The converted tensor's height.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h);
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHWISEVECTORTOTENSORKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h
deleted file mode 100644
index 80bc012d9f..0000000000
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
-#define __ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-/** OpenCL kernel to accumulate the biases, if provided, or downscale in case of quantized input.
- *
- * @deprecated This kernel is deprecated and will be removed in release 19.05
- *
- * @note We assume bias to be shared
- *
- */
-class CLDirectConvolutionLayerOutputStageKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDirectConvolutionLayerOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDirectConvolutionLayerOutputStageKernel(const CLDirectConvolutionLayerOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDirectConvolutionLayerOutputStageKernel &operator=(const CLDirectConvolutionLayerOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDirectConvolutionLayerOutputStageKernel(CLDirectConvolutionLayerOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDirectConvolutionLayerOutputStageKernel &operator=(CLDirectConvolutionLayerOutputStageKernel &&) = default;
- /** Default destructor */
- ~CLDirectConvolutionLayerOutputStageKernel() = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: S32/F16/F32
- * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
- * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Required parameter if output is of QASYMM8 type.
- * Data types supported: QASYMM8/F16/F32
- * @param[in] result_fixedpoint_multiplier (Optional)Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
- * @param[in] result_shift (Optional)Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
- * @param[in] result_offset_after_shift (Optional)Offset to be applied to result before converting it back to QASYMM8
- */
- void configure(ICLTensor *input, const ICLTensor *bias = nullptr, ICLTensor *output = nullptr,
- int result_fixedpoint_multiplier = 0, int result_shift = 0, int result_offset_after_shift = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerOutputStageKernel
- *
- * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: F16/F32
- * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
- * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Data type supported: F16/F32
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index d177f4505a..98581a21fe 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -26,17 +26,12 @@
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -121,17 +116,15 @@ private:
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
*
- * -# @ref CLDepthwiseIm2ColKernel
- * -# @ref CLGEMMMatrixVectorMultiplyKernel
- * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ * -# @ref CLDepthwiseConvolutionLayerNativeKernel
+ * -# @ref CLPermute (x 3) if the data layout is NCHW
*
*/
class CLDepthwiseConvolutionLayer : public IFunction
{
public:
/** Default constructor */
- CLDepthwiseConvolutionLayer();
+ CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move constructor */
@@ -177,23 +170,21 @@ public:
void prepare() override;
private:
- CLDepthwiseIm2ColKernel _im2col_kernel;
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel _weights_reshape_kernel;
- CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
- CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
- CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- CLActivationLayer _activationlayer_function;
- CLFillBorderKernel _v2mm_input_fill_border;
- CLFillBorderKernel _v2mm_weights_fill_border;
- CLTensor _input_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _v2mm_output;
- CLTensor _output_reshaped;
- bool _is_prepared;
- bool _is_quantized;
- bool _is_activationlayer_enabled;
- const ICLTensor *_original_weights;
- std::unique_ptr<IFunction> _optimised_function;
+ MemoryGroup _memory_group;
+
+ std::unique_ptr<IFunction> _optimised_function;
+ CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLPermute _permute_output_to_nchw;
+
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ const ITensor *_original_weights;
+
+ bool _needs_permute;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
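
To help map the new members above onto behaviour, here is a conceptual, self-contained sketch (not the library's actual implementation) of how the generic path could wrap the NHWC-only native kernel with three CLPermute stages when the caller supplies NCHW tensors. The class, the permutation vectors and the control flow are assumptions made for illustration; only the member roles come from the header above. Memory management, prepare() and run() are omitted.

#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"

using namespace arm_compute;

// Hypothetical, simplified stand-in for the generic path of CLDepthwiseConvolutionLayer.
class DepthwiseGenericSketch
{
public:
    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                   const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1)
    {
        _needs_permute = input->info()->data_layout() == DataLayout::NCHW;

        if(_needs_permute)
        {
            // NCHW -> NHWC is assumed here to be PermutationVector(2U, 0U, 1U) in the
            // library's [dim0, dim1, dim2] ordering.
            _permute_input_to_nhwc.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
            _permute_weights_to_nhwc.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
            _permuted_input.info()->set_data_layout(DataLayout::NHWC);
            _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

            // The native kernel only understands NHWC, so it consumes the permuted tensors...
            _dwc_native_kernel.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output,
                                         conv_info, depth_multiplier);

            // ...and the result is brought back to the caller's NCHW layout.
            _permute_output_to_nchw.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        }
        else
        {
            // NHWC tensors go straight to the native kernel.
            _dwc_native_kernel.configure(input, weights, biases, output, conv_info, depth_multiplier);
        }
    }

private:
    CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel{};
    CLPermute                               _permute_input_to_nhwc{};
    CLPermute                               _permute_weights_to_nhwc{};
    CLPermute                               _permute_output_to_nchw{};
    CLTensor                                _permuted_input{};
    CLTensor                                _permuted_weights{};
    CLTensor                                _permuted_output{};
    bool                                    _needs_permute{ false };
};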