aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2019-10-08 09:36:26 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com> 2019-10-15 10:27:18 +0000
commit a046e164b96a8441b2fa14ef578f7db46a0e97da (patch)
tree 9fa2b7e003342b608acd3ed627f47f9d027ef72c /arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
parent 76c996f3b240eb1f60a566e5b0a5e61fe363685a (diff)
download ComputeLibrary-a046e164b96a8441b2fa14ef578f7db46a0e97da.tar.gz
COMPMID-2600: Implement a new and generic depthwise convolution for CL QASYMM8 NHWC
The NCHW case is supported at function level by permuting the inputs/outputs to NHWC. This patch also removes CLDirectConvolutionLayerOutputStageKernel which is deprecated and some kernels which were only used in the generic case of depthwise convolution.

Change-Id: I91e0f02d0a2f4a4a352e08c248e648944137fe68
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2056
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h')
-rw-r--r-- arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h 47
1 files changed, 19 insertions, 28 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index d177f4505a..98581a21fe 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -26,17 +26,12 @@
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -121,17 +116,15 @@ private:
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
*
- * -# @ref CLDepthwiseIm2ColKernel
- * -# @ref CLGEMMMatrixVectorMultiplyKernel
- * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ * -# @ref CLDepthwiseConvolutionLayerNativeKernel
+ * -# @ref CLPermute (x 3) if the data layout is NCHW
*
*/
class CLDepthwiseConvolutionLayer : public IFunction
{
public:
/** Default constructor */
- CLDepthwiseConvolutionLayer();
+ CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move constructor */
@@ -177,23 +170,21 @@ public:
void prepare() override;
private:
- CLDepthwiseIm2ColKernel _im2col_kernel;
- CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel _weights_reshape_kernel;
- CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
- CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
- CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- CLActivationLayer _activationlayer_function;
- CLFillBorderKernel _v2mm_input_fill_border;
- CLFillBorderKernel _v2mm_weights_fill_border;
- CLTensor _input_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _v2mm_output;
- CLTensor _output_reshaped;
- bool _is_prepared;
- bool _is_quantized;
- bool _is_activationlayer_enabled;
- const ICLTensor *_original_weights;
- std::unique_ptr<IFunction> _optimised_function;
+ MemoryGroup _memory_group;
+
+ std::unique_ptr<IFunction> _optimised_function;
+ CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLPermute _permute_output_to_nchw;
+
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ const ITensor *_original_weights;
+
+ bool _needs_permute;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */