diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2019-10-08 09:36:26 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2019-10-15 10:27:18 +0000 |
commit | a046e164b96a8441b2fa14ef578f7db46a0e97da (patch) | |
tree | 9fa2b7e003342b608acd3ed627f47f9d027ef72c /arm_compute/runtime/CL | |
parent | 76c996f3b240eb1f60a566e5b0a5e61fe363685a (diff) | |
download | ComputeLibrary-a046e164b96a8441b2fa14ef578f7db46a0e97da.tar.gz |
COMPMID-2600: Implement a new and generic depthwise convolution for CL QASYMM8 NHWC
The NCHW case is supported at function level by permuting the
inputs/outputs to NHWC.
This patch also removes CLDirectConvolutionLayerOutputStageKernel, which
is deprecated, and some kernels that were only used in the generic case
of depthwise convolution.
Change-Id: I91e0f02d0a2f4a4a352e08c248e648944137fe68
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2056
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL')
-rw-r--r-- | arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h | 47 |
1 files changed, 19 insertions, 28 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index d177f4505a..98581a21fe 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -26,17 +26,12 @@ #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -121,17 +116,15 @@ private: /** Basic function to execute a generic depthwise convolution. 
This function calls the following OpenCL kernels: * - * -# @ref CLDepthwiseIm2ColKernel - * -# @ref CLGEMMMatrixVectorMultiplyKernel - * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel - * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) + * -# @ref CLDepthwiseConvolutionLayerNativeKernel + * -# @ref CLPermute (x 3) if the data layout is NCHW * */ class CLDepthwiseConvolutionLayer : public IFunction { public: /** Default constructor */ - CLDepthwiseConvolutionLayer(); + CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete; /** Default move constructor */ @@ -177,23 +170,21 @@ public: void prepare() override; private: - CLDepthwiseIm2ColKernel _im2col_kernel; - CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel _weights_reshape_kernel; - CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel; - CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; - CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - CLActivationLayer _activationlayer_function; - CLFillBorderKernel _v2mm_input_fill_border; - CLFillBorderKernel _v2mm_weights_fill_border; - CLTensor _input_reshaped; - CLTensor _weights_reshaped; - CLTensor _v2mm_output; - CLTensor _output_reshaped; - bool _is_prepared; - bool _is_quantized; - bool _is_activationlayer_enabled; - const ICLTensor *_original_weights; - std::unique_ptr<IFunction> _optimised_function; + MemoryGroup _memory_group; + + std::unique_ptr<IFunction> _optimised_function; + CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel; + CLPermute _permute_input_to_nhwc; + CLPermute _permute_weights_to_nhwc; + CLPermute _permute_output_to_nchw; + + CLTensor _permuted_input; + CLTensor _permuted_weights; + CLTensor _permuted_output; + const ITensor *_original_weights; + + bool _needs_permute; + bool _is_prepared; }; } // 
namespace arm_compute #endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */ |