diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-04-16 12:41:45 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-04-20 13:32:10 +0000 |
commit | dcf4c87cf78a5f1667699c1a3511d09356938660 (patch) | |
tree | 28aa191a226e4bf4350d622fcb668abaa17e8677 /arm_compute/runtime | |
parent | 562bee584f3633167725af7915f50d07b0597f10 (diff) | |
download | ComputeLibrary-dcf4c87cf78a5f1667699c1a3511d09356938660.tar.gz |
CLDepthwiseConvolutionLayer rework - Part 1
Remove the reshaped variant for CLDepthwiseConvolutionLayer 3x3 NHWC Quantized
- Remove kernel selection by GPUTarget
- Remove unused quantized support from the NHWC kernel
- Remove CLDepthwiseConvolutionLayerReshapeWeightsKernel
- Remove OpenCL kernels for reshaped dwc 3x3 quantized and weights reshape
- Remove the "_bifrost" suffix in common OpenCL kernel
- Remove the ICLDepthwiseConvolutionLayer3x3Kernel common interface
Resolve COMPMID-3864, COMPMID-3907
Change-Id: Icfac0fb6c00e214985beb05dad7c0cdbbee7d830
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5447
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- | arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h | 49 |
1 file changed, 23 insertions, 26 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index e2c5d683cf..1af9e1dc6f 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -35,8 +35,8 @@ namespace arm_compute class CLCompileContext; class CLFillBorderKernel; class CLDepthwiseConvolutionLayerNativeKernel; -class CLDepthwiseConvolutionLayerReshapeWeightsKernel; -class ICLDepthwiseConvolutionLayer3x3Kernel; +class CLDepthwiseConvolutionLayer3x3NCHWKernel; +class CLDepthwiseConvolutionLayer3x3NHWCKernel; class ICLTensor; /** Function to execute a depthwise convolution @@ -123,19 +123,17 @@ private: * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard. * * @return a Depthwise Convolution Function */ static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD); + ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). 
This function calls the following OpenCL kernels: * * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW) * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC) - * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC) * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) * */ @@ -200,7 +198,7 @@ private: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U)); + ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; @@ -212,26 +210,25 @@ private: }; private: - MemoryGroup _memory_group; - std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel; - std::unique_ptr<CLFillBorderKernel> _border_handler; - CLPermute _permute_input_to_nchw; - CLPermute _permute_weights_to_nchw; - CLPermute _permute_output_to_nhwc; - std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights; - CLTensor _permuted_input; - CLTensor _permuted_weights; - CLTensor _permuted_output; - CLTensor _output_multipliers; - CLTensor _output_shifts; - const ITensor *_original_weights; - const ITensor *_input; - const ITensor *_output; - bool _needs_permute; - bool _needs_weights_reshape; - bool _is_prepared; - bool _is_quantized; - bool _is_nhwc; + MemoryGroup _memory_group; + std::unique_ptr<CLDepthwiseConvolutionLayer3x3NCHWKernel> _kernel_nchw; + std::unique_ptr<CLDepthwiseConvolutionLayer3x3NHWCKernel> _kernel_nhwc; + std::unique_ptr<CLFillBorderKernel> _border_handler; + CLPermute _permute_input_to_nchw; + CLPermute _permute_weights_to_nchw; + CLPermute _permute_output_to_nhwc; + CLTensor _permuted_input; + 
CLTensor _permuted_weights; + CLTensor _permuted_output; + CLTensor _output_multipliers; + CLTensor _output_shifts; + const ITensor *_original_weights; + const ITensor *_input; + const ITensor *_output; + bool _needs_permute; + bool _is_prepared; + bool _is_quantized; + bool _is_nhwc; }; /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: |