diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-04-16 15:08:59 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-06-24 11:16:30 +0000 |
commit | 561c176598cd14245e2e7918fdf136d1c888d1da (patch) | |
tree | 82adfff6de30292dabbbcc7ced4ae35cac3d45cf /src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h | |
parent | 31c7c26822270f1c4952c8973aa8bfb38e0a7c68 (diff) | |
download | ComputeLibrary-561c176598cd14245e2e7918fdf136d1c888d1da.tar.gz |
Rework OpenCL Depthwise Convolution
- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native
implementation for both floating-point and quantized data types
- Develop two parametric opencl kernels for depthwise convolution layer NHWC
(floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on opencl
Resolves COMPMID-4417
Change-Id: I253dd5d959a70783c82e62b1771a5e9f91621cb0
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5806
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h')
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h | 33 |
1 files changed, 11 insertions, 22 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h index 325f4e7067..68e4ccfc1e 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,19 +55,15 @@ public: * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] conv_info Convolution info (padding, stride, dilation, ...) * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); /** Initialize the function's source, destination and parameters * * @param[in] compile_context The compile context to be used. @@ -77,19 +73,15 @@ public: * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] conv_info Convolution info (padding, stride, dilation, ...) * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel * * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC @@ -98,11 +90,8 @@ public: * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] conv_info Convolution info (padding, stride, dilation, ...) * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, @@ -110,9 +99,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -125,6 +113,7 @@ private: unsigned int _depth_multiplier; const ICLTensor *_output_multipliers; const ICLTensor *_output_shifts; + bool _export_to_cl_image; bool _is_quantized; }; } // namespace arm_compute |