aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2021-04-16 15:08:59 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2021-06-24 11:16:30 +0000
commit561c176598cd14245e2e7918fdf136d1c888d1da (patch)
tree82adfff6de30292dabbbcc7ced4ae35cac3d45cf /src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
parent31c7c26822270f1c4952c8973aa8bfb38e0a7c68 (diff)
downloadComputeLibrary-561c176598cd14245e2e7918fdf136d1c888d1da.tar.gz
Rework OpenCL Depthwise Convolution
- Remove dedicated kernels for NCHW. Now we only use NHWC with permute - Remove specialized kernels for 3x3 NHWC - Simplify CLDepthwiseConvolutionLayer.cpp to call just the native implementation for both floating-point and quantized data types - Develop two parametric opencl kernels for depthwise convolution layer NHWC (floating-point and quantized) - Add support to export the weights to cl_image - Extend test for depthwise convolution on opencl Resolves COMPMID-4417 Change-Id: I253dd5d959a70783c82e62b1771a5e9f91621cb0 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5806 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h')
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h33
1 files changed, 11 insertions, 22 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index 325f4e7067..68e4ccfc1e 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,19 +55,15 @@ public:
* @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
* @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] conv_info Convolution info (padding, stride, dilation, ...)
* @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
* the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
* @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
* the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
/** Initialize the function's source, destination and parameters
*
* @param[in] compile_context The compile context to be used.
@@ -77,19 +73,15 @@ public:
* @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
* @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] conv_info Convolution info (padding, stride, dilation, ...)
* @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
* the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
* @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
* the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
*
* @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
@@ -98,11 +90,8 @@ public:
* @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
* @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
* @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] conv_info Convolution info (padding, stride, dilation, ...)
* @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
* the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
* @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
@@ -110,9 +99,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -125,6 +113,7 @@ private:
unsigned int _depth_multiplier;
const ICLTensor *_output_multipliers;
const ICLTensor *_output_shifts;
+ bool _export_to_cl_image;
bool _is_quantized;
};
} // namespace arm_compute