diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-04-16 15:08:59 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-06-24 11:16:30 +0000 |
commit | 561c176598cd14245e2e7918fdf136d1c888d1da (patch) | |
tree | 82adfff6de30292dabbbcc7ced4ae35cac3d45cf /src/core/gpu | |
parent | 31c7c26822270f1c4952c8973aa8bfb38e0a7c68 (diff) | |
download | ComputeLibrary-561c176598cd14245e2e7918fdf136d1c888d1da.tar.gz |
Rework OpenCL Depthwise Convolution
- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native
implementation for both floating-point and quantized data types
- Develop two parametric OpenCL kernels for depthwise convolution layer NHWC
(floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on OpenCL
Resolves COMPMID-4417
Change-Id: I253dd5d959a70783c82e62b1771a5e9f91621cb0
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5806
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/gpu')
-rw-r--r-- | src/core/gpu/cl/ClKernelLibrary.cpp | 30 |
1 files changed, 10 insertions, 20 deletions
diff --git a/src/core/gpu/cl/ClKernelLibrary.cpp b/src/core/gpu/cl/ClKernelLibrary.cpp index 9d516e54a7..73da93c1f5 100644 --- a/src/core/gpu/cl/ClKernelLibrary.cpp +++ b/src/core/gpu/cl/ClKernelLibrary.cpp @@ -223,23 +223,13 @@ const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map = { "crop_tensor", "crop_tensor.cl" }, { "deconvolution_reshape", "deconvolution_layer.cl" }, { "deconvolution_upsample", "deconvolution_layer.cl" }, - { "depthwise_convolution_3x3", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_f16", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_nhwc", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_nhwc_stride1", "depthwise_convolution.cl" }, - { "dwc_MxN_native_fp_nhwc", "depthwise_convolution.cl" }, - { "dwc_MxN_native_quantized8_nhwc", "depthwise_convolution_quantized.cl" }, - { "dwc_3x3_native_quantized8_nchw", "depthwise_convolution_quantized.cl" }, - { "dwc_3x3_native_quantized8_dot8_nchw", "depthwise_convolution_quantized.cl" }, { "depth_to_space_nchw", "depth_to_space.cl" }, { "depth_to_space_nhwc", "depth_to_space.cl" }, - { "depthwise_convolution_3x3_stridex1_stridey1_f16", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_stridex2_stridey2_f16", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_stridex1_stridey1_f32", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_stridex2_stridey2_f32", "depthwise_convolution.cl" }, { "dequantization_layer", "dequantization_layer.cl" }, { "dequantization_layer_per_channel_nhwc", "dequantization_layer.cl" }, { "dequantization_layer_per_channel_nchw", "dequantization_layer.cl" }, + { "dwc_native_fp_nhwc", "dwc_native_fp_nhwc.cl" }, + { "dwc_native_quantized_nhwc", "dwc_native_quantized_nhwc.cl" }, { "direct_convolution_nhwc", "direct_convolution.cl" }, { "direct_convolution1x1", "direct_convolution1x1.cl" }, { "direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl" }, @@ -575,14 +565,6 @@ const 
std::map<std::string, std::string> ClKernelLibrary::_program_source_map = #include "./cl_kernels/depth_to_space.clembed" }, { - "depthwise_convolution.cl", -#include "./cl_kernels/depthwise_convolution.clembed" - }, - { - "depthwise_convolution_quantized.cl", -#include "./cl_kernels/depthwise_convolution_quantized.clembed" - }, - { "dequantization_layer.cl", #include "./cl_kernels/dequantization_layer.clembed" }, @@ -607,6 +589,14 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map = #include "./cl_kernels/direct_convolution.clembed" }, { + "dwc_native_fp_nhwc.cl", +#include "./cl_kernels/dwc_native_fp_nhwc.clembed" + }, + { + "dwc_native_quantized_nhwc.cl", +#include "./cl_kernels/dwc_native_quantized_nhwc.clembed" + }, + { "elementwise_operation.cl", #include "./cl_kernels/elementwise_operation.clembed" }, |