From 8155c0253c00aa9e26651361460c66feb39829a6 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 16 Apr 2021 15:08:59 +0100 Subject: Rework OpenCL Depthwise Convolution - Remove dedicated kernels for NCHW. Now we only use NHWC with permute - Remove specialized kernels for 3x3 NHWC - Simplify CLDepthwiseConvolutionLayer.cpp to call just the native implementation for both floating-point and quantized data types - Develop two parametric OpenCL kernels for depthwise convolution layer NHWC (floating-point and quantized) - Add support to export the weights to cl_image - Extend test for depthwise convolution on OpenCL Resolves COMPMID-4417 Change-Id: Ibe533f79c2860f9cac8e921895d5a8f947753a5c Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5893 Reviewed-by: Giorgio Arena Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/gpu/cl/ClKernelLibrary.cpp | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'src/core/gpu/cl') diff --git a/src/core/gpu/cl/ClKernelLibrary.cpp b/src/core/gpu/cl/ClKernelLibrary.cpp index 9d516e54a7..73da93c1f5 100644 --- a/src/core/gpu/cl/ClKernelLibrary.cpp +++ b/src/core/gpu/cl/ClKernelLibrary.cpp @@ -223,23 +223,13 @@ const std::map ClKernelLibrary::_kernel_program_map = { "crop_tensor", "crop_tensor.cl" }, { "deconvolution_reshape", "deconvolution_layer.cl" }, { "deconvolution_upsample", "deconvolution_layer.cl" }, - { "depthwise_convolution_3x3", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_f16", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_nhwc", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_nhwc_stride1", "depthwise_convolution.cl" }, - { "dwc_MxN_native_fp_nhwc", "depthwise_convolution.cl" }, - { "dwc_MxN_native_quantized8_nhwc", "depthwise_convolution_quantized.cl" }, - { "dwc_3x3_native_quantized8_nchw", "depthwise_convolution_quantized.cl" }, - { 
"dwc_3x3_native_quantized8_dot8_nchw", "depthwise_convolution_quantized.cl" }, { "depth_to_space_nchw", "depth_to_space.cl" }, { "depth_to_space_nhwc", "depth_to_space.cl" }, - { "depthwise_convolution_3x3_stridex1_stridey1_f16", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_stridex2_stridey2_f16", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_stridex1_stridey1_f32", "depthwise_convolution.cl" }, - { "depthwise_convolution_3x3_stridex2_stridey2_f32", "depthwise_convolution.cl" }, { "dequantization_layer", "dequantization_layer.cl" }, { "dequantization_layer_per_channel_nhwc", "dequantization_layer.cl" }, { "dequantization_layer_per_channel_nchw", "dequantization_layer.cl" }, + { "dwc_native_fp_nhwc", "dwc_native_fp_nhwc.cl" }, + { "dwc_native_quantized_nhwc", "dwc_native_quantized_nhwc.cl" }, { "direct_convolution_nhwc", "direct_convolution.cl" }, { "direct_convolution1x1", "direct_convolution1x1.cl" }, { "direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl" }, @@ -573,14 +563,6 @@ const std::map ClKernelLibrary::_program_source_map = { "depth_to_space.cl", #include "./cl_kernels/depth_to_space.clembed" - }, - { - "depthwise_convolution.cl", -#include "./cl_kernels/depthwise_convolution.clembed" - }, - { - "depthwise_convolution_quantized.cl", -#include "./cl_kernels/depthwise_convolution_quantized.clembed" }, { "dequantization_layer.cl", @@ -605,6 +587,14 @@ const std::map ClKernelLibrary::_program_source_map = { "direct_convolution.cl", #include "./cl_kernels/direct_convolution.clembed" + }, + { + "dwc_native_fp_nhwc.cl", +#include "./cl_kernels/dwc_native_fp_nhwc.clembed" + }, + { + "dwc_native_quantized_nhwc.cl", +#include "./cl_kernels/dwc_native_quantized_nhwc.clembed" }, { "elementwise_operation.cl", -- cgit v1.2.1