aboutsummaryrefslogtreecommitdiff
path: root/src/core/gpu/cl/ClKernelLibrary.cpp
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-04-16 15:08:59 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-06-24 11:16:30 +0000
commit 561c176598cd14245e2e7918fdf136d1c888d1da (patch)
tree 82adfff6de30292dabbbcc7ced4ae35cac3d45cf /src/core/gpu/cl/ClKernelLibrary.cpp
parent 31c7c26822270f1c4952c8973aa8bfb38e0a7c68 (diff)
download ComputeLibrary-561c176598cd14245e2e7918fdf136d1c888d1da.tar.gz
Rework OpenCL Depthwise Convolution
- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native implementation for both floating-point and quantized data types
- Develop two parametric OpenCL kernels for depthwise convolution layer NHWC (floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on OpenCL

Resolves COMPMID-4417

Change-Id: I253dd5d959a70783c82e62b1771a5e9f91621cb0
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5806
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/gpu/cl/ClKernelLibrary.cpp')
-rw-r--r-- src/core/gpu/cl/ClKernelLibrary.cpp 30
1 file changed, 10 insertions, 20 deletions
diff --git a/src/core/gpu/cl/ClKernelLibrary.cpp b/src/core/gpu/cl/ClKernelLibrary.cpp
index 9d516e54a7..73da93c1f5 100644
--- a/src/core/gpu/cl/ClKernelLibrary.cpp
+++ b/src/core/gpu/cl/ClKernelLibrary.cpp
@@ -223,23 +223,13 @@ const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map =
{ "crop_tensor", "crop_tensor.cl" },
{ "deconvolution_reshape", "deconvolution_layer.cl" },
{ "deconvolution_upsample", "deconvolution_layer.cl" },
- { "depthwise_convolution_3x3", "depthwise_convolution.cl" },
- { "depthwise_convolution_3x3_f16", "depthwise_convolution.cl" },
- { "depthwise_convolution_3x3_nhwc", "depthwise_convolution.cl" },
- { "depthwise_convolution_3x3_nhwc_stride1", "depthwise_convolution.cl" },
- { "dwc_MxN_native_fp_nhwc", "depthwise_convolution.cl" },
- { "dwc_MxN_native_quantized8_nhwc", "depthwise_convolution_quantized.cl" },
- { "dwc_3x3_native_quantized8_nchw", "depthwise_convolution_quantized.cl" },
- { "dwc_3x3_native_quantized8_dot8_nchw", "depthwise_convolution_quantized.cl" },
{ "depth_to_space_nchw", "depth_to_space.cl" },
{ "depth_to_space_nhwc", "depth_to_space.cl" },
- { "depthwise_convolution_3x3_stridex1_stridey1_f16", "depthwise_convolution.cl" },
- { "depthwise_convolution_3x3_stridex2_stridey2_f16", "depthwise_convolution.cl" },
- { "depthwise_convolution_3x3_stridex1_stridey1_f32", "depthwise_convolution.cl" },
- { "depthwise_convolution_3x3_stridex2_stridey2_f32", "depthwise_convolution.cl" },
{ "dequantization_layer", "dequantization_layer.cl" },
{ "dequantization_layer_per_channel_nhwc", "dequantization_layer.cl" },
{ "dequantization_layer_per_channel_nchw", "dequantization_layer.cl" },
+ { "dwc_native_fp_nhwc", "dwc_native_fp_nhwc.cl" },
+ { "dwc_native_quantized_nhwc", "dwc_native_quantized_nhwc.cl" },
{ "direct_convolution_nhwc", "direct_convolution.cl" },
{ "direct_convolution1x1", "direct_convolution1x1.cl" },
{ "direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl" },
@@ -575,14 +565,6 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
#include "./cl_kernels/depth_to_space.clembed"
},
{
- "depthwise_convolution.cl",
-#include "./cl_kernels/depthwise_convolution.clembed"
- },
- {
- "depthwise_convolution_quantized.cl",
-#include "./cl_kernels/depthwise_convolution_quantized.clembed"
- },
- {
"dequantization_layer.cl",
#include "./cl_kernels/dequantization_layer.clembed"
},
@@ -607,6 +589,14 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
#include "./cl_kernels/direct_convolution.clembed"
},
{
+ "dwc_native_fp_nhwc.cl",
+#include "./cl_kernels/dwc_native_fp_nhwc.clembed"
+ },
+ {
+ "dwc_native_quantized_nhwc.cl",
+#include "./cl_kernels/dwc_native_quantized_nhwc.clembed"
+ },
+ {
"elementwise_operation.cl",
#include "./cl_kernels/elementwise_operation.clembed"
},