aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/CLKernelLibrary.cpp
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-01-02 09:58:51 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-01-19 13:43:52 +0000
commit ff1fe3e32e25069fed750cdfe3046b7d8d5a2628 (patch)
tree 9c01379de63f6ab218c7890dc91b10ac8faac157 /src/core/CL/CLKernelLibrary.cpp
parent 6124390be4690ba06c404d56449f7e5d390cef53 (diff)
download ComputeLibrary-ff1fe3e32e25069fed750cdfe3046b7d8d5a2628.tar.gz
Remove padding from direct convolution - OpenCL
- Refactor direct convolution for NHWC
- Remove old kernels for NHWC
- Change the heuristic in CLConvolutionLayer.cpp. The new direct convolution implementation is faster than FFT

Resolves COMPMID-3908

Change-Id: Iee15ce7b04e21847b6eaae5c6d3c1b18180e7efc
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4876
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/CLKernelLibrary.cpp')
-rw-r--r-- src/core/CL/CLKernelLibrary.cpp 11
1 files changed, 4 insertions, 7 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index dadb3f4db1..3e5b70a142 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -137,17 +137,14 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "dequantization_layer_per_channel_nchw", "dequantization_layer.cl" },
{ "derivative", "derivative.cl" },
{ "dilate", "dilate.cl" },
+ { "direct_convolution_nhwc", "direct_convolution.cl" },
{ "direct_convolution1x1", "direct_convolution1x1.cl" },
- { "direct_convolution1x1_nhwc", "direct_convolution1x1.cl" },
{ "direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl" },
{ "direct_convolution3x3", "direct_convolution3x3.cl" },
- { "direct_convolution3x3_nhwc", "direct_convolution3x3.cl" },
{ "direct_convolution3x3_f32_bifrost", "direct_convolution3x3.cl" },
{ "direct_convolution5x5", "direct_convolution5x5.cl" },
- { "direct_convolution5x5_nhwc", "direct_convolution5x5.cl" },
{ "direct_convolution5x5_f32_bifrost", "direct_convolution5x5.cl" },
{ "direct_convolution_quantized", "direct_convolution_quantized.cl" },
- { "direct_convolution9x9_nhwc", "direct_convolution9x9.cl" },
{ "elementwise_operation_ADD", "elementwise_operation.cl" },
{ "elementwise_operation_SUB", "elementwise_operation.cl" },
{ "elementwise_operation_MAX", "elementwise_operation.cl" },
@@ -631,8 +628,8 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/direct_convolution_quantized.clembed"
},
{
- "direct_convolution9x9.cl",
-#include "./cl_kernels/direct_convolution9x9.clembed"
+ "direct_convolution.cl",
+#include "./cl_kernels/direct_convolution.clembed"
},
{
"elementwise_operation.cl",