diff options
Diffstat (limited to 'src/core/CL')
-rw-r--r-- | src/core/CL/ICLKernel.cpp | 2 | ||||
-rw-r--r-- | src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp | 18 | ||||
-rw-r--r-- | src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | 8 | ||||
-rw-r--r-- | src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 11 | ||||
-rw-r--r-- | src/core/CL/kernels/CLIm2ColKernel.cpp | 12 |
5 files changed, 47 insertions, 4 deletions
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp index 7a95374bbf..b0ac40adf7 100644 --- a/src/core/CL/ICLKernel.cpp +++ b/src/core/CL/ICLKernel.cpp @@ -60,7 +60,7 @@ void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Wind } ICLKernel::ICLKernel() - : _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD) + : _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD), _config_id("") { } diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 75e6d5e971..4224d9bb8e 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -230,6 +230,24 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL ICLKernel::configure(win); } + + // Set config_id for enabling LWS tuning + _config_id = "direct_convolution_"; + _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += "_"; + _config_id += support::cpp11::to_string(kernel_size); + _config_id += "_"; + _config_id += support::cpp11::to_string(_conv_pad_x); + _config_id += "_"; + _config_id += support::cpp11::to_string(_conv_pad_y); + _config_id += "_"; + _config_id += support::cpp11::to_string(_conv_stride_x); + _config_id += "_"; + _config_id += support::cpp11::to_string(_conv_stride_y); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(1)); } void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp index 5b6e0ec6af..268260b8d5 100644 --- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp +++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp @@ -81,6 +81,14 @@ void CLGEMMInterleave4x4Kernel::configure(const ICLTensor *input, ICLTensor *out output_access.set_valid_region(win, input->info()->valid_region()); ICLKernel::configure(win); + + // Set config_id for enabling LWS tuning + _config_id = "interleave4x4_"; + _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(1)); } void CLGEMMInterleave4x4Kernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 684e3232d5..b184c507ff 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -157,6 +157,17 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen output_access.set_valid_region(win, ValidRegion(coord, output->info()->tensor_shape())); ICLKernel::configure(win); + + // Set config_id for enabling LWS tuning + _config_id = "gemm_"; + _config_id += (is_interleaved_transposed ? "reshaped_" : ""); + _config_id += lower_string(string_from_data_type(input0->info()->data_type())); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(1)); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(0)); + _config_id += "_"; + _config_id += (is_interleaved_transposed ? support::cpp11::to_string(input1->info()->dimension(0)) : support::cpp11::to_string(input1->info()->dimension(1))); } } diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 3d21a9e3c0..98a799f783 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -123,6 +123,15 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const } ICLKernel::configure(win); + + // Set config_id for enabling LWS tuning + _config_id = "im2col_"; + _config_id += (run_img2col_reduced ? "reduced_" : ""); + _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(output->info()->dimension(1)); } void CLIm2ColKernel::run(const Window &window, cl::CommandQueue &queue) @@ -160,9 +169,6 @@ void CLIm2ColKernel::run_generic(const Window &window, cl::CommandQueue &queue) slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1)); slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1)); - // Set the local-workgroup size - _lws_hint = cl::NDRange(4, 4, 4); - do { unsigned int idx = 0; |