aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-03-23 15:13:15 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:16 +0000
commitc0d1c86b1bb1b4e129c292549845e00dfd8abfee (patch)
treedfb24aa860615f45a430a5c921df7a1a20eb52d8 /src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
parent7fad9b1d00f3ee1488ba4038d1371f6ea219f8b7 (diff)
downloadComputeLibrary-c0d1c86b1bb1b4e129c292549845e00dfd8abfee.tar.gz
COMPMID-734: CLTuner rework
Change-Id: I8f20d6ea8a09869d71003e7b08e0d33775282f6c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125802 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp73
1 files changed, 0 insertions, 73 deletions
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 56ac0c7250..b5526c4fca 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -315,79 +315,6 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
kernel_name << "_f32_bifrost";
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options()));
-
- // Through extensive experimentation with over 30 representative tensor
- // shapes, we found a small number of local work size configurations
- // that result in nearly optimal execution times. Selecting the right
- // lws for a given shape, however, required a complex decision tree,
- // until we constructed a simple feature as described below.
- //
- // We started from the number of multiply-accumulate operations for a
- // convolution layer, which is equal to the product of the input
- // dimensions 0..2 and the weights dimensions 0..2. Unfortunately,
- // this resulted in ties between distinct shapes that required distinct
- // lws configurations. Replacing the width of the input with the kernel
- // size, however, resulted in nearly optimal predictions. We use underscores
- // in variable names to indicate when they are intentionally misleading.
- const size_t product_of_weights_dimensions = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2);
- const size_t product_of_input_dimensions_ = input->info()->dimension(0) * weights->info()->dimension(1) * input->info()->dimension(2);
- const float mega_ops_ = 1e-6 * product_of_weights_dimensions * product_of_input_dimensions_;
-
- switch(kernel_size)
- {
- case 1:
- {
- if(mega_ops_ < 1.f)
- {
- _lws_hint = cl::NDRange(1, 1, 8);
- }
- else if(mega_ops_ < 7.f)
- {
- _lws_hint = cl::NDRange(1, 1, 4);
- }
- else
- {
- _lws_hint = cl::NDRange(1, 1, 2);
- }
- break;
- }
- case 3:
- {
- if(mega_ops_ < 1.f)
- {
- _lws_hint = cl::NDRange(1, 1, 8);
- }
- else if(mega_ops_ < 13.f)
- {
- _lws_hint = cl::NDRange(2, 1, 4);
- }
- else if(mega_ops_ < 50.f)
- {
- _lws_hint = cl::NDRange(3, 1, 4);
- }
- else
- {
- _lws_hint = cl::NDRange(2, 1, 6);
- }
- break;
- }
- case 5:
- {
- if(mega_ops_ < 2.f || mega_ops_ > 80.f)
- {
- _lws_hint = cl::NDRange(2, 1, 4);
- }
- else
- {
- _lws_hint = cl::NDRange(2, 1, 8);
- }
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Kernel size not optimized for Bifrost");
- }
- }
}
else
{