From c0d1c86b1bb1b4e129c292549845e00dfd8abfee Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 23 Mar 2018 15:13:15 +0000 Subject: COMPMID-734: CLTuner rework Change-Id: I8f20d6ea8a09869d71003e7b08e0d33775282f6c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125802 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 73 ---------------------- 1 file changed, 73 deletions(-) (limited to 'src/core/CL') diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 56ac0c7250..b5526c4fca 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -315,79 +315,6 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL kernel_name << "_f32_bifrost"; _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options())); - - // Through extensive experimentation with over 30 representative tensor - // shapes, we found a small number of local work size configurations - // that result in nearly optimal execution times. Selecting the right - // lws for a given shape, however, required a complex decision tree, - // until we constructed a simple feature as described below. - // - // We started from the number of multiply-accumulate operations for a - // convolution layer, which is equal to the product of the input - // dimensions 0..2 and the weights dimensions 0..2. Unfortunately, - // this resulted in ties between distinct shapes that required distinct - // lws configurations. Replacing the width of the input with the kernel - // size, however, resulted in nearly optimal predictions. We use underscores - // in variable names to indicate when they are intentionally misleading. 
- const size_t product_of_weights_dimensions = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2); - const size_t product_of_input_dimensions_ = input->info()->dimension(0) * weights->info()->dimension(1) * input->info()->dimension(2); - const float mega_ops_ = 1e-6 * product_of_weights_dimensions * product_of_input_dimensions_; - - switch(kernel_size) - { - case 1: - { - if(mega_ops_ < 1.f) - { - _lws_hint = cl::NDRange(1, 1, 8); - } - else if(mega_ops_ < 7.f) - { - _lws_hint = cl::NDRange(1, 1, 4); - } - else - { - _lws_hint = cl::NDRange(1, 1, 2); - } - break; - } - case 3: - { - if(mega_ops_ < 1.f) - { - _lws_hint = cl::NDRange(1, 1, 8); - } - else if(mega_ops_ < 13.f) - { - _lws_hint = cl::NDRange(2, 1, 4); - } - else if(mega_ops_ < 50.f) - { - _lws_hint = cl::NDRange(3, 1, 4); - } - else - { - _lws_hint = cl::NDRange(2, 1, 6); - } - break; - } - case 5: - { - if(mega_ops_ < 2.f || mega_ops_ > 80.f) - { - _lws_hint = cl::NDRange(2, 1, 4); - } - else - { - _lws_hint = cl::NDRange(2, 1, 8); - } - break; - } - default: - { - ARM_COMPUTE_ERROR("Kernel size not optimized for Bifrost"); - } - } } else { -- cgit v1.2.1