From 17812ba9f7cf2c8f5121c11760ac45fbbdb7aeaf Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 4 Jun 2018 19:27:13 +0100 Subject: COMPMID-817: Tuner: Port kernels to new design. Change-Id: Iaabb1153c2abe0400ec79d51a21347debe92d642 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/134062 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/CL/kernels/CLCol2ImKernel.cpp | 15 ------- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 9 ---- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 48 ++-------------------- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 8 ---- src/core/CL/kernels/CLIm2ColKernel.cpp | 6 +-- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 12 +----- 6 files changed, 7 insertions(+), 91 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index e15da7258a..4e444206f1 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -110,21 +110,6 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("col2im", build_opts.options())); - // Configure the local work size for Bifrost with a value obtained - // via exhaustive autotuning over 30 representative tensor shapes. 
- const GPUTarget gpu_target = get_target(); - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX)) - { - if((_convolved_dims.first == 7) || (_convolved_dims.first == 14)) - { - _lws_hint = cl::NDRange(1, 7, 1); - } - else - { - _lws_hint = cl::NDRange(1, 8, 1); - } - } - // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index 41ff2202ca..c89b16eedc 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -90,15 +90,6 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts.options())); - // Configure the local work size for Bifrost with a value obtained - // via exhaustive autotuning for the MobileNets tensor shapes. 
- const GPUTarget gpu_target = get_target(); - - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX)) - { - _lws_hint = cl::NDRange(1, 2, 1); - } - // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); // CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 7a9760b778..fc52f4e124 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -194,51 +194,9 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen _output = output; _slide_matrix_b = _input1->info()->num_dimensions() >= _input0->info()->num_dimensions(); - const DataType data_type = input0->info()->data_type(); - const int fp_pos = input0->info()->fixed_point_position(); - - // Get target architecture - GPUTarget gpu_target = get_target(); - - // Configure LWS hint - switch(gpu_target) - { - case GPUTarget::MIDGARD: - case GPUTarget::T600: - case GPUTarget::T700: - case GPUTarget::T800: - if(output->info()->dimension(1) == 196) - { - _lws_hint = cl::NDRange(1, 7); - } - else - { - _lws_hint = cl::NDRange(8, 8); - } - break; - case GPUTarget::G71: - case GPUTarget::G72: - case GPUTarget::G51: - case GPUTarget::G51BIG: - case GPUTarget::G51LIT: - case GPUTarget::TNOX: - if(input1->info()->dimension(1) == 24) - { - // LWS optimized for the 11x11 AlexNet convolution on Bifrost. 
- _lws_hint = cl::NDRange(2, 2); - } - else if(output->info()->dimension(1) == 196) - { - _lws_hint = cl::NDRange(1, 7); - } - else - { - _lws_hint = cl::NDRange(8, 8); - } - break; - default: - _lws_hint = cl::NullRange; - } + const DataType data_type = input0->info()->data_type(); + const int fp_pos = input0->info()->fixed_point_position(); + const GPUTarget gpu_target = get_target(); ElementsProcessed num_elements_processed{}; diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 1d6f388def..d8ecd501b0 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -110,14 +110,6 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const _kernel.setArg(idx++, -_input1->info()->quantization_info().offset); } - // Configure the local work size for Bifrost with a value obtained - // via exhaustive autotuning for the MobileNets tensor shapes. 
- const GPUTarget gpu_target = get_target(); - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX)) - { - _lws_hint = cl::NDRange(1, 1, 1); - } - // Configure kernel window const unsigned int num_elems_read_per_iteration = 4; diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 378456cde6..53a4dca9a3 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -61,7 +61,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, b } // namespace CLIm2ColKernel::CLIm2ColKernel() - : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr), _kernel_dims() + : _input(nullptr), _output(nullptr), _conv_info(), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr), _kernel_dims() { } @@ -74,6 +74,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _input = input; _output = output; + _conv_info = conv_info; _kernel_dims = kernel_dims; const DataType data_type = input->info()->data_type(); @@ -190,10 +191,9 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const { vector_size = kernel_dims.width; } - // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. + // Vector size optimized for the 11x11 AlexNet convolution on Bifrost. 
if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX) && kernel_dims.width == 11) { - _lws_hint = cl::NDRange(1, 1, 1); vector_size = 8; } const size_t width_mod_vector_size = kernel_dims.width % vector_size; diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 3091df4665..b242c5550c 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -208,8 +208,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, _output = output; _pool_info = pool_info; - const GPUTarget gpu_target = get_target(); - const DataType data_type = input->info()->data_type(); + const DataType data_type = input->info()->data_type(); // Set build options CLBuildOptions build_opts; @@ -273,20 +272,11 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); ICLKernel::configure(std::get<1>(win_config)); - // Configure the local work size (hint) from the first two dimensions of the global work size. - // On Bifrost, this works for up to 35x35xC filters, for which the pooling_layer_3_optimized - // kernel is launched with gws=(9, 33, C). In any case, the hint will be ignored if it is - // invalid (e.g. exceeds the maximum workgroup size that the kernel can be launched with). if(data_layout == DataLayout::NCHW) { CLPoolingConfig pooling_config = std::get<2>(win_config); _num_elems_processed_per_iteration = pooling_config.first; _border_size = pooling_config.second; - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX)) - { - cl::NDRange gws = ICLKernel::gws_from_window(std::get<1>(win_config)); - _lws_hint = cl::NDRange(gws[0], gws[1], 1); - } } else { -- cgit v1.2.1