From fc1d1e2200f3056572b158b8208bac456f48339f Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 10 Apr 2018 14:24:35 +0100 Subject: COMPMID-959: Add FP32 support to GLES GEMMConvolution The following kernels were supposed to have FP32 support but this was not the case because of bugs and missing shaders: - GCCol2Im - GCIm2Col - GCWeightsReshape Change-Id: Ie6ea464db0612757c71c3d40874e7bb0d60f170a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127572 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp') diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp index af1e34ef59..1554a89672 100644 --- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp @@ -62,30 +62,32 @@ void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output, _output = output; _convolved_dims = convolved_dims; - unsigned int num_elems_processed_per_iteration = 1; + const DataType dt = input->info()->data_type(); + const unsigned int local_size = 1; // Create kernel std::set<std::string> build_opts; + build_opts.emplace("#define COL2IM "); build_opts.emplace("#define WIDTH_OUTPUT " + support::cpp11::to_string(_convolved_dims.first)); - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + const std::string dt_name = (dt == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(local_size)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(local_size)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(local_size)); _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("col2im", build_opts)); // Configure window - unsigned int nums = 2; - Window win = calculate_max_window(*output->info(), Steps(nums)); + const unsigned int num_elems_processed_per_iteration = (dt == DataType::F32) ? 1 : 2; - AccessWindowHorizontal output_access(output->info(), 0, 2); + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); const int input_padding = ceil_to_multiple(input->info()->dimension(0), 2) - input->info()->dimension(0); AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + input_padding, input->info()->dimension(1) + 1); - update_window_and_padding(win, input_access, - output_access); + update_window_and_padding(win, input_access, output_access); output_access.set_valid_region(win, output->info()->valid_region()); -- cgit v1.2.1