aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2018-04-10 14:24:35 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:37 +0000
commit fc1d1e2200f3056572b158b8208bac456f48339f (patch)
tree 754c5ea55f2170afc2503d2fd67759c538660715 /src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
parent a72300a5e4d44cdadfe37f69e21f9bf628d19bb3 (diff)
download ComputeLibrary-fc1d1e2200f3056572b158b8208bac456f48339f.tar.gz
COMPMID-959: Add FP32 support to GLES GEMMConvolution
The following kernels were supposed to have FP32 support, but this was not the case because of bugs and missing shaders:
- GCCol2Im
- GCIm2Col
- GCWeightsReshape

Change-Id: Ie6ea464db0612757c71c3d40874e7bb0d60f170a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127572
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp')
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp 22
1 files changed, 12 insertions, 10 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
index af1e34ef59..1554a89672 100644
--- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
@@ -62,30 +62,32 @@ void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output,
_output = output;
_convolved_dims = convolved_dims;
- unsigned int num_elems_processed_per_iteration = 1;
+ const DataType dt = input->info()->data_type();
+ const unsigned int local_size = 1;
// Create kernel
std::set<std::string> build_opts;
+ build_opts.emplace("#define COL2IM ");
build_opts.emplace("#define WIDTH_OUTPUT " + support::cpp11::to_string(_convolved_dims.first));
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
+ const std::string dt_name = (dt == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
build_opts.emplace(("#define " + dt_name));
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(local_size));
+ build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(local_size));
+ build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(local_size));
_kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("col2im", build_opts));
// Configure window
- unsigned int nums = 2;
- Window win = calculate_max_window(*output->info(), Steps(nums));
+ const unsigned int num_elems_processed_per_iteration = (dt == DataType::F32) ? 1 : 2;
- AccessWindowHorizontal output_access(output->info(), 0, 2);
+ Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
const int input_padding = ceil_to_multiple(input->info()->dimension(0), 2) - input->info()->dimension(0);
AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + input_padding, input->info()->dimension(1) + 1);
- update_window_and_padding(win, input_access,
- output_access);
+ update_window_and_padding(win, input_access, output_access);
output_access.set_valid_region(win, output->info()->valid_region());