diff options
author | Gian Marco <gianmarco.iodice@arm.com> | 2018-02-07 23:13:06 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:46:07 +0000 |
commit | 54f18c4a7a20ff697dc1ba66a73e9d622a407d02 (patch) | |
tree | 782aa3f5054bfbf875a99b3a6df96d5396ec2b64 /src/core/CL/kernels/CLCol2ImKernel.cpp | |
parent | e9146ed3b4ad8501cb17dfe5953ef0259f106c2e (diff) | |
download | ComputeLibrary-54f18c4a7a20ff697dc1ba66a73e9d622a407d02.tar.gz |
COMPMID-901 - Optimizing CLCol2ImKernel
This patch makes col2im on OpenCL 2 times faster
Change-Id: I8d90f5a72a050355ca1fd13433d8c2c26e5e33f5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119442
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLCol2ImKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLCol2ImKernel.cpp | 13 |
1 file changed, 10 insertions, 3 deletions
```diff
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 499e1e8fe0..c8005ec0f6 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,8 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
     // Create kernel
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+    build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->info()->element_size()));
+    build_opts.add_option("-DWIDTH_INPUT=" + support::cpp11::to_string(input->info()->dimension(0)));
     build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.first));
     build_opts.add_option_if(is_data_type_fixed_point(data_type), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
 
@@ -87,10 +89,15 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
         }
     }
 
+    const unsigned int num_elems_read_per_iteration = is_data_type_fixed_point(data_type) ? 1 : 8;
+
     // Configure window
-    Window win = calculate_max_window(*input->info(), Steps());
+    Window win = calculate_max_window(*input->info(), Steps(num_elems_read_per_iteration));
+
+    // Update window and padding just for the input tensor as we cannot access out-of-bounds elements in the output one
+    AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration);
+    update_window_and_padding(win, input_access);
 
-    // The CLCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
     Coordinates coord;
     coord.set_num_dimensions(output->info()->num_dimensions());
     output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
```