From 54f18c4a7a20ff697dc1ba66a73e9d622a407d02 Mon Sep 17 00:00:00 2001
From: Gian Marco
Date: Wed, 7 Feb 2018 23:13:06 +0000
Subject: COMPMID-901 - Optimizing CLCol2ImKernel

This patch makes col2im on OpenCL 2 times faster

Change-Id: I8d90f5a72a050355ca1fd13433d8c2c26e5e33f5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119442
Tested-by: Jenkins
Reviewed-by: Georgios Pinitas
---
 src/core/CL/kernels/CLCol2ImKernel.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'src/core/CL/kernels/CLCol2ImKernel.cpp')

diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 499e1e8fe0..c8005ec0f6 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,8 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
     // Create kernel
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+    build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->info()->element_size()));
+    build_opts.add_option("-DWIDTH_INPUT=" + support::cpp11::to_string(input->info()->dimension(0)));
     build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.first));
     build_opts.add_option_if(is_data_type_fixed_point(data_type), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
 
@@ -87,10 +89,15 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
         }
     }
 
+    const unsigned int num_elems_read_per_iteration = is_data_type_fixed_point(data_type) ? 1 : 8;
+
     // Configure window
-    Window win = calculate_max_window(*input->info(), Steps());
+    Window win = calculate_max_window(*input->info(), Steps(num_elems_read_per_iteration));
+
+    // Update window and padding just for the input tensor as we cannot access out-of-bounds elements in the output one
+    AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration);
+    update_window_and_padding(win, input_access);
 
-    // The CLCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
     Coordinates coord;
     coord.set_num_dimensions(output->info()->num_dimensions());
     output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-- 
cgit v1.2.1