about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-10-23 15:23:23 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit c55beee7ef70fa08a5d217619083b288a74fcb27 (patch)
tree d4186463153286c0d76f84ea538005e0d5f81b3d /src/core/CL/kernels
parent f5ec981e27633cbc34b36a537d41a6e629bc2fc4 (diff)
download ComputeLibrary-c55beee7ef70fa08a5d217619083b288a74fcb27.tar.gz
COMPMID-1029: Collapse CLWinogradInputTransform/CLWinogradOutputTransform
Change-Id: I051748502ca24b9952e7313524bbfd708162efb4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/155166
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- src/core/CL/kernels/CLWinogradInputTransformKernel.cpp 38
-rw-r--r-- src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp 14
2 files changed, 37 insertions, 15 deletions
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index 04067319b0..f76ade1d32 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -116,6 +116,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
const PadStrideInfo conv_info = winograd_info.convolution_info;
const Size2D output_tile_size = winograd_info.output_tile_size;
const Size2D kernel_size = winograd_info.kernel_size;
+ const DataLayout data_layout = input->info()->data_layout();
const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
@@ -124,7 +125,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
const int num_elements_x = input->info()->dimension(idx_w) - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
const int num_elements_y = input->info()->dimension(idx_h) - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();
- if(input->info()->data_layout() == DataLayout::NCHW)
+ if(data_layout == DataLayout::NCHW)
{
// Check if we need to extend the right or bottom border
const unsigned int extra_border_right = ((num_elements_x % output_tile_size.width) == 0) ? 0u : static_cast<unsigned int>(output_tile_size.width - 1);
@@ -164,12 +165,16 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option_if(winograd_info.kernel_size.height == 1, "-DWINOGRAD_INPUT_TRANSFORM_HORIZONTAL");
build_opts.add_option_if(winograd_info.kernel_size.width == 1, "-DWINOGRAD_INPUT_TRANSFORM_VERTICAL");
-
- if(input->info()->data_layout() == DataLayout::NHWC)
+ if(data_layout == DataLayout::NHWC)
{
+ build_opts.add_option("-DNUM_TILES_Y=" + support::cpp11::to_string(_num_tiles_y));
build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1)));
build_opts.add_option("-DSRC_DIM_2=" + support::cpp11::to_string(_input->info()->dimension(2)));
}
+ else
+ {
+ build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(_input->info()->dimension(2)));
+ }
// Create kernel
std::string kernel_name = "winograd_input_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string();
@@ -178,7 +183,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
const unsigned int tile_max_dim = std::max(output_tile_size.width, output_tile_size.height);
// Check optimized kernel if output_dims == 2x2
- if((tile_max_dim == 2) && (input->info()->data_layout() == DataLayout::NCHW))
+ if((tile_max_dim == 2) && (data_layout == DataLayout::NCHW))
{
_step_z = (_input->info()->dimension(2) % 2) != 0 ? 1 : 2;
}
@@ -186,7 +191,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
// Append stepz and data layout
kernel_name += "_stepz";
kernel_name += support::cpp11::to_string(_step_z);
- kernel_name += "_" + lower_string(string_from_data_layout(input->info()->data_layout()));
+ kernel_name += "_" + lower_string(string_from_data_layout(data_layout));
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
@@ -223,17 +228,30 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- const size_t idx_w = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL);
+ const DataLayout data_layout = _input->info()->data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ const size_t total_batches = window.shape().total_size_upper(3);
- Window slice = window.first_slice_window_3D();
+ // Collapse window
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+
+ Window slice = window_collapsed.first_slice_window_3D();
slice.set(idx_w, Window::Dimension(0, _num_tiles_x, 1));
slice.set(idx_h, Window::Dimension(0, _num_tiles_y, 1));
+ if(data_layout == DataLayout::NHWC)
+ {
+ slice.set(idx_h, Window::Dimension(0, _num_tiles_y * total_batches, 1));
+ }
ARM_COMPUTE_ERROR_ON(((slice[idx_c].end() - slice[idx_c].start()) % _step_z) != 0);
slice.set(idx_c, Window::Dimension(slice[idx_c].start(), slice[idx_c].end(), _step_z));
+ unsigned int idx = 2 * num_arguments_per_3D_tensor();
+ _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
+ _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[3]));
+
do
{
unsigned int idx = 0;
@@ -242,5 +260,5 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue
enqueue(queue, *this, slice, lws_hint());
}
- while(window.slide_window_slice_3D(slice));
+ while(window_collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index 75988c6ca1..dc0a0e7f8f 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -165,6 +165,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC
build_opts.add_option("-DOUTPUT_TILE_W=" + support::cpp11::to_string(output_tile_size.width));
build_opts.add_option("-DOUTPUT_TILE_H=" + support::cpp11::to_string(output_tile_size.height));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(_input->info()->dimension(2)));
build_opts.add_option_if(winograd_info.kernel_size.height == 1, "-DWINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL");
build_opts.add_option_if(winograd_info.kernel_size.width == 1, "-DWINOGRAD_OUTPUT_TRANSFORM_VERTICAL");
@@ -206,8 +207,11 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+ // Collapse window
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+
// Get initial windows
- Window slice = window.first_slice_window_3D();
+ Window slice = window_collapsed.first_slice_window_4D();
slice.set(Window::DimZ, Window::Dimension(0, 1, 1));
// Setup output slice
@@ -217,7 +221,7 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue
if(_bias != nullptr)
{
- unsigned int idx1 = 2 * num_arguments_per_3D_tensor();
+ unsigned int idx1 = 2 * num_arguments_per_4D_tensor();
Window slice_biases;
slice_biases.use_tensor_dimensions(_bias->info()->tensor_shape());
add_1D_tensor_argument(idx1, _bias, slice_biases);
@@ -225,15 +229,15 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue
if(_output->info()->data_layout() == DataLayout::NHWC)
{
- unsigned int idx2 = 2 * num_arguments_per_3D_tensor() + ((_bias != nullptr) ? num_arguments_per_1D_tensor() : 0);
+ unsigned int idx2 = 2 * num_arguments_per_4D_tensor() + ((_bias != nullptr) ? num_arguments_per_1D_tensor() : 0);
_kernel.setArg(idx2, static_cast<int>(_output->info()->total_size() - _output->info()->strides_in_bytes().y()));
}
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice_out);
+ add_4D_tensor_argument(idx, _input, slice);
+ add_4D_tensor_argument(idx, _output, slice_out);
enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_out));