From 868282a282445e4bc48be3535e303d7e12d9a3af Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 9 Jan 2020 16:45:46 +0000
Subject: COMPMID-2819: Retain layout during configuration for multiple functions.

Signed-off-by: Georgios Pinitas
Change-Id: Ia528762dc5a93bebfd8fd037bf1f4e75d0b8a6de
Reviewed-on: https://review.mlplatform.org/c/2566
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
---
 .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 3 +-
 .../CL/kernels/CLDirectConvolutionLayerKernel.h | 3 +-
 arm_compute/core/CL/kernels/CLIm2ColKernel.h | 1 +
 .../core/CL/kernels/CLUpsampleLayerKernel.h | 1 +
 .../CL/kernels/CLWinogradInputTransformKernel.h | 1 +
 .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 21 +++++++-------
 .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 33 +++++++++++-----------
 src/core/CL/kernels/CLIm2ColKernel.cpp | 13 +++++----
 src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 12 ++++----
 .../CL/kernels/CLWinogradInputTransformKernel.cpp | 32 ++++++++++-----------
 10 files changed, 61 insertions(+), 59 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index 21d026e0a1..2dd20e9588 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -72,6 +72,7 @@ private:
     const ICLTensor *_input;
     ICLTensor *_output;
     PadStrideInfo _info;
+    DataLayout _data_layout;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 081b01aad3..faf97e45dc 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -94,6 +94,7 @@ public:
     const ICLTensor *_biases;
     const ICLTensor *_weights;
     ICLTensor *_output;
+    DataLayout _data_layout;
     BorderSize _border_size;
     int _conv_stride_x;
     int _conv_stride_y;
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
index 0647f5dcec..00cb416e90 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -103,6 +103,7 @@ public:
 public:
     const ICLTensor *_input;
     ICLTensor *_output;
+    DataLayout _data_layout;
     std::pair<unsigned int, unsigned int> _convolved_dims;
     unsigned int _num_elems_processed_per_iteration;
     Size2D _kernel_dims;
diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
index dcd4f1bdb4..c8c69002c4 100644
--- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
@@ -73,6 +73,7 @@ private:
     const ICLTensor *_input;
     ICLTensor *_output;
     Size2D _info;
+    DataLayout _data_layout;
     unsigned int _num_elems_processed_per_iteration_input_x;
 };
 } // namespace arm_compute
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
index bc05a0ebf1..30bd3abb43 100644
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
@@ -93,6 +93,7 @@ private:
     BorderSize _border_size;
     const ICLTensor *_input;
     ICLTensor *_output;
+    DataLayout _data_layout;
     int _num_tiles_x;
    int _num_tiles_y;
     unsigned int _step_z;
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 819e3c910a..cd9552f149 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,7 +35,7 @@
 using namespace arm_compute;
 
 CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel()
-    : _input(nullptr), _output(nullptr), _info()
+    : _input(nullptr), _output(nullptr), _info(), _data_layout(DataLayout::UNKNOWN)
 {
 }
 
@@ -72,13 +72,14 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
-    _input = input;
-    _output = output;
-    _info = info;
-
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), info));
 
+    _input = input;
+    _output = output;
+    _info = info;
+    _data_layout = input->info()->data_layout();
+
     // Create kernel
     CLBuildOptions build_opts;
     build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
@@ -99,10 +100,8 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-    const DataLayout data_layout = _input->info()->data_layout();
-
-    const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    const size_t idx_w = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
 
     const int out_start_x = _info.pad_left();
     const int out_end_x = _output->info()->dimension(idx_w) - _info.pad_right() + _info.stride().first - 1;
@@ -112,7 +111,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
     const int out_end_y = _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1;
     const int out_step_y = _info.stride().second;
 
-    switch(data_layout)
+    switch(_data_layout)
     {
         case DataLayout::NCHW:
         {
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 7b74a5a98c..e7ff7621ff 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -400,7 +400,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
 } // namespace
 
 CLDirectConvolutionLayerKernel::CLDirectConvolutionLayerKernel()
-    : _input(nullptr), _biases(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_stride_x(0), _conv_stride_y(0)
+    : _input(nullptr), _biases(nullptr), _weights(nullptr), _output(nullptr), _data_layout(DataLayout::UNKNOWN), _border_size(0), _conv_stride_x(0), _conv_stride_y(0)
 {
 }
 
@@ -413,10 +413,10 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
-    const DataLayout data_layout = input->info()->data_layout();
-    const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-    const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+    _data_layout = input->info()->data_layout();
+    const int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+    const int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
 
     const unsigned int kernel_size = weights->info()->dimension(width_idx);
     const DataType data_type = input->info()->data_type();
@@ -442,11 +442,11 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
     _conv_stride_x = std::get<0>(conv_info.stride());
     _conv_stride_y = std::get<1>(conv_info.stride());
 
-    if(data_layout == DataLayout::NHWC)
+    if(_data_layout == DataLayout::NHWC)
     {
         _border_size = BorderSize(conv_info.pad_left(), 0, conv_info.pad_right(), 0);
     }
-    else if(data_layout == DataLayout::NCHW)
+    else if(_data_layout == DataLayout::NCHW)
     {
         _border_size = BorderSize(conv_info.pad_top(), conv_info.pad_right(), conv_info.pad_bottom(), conv_info.pad_left());
     }
@@ -464,15 +464,15 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
 
     std::stringstream kernel_name;
     kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size;
-    if(data_layout == DataLayout::NHWC)
+    if(_data_layout == DataLayout::NHWC)
     {
-        kernel_name << "_" << lower_string(string_from_data_layout(data_layout));
+        kernel_name << "_" << lower_string(string_from_data_layout(_data_layout));
     }
 
     CLBuildOptions build_options;
     build_options.add_option_if(_biases != nullptr, std::string("-DHAS_BIAS"));
 
-    const bool run_optimized_for_bifrost = can_run_optimized_kernel_for_bifrost(gpu_target, _conv_stride_x, _conv_stride_y, kernel_size, data_type, data_layout);
+    const bool run_optimized_for_bifrost = can_run_optimized_kernel_for_bifrost(gpu_target, _conv_stride_x, _conv_stride_y, kernel_size, data_type, _data_layout);
 
     if(run_optimized_for_bifrost)
     {
@@ -489,9 +489,9 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
         build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type)));
         build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(channel_idx))));
         build_options.add_option(std::string("-DSTRIDE_X=" + support::cpp11::to_string(_conv_stride_x)));
-        if(data_layout == DataLayout::NHWC)
+        if(_data_layout == DataLayout::NHWC)
         {
-            const bool run_optimized_for_bifrost_nhwc = can_run_optimized_kernel_for_bifrost_nhwc(gpu_target, _conv_stride_x, _conv_stride_y, kernel_size, data_type, data_layout);
+            const bool run_optimized_for_bifrost_nhwc = can_run_optimized_kernel_for_bifrost_nhwc(gpu_target, _conv_stride_x, _conv_stride_y, kernel_size, data_type, _data_layout);
             build_options.add_option(std::string("-DDATA_LAYOUT_NHWC=1"));
             build_options.add_option(std::string("-DDST_HEIGHT=" + support::cpp11::to_string(_output->info()->dimension(height_idx))));
             build_options.add_option(std::string("-DDST_WIDTH=" + support::cpp11::to_string(_output->info()->dimension(width_idx))));
@@ -561,7 +561,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
     _config_id += "_";
     _config_id += support::cpp11::to_string(output->info()->dimension(height_idx));
     _config_id += "_";
-    _config_id += lower_string(string_from_data_layout(data_layout));
+    _config_id += lower_string(string_from_data_layout(_data_layout));
 }
 
 Status CLDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -585,9 +585,8 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue
     win_in.adjust(Window::DimX, -_border_size.left, true);
     win_in.adjust(Window::DimY, -_border_size.top, true);
 
-    const DataLayout data_layout = _input->info()->data_layout();
-    const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    const int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
 
     win_in.set_dimension_step(width_idx, window[width_idx].step() * _conv_stride_x);
     win_in.set_dimension_step(height_idx, window[height_idx].step() * _conv_stride_y);
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 10d6e68cd9..24f22c31a5 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -287,7 +287,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size
 } // namespace
 
 CLIm2ColKernel::CLIm2ColKernel()
-    : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _kernel_dims(), _conv_info(), _num_groups()
+    : _input(nullptr), _output(nullptr), _data_layout(DataLayout::UNKNOWN), _convolved_dims(), _num_elems_processed_per_iteration(1), _kernel_dims(), _conv_info(), _num_groups()
 {
 }
 
@@ -297,9 +297,10 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups));
 
-    const DataLayout data_layout = input->info()->data_layout();
-    const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    _data_layout = input->info()->data_layout();
+
+    const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
 
     const unsigned int input_width = input->info()->dimension(width_idx);
     const unsigned int input_height = input->info()->dimension(height_idx);
@@ -336,7 +337,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
     _config_id += "_";
     _config_id += support::cpp11::to_string(output->info()->dimension(1));
     _config_id += "_";
-    _config_id += lower_string(string_from_data_layout(input->info()->data_layout()));
+    _config_id += lower_string(string_from_data_layout(_data_layout));
 }
 
 Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation,
@@ -369,7 +370,7 @@ void CLIm2ColKernel::run(const Window &window, cl::CommandQueue &queue)
     Window slice_in = first_slice_3d;
     Window slice_out = window_output.first_slice_window_2D();
 
-    if(_input->info()->data_layout() == DataLayout::NHWC)
+    if(_data_layout == DataLayout::NHWC)
     {
         const Window tmp_win = window.collapse_if_possible(ICLKernel::window(), 3);
         const int num_batches = tmp_win[3].end();
diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
index 331b02d2e8..9d2532e306 100644
--- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
@@ -37,7 +37,7 @@
 namespace arm_compute
 {
 CLUpsampleLayerKernel::CLUpsampleLayerKernel()
-    : _input(nullptr), _output(nullptr), _info(), _num_elems_processed_per_iteration_input_x()
+    : _input(nullptr), _output(nullptr), _info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration_input_x()
 {
 }
 
@@ -71,13 +71,12 @@ void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     _input = input;
     _output = output;
     _info = info;
+    _data_layout = input->info()->data_layout();
     _num_elems_processed_per_iteration_input_x = 1;
 
-    const DataLayout data_layout = input->info()->data_layout();
-
     TensorShape output_shape = misc::shape_calculator::compute_upsample_shape(*input->info(), info);
     auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-    output->info()->set_data_layout(data_layout);
+    output->info()->set_data_layout(_data_layout);
 
     unsigned int num_elems_processed_per_iteration_x = 16;
     const int output_width_x = output->info()->dimension(0);
@@ -88,7 +87,7 @@ void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
 
     Window win{};
 
-    switch(data_layout)
+    switch(_data_layout)
     {
         case DataLayout::NCHW:
         {
@@ -140,8 +139,7 @@ void CLUpsampleLayerKernel::run(const Window &window, cl::CommandQueue &queue)
     Window slice_out = collapsed_window.first_slice_window_3D();
     Window slice_in = collapsed_window.first_slice_window_3D();
 
-    DataLayout data_layout = _input->info()->data_layout();
-    switch(data_layout)
+    switch(_data_layout)
     {
         case DataLayout::NCHW:
             slice_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _num_elems_processed_per_iteration_input_x));
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index 1c31ceba99..6125790491 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -99,7 +99,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
 } // namespace
 
 CLWinogradInputTransformKernel::CLWinogradInputTransformKernel()
-    : _border_size(0), _input(nullptr), _output(nullptr), _num_tiles_x(0), _num_tiles_y(0), _step_z(1)
+    : _border_size(0), _input(nullptr), _output(nullptr), _data_layout(DataLayout::UNKNOWN), _num_tiles_x(0), _num_tiles_y(0), _step_z(1)
 {
 }
 
@@ -116,16 +116,17 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     const PadStrideInfo conv_info = winograd_info.convolution_info;
     const Size2D output_tile_size = winograd_info.output_tile_size;
     const Size2D kernel_size = winograd_info.kernel_size;
-    const DataLayout data_layout = input->info()->data_layout();
 
-    const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
-    const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
+    _data_layout = input->info()->data_layout();
+
+    const size_t idx_w = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
 
     // Compute number of elements to process in the X and Y direction
     const int num_elements_x = input->info()->dimension(idx_w) - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();
     const int num_elements_y = input->info()->dimension(idx_h) - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();
 
-    if(data_layout == DataLayout::NCHW)
+    if(_data_layout == DataLayout::NCHW)
     {
         // Check if we need to extend the right or bottom border
         const unsigned int extra_border_right = ((num_elements_x % output_tile_size.width) == 0) ? 0u : static_cast<unsigned int>(output_tile_size.width - 1);
@@ -166,7 +167,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option_if(winograd_info.kernel_size.height == 1, "-DWINOGRAD_INPUT_TRANSFORM_HORIZONTAL");
     build_opts.add_option_if(winograd_info.kernel_size.width == 1, "-DWINOGRAD_INPUT_TRANSFORM_VERTICAL");
-    if(data_layout == DataLayout::NHWC)
+    if(_data_layout == DataLayout::NHWC)
     {
         build_opts.add_option_if(total_batches > 1, "-DNUM_TILES_Y=" + support::cpp11::to_string(_num_tiles_y));
         build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1)));
@@ -184,7 +185,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     const unsigned int tile_max_dim = std::max(output_tile_size.width, output_tile_size.height);
 
     // Check optimized kernel if output_dims == 2x2
-    if((tile_max_dim == 2) && (data_layout == DataLayout::NCHW))
+    if((tile_max_dim == 2) && (_data_layout == DataLayout::NCHW))
     {
         _step_z = (_input->info()->dimension(2) % 2) != 0 ? 1 : 2;
     }
@@ -192,7 +193,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     // Append stepz and data layout
     kernel_name += "_stepz";
     kernel_name += support::cpp11::to_string(_step_z);
-    kernel_name += "_" + lower_string(string_from_data_layout(data_layout));
+    kernel_name += "_" + lower_string(string_from_data_layout(_data_layout));
 
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
@@ -212,7 +213,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     _config_id += "_";
     _config_id += support::cpp11::to_string(conv_info.pad_top());
     _config_id += "_";
-    _config_id += lower_string(string_from_data_layout(input->info()->data_layout()));
+    _config_id += lower_string(string_from_data_layout(_data_layout));
 }
 
 Status CLWinogradInputTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
@@ -229,11 +230,10 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
 
-    const DataLayout data_layout = _input->info()->data_layout();
-    const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-    const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-    const size_t total_batches = window.shape().total_size_upper(3);
+    const size_t idx_w = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+    const size_t idx_c = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
+    const size_t total_batches = window.shape().total_size_upper(3);
 
     // Collapse window
     Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
@@ -241,7 +241,7 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue
     Window slice = window_collapsed.first_slice_window_3D();
     slice.set(idx_w, Window::Dimension(0, _num_tiles_x, 1));
     slice.set(idx_h, Window::Dimension(0, _num_tiles_y, 1));
-    if(data_layout == DataLayout::NHWC)
+    if(_data_layout == DataLayout::NHWC)
    {
         slice.set(idx_h, Window::Dimension(0, _num_tiles_y * total_batches, 1));
     }
-- 
cgit v1.2.1
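
Every kernel touched by this patch follows the same pattern: the data layout is read from the input's tensor info once, inside configure(), stored in a new _data_layout member, and run() then switches on the cached member instead of dereferencing _input->info() again at execution time. Below is a minimal self-contained sketch of that pattern; the names (SimpleTensorInfo, UpsampleKernelSketch) are illustrative only and are not Compute Library API.

// Sketch of configure-time layout caching; hypothetical types, not arm_compute API.
#include <iostream>
#include <stdexcept>

enum class DataLayout { UNKNOWN, NCHW, NHWC };

struct SimpleTensorInfo
{
    DataLayout layout = DataLayout::UNKNOWN;
};

class UpsampleKernelSketch
{
public:
    // Capture everything that depends on the input's metadata once, at configuration time.
    void configure(const SimpleTensorInfo &input)
    {
        _data_layout = input.layout;
        _configured  = true;
    }

    // run() relies only on members cached by configure(), so the execution path no longer
    // depends on re-querying the input tensor's metadata on every enqueue.
    void run() const
    {
        if(!_configured)
        {
            throw std::runtime_error("Kernel not configured");
        }
        switch(_data_layout)
        {
            case DataLayout::NCHW:
                std::cout << "enqueue NCHW variant\n";
                break;
            case DataLayout::NHWC:
                std::cout << "enqueue NHWC variant\n";
                break;
            default:
                throw std::runtime_error("Unknown data layout");
        }
    }

private:
    DataLayout _data_layout = DataLayout::UNKNOWN;
    bool       _configured  = false;
};

int main()
{
    SimpleTensorInfo info{ DataLayout::NHWC };
    UpsampleKernelSketch kernel;
    kernel.configure(info); // layout retained here
    kernel.run();           // reused here without touching the tensor info again
}

Retaining the layout at configuration time keeps run() free of tensor-info lookups on the hot path, which is the intent of the change above.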