aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/kernels
diff options
context:
space:
mode:
author: Frank Lei <frank.lei@arm.com> 2018-02-01 14:47:14 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:40 +0000
commit: 4406fd6cc4abded564d3791324e1f48bdfd34273 (patch)
tree: 22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/core/GLES_COMPUTE/kernels
parent: 898d399a0f62c15612a52df4bff5018e783214e4 (diff)
download: ComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz
APPBROWSER-391: Fix GLES COMPUTE alignment issues
APPBROWSER-402: Performance optimization for squeezenet/xray model

Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels')
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp 18
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp 18
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp 11
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp 29
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp 20
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp 2
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp 12
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp 21
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp 29
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp 26
10 files changed, 130 insertions, 56 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
index b8672c662d..d7c645d09d 100644
--- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -109,16 +109,26 @@ void GCActivationLayerKernel::run(const Window &window)
_kernel.use();
- Window slice = window.first_slice_window_3D();
+ _output->set_needs_shifting(true);
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_in = window.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
+ if(_input == _output)
+ {
+ slice_in.shift(Window::DimX, -(_input->info()->padding()).left);
+ }
do
{
unsigned int idx = 0;
unsigned int binding = 1;
- add_3D_tensor_argument(idx, _input, binding++, slice);
+ add_3D_tensor_argument(idx, _input, binding++, slice_in);
add_3D_tensor_argument(idx, _output, binding++, slice);
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window.slide_window_slice_3D(slice));
+ while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
index caec324de2..06cf40990c 100644
--- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -135,18 +135,24 @@ void GCArithmeticAdditionKernel::run(const Window &window)
_kernel.use();
- Window slice = window.first_slice_window_2D();
+ _output->set_needs_shifting(true);
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_in = window.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
do
{
unsigned int idx = 0;
unsigned int binding = 1; // SSBO binding starts from 1.
- add_2D_tensor_argument(idx, _input1, binding++, slice);
- add_2D_tensor_argument(idx, _input2, binding++, slice);
- add_2D_tensor_argument(idx, _output, binding++, slice);
+ add_3D_tensor_argument(idx, _input1, binding++, slice_in);
+ add_3D_tensor_argument(idx, _input2, binding++, slice_in);
+ add_3D_tensor_argument(idx, _output, binding++, slice);
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window.slide_window_slice_2D(slice));
+ while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
index a41b62fbab..cd93f6997e 100644
--- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
@@ -119,7 +119,10 @@ void GCBatchNormalizationLayerKernel::run(const Window &window)
_kernel.use();
- Window slice = window.first_slice_window_3D();
+ _output->set_needs_shifting(true);
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_in = window.first_slice_window_3D();
Window vector_slice = window.first_slice_window_1D();
vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0));
@@ -130,14 +133,16 @@ void GCBatchNormalizationLayerKernel::run(const Window &window)
add_1D_tensor_argument(idx, _beta, 5, vector_slice);
add_1D_tensor_argument(idx, _gamma, 6, vector_slice);
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
do
{
idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice);
+ add_3D_tensor_argument(idx, _input, 1, slice_in);
add_3D_tensor_argument(idx, _output, 2, slice);
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window.slide_window_slice_3D(slice));
+ while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
index 7b1848c32b..36d1b29bba 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
@@ -38,7 +38,7 @@
using namespace arm_compute;
GCDepthConcatenateLayerKernel::GCDepthConcatenateLayerKernel()
- : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0)
+ : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0)
{
}
@@ -61,8 +61,9 @@ void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned i
ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2);
ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2);
- _input = input;
- _output = output;
+ _input = input;
+ _output = output;
+ _depth_offset = depth_offset;
// Add build options
std::set<std::string> build_opts;
@@ -76,11 +77,8 @@ void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned i
_left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2;
_top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2;
- const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2];
-
- build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right));
- build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom));
- build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes));
+ build_opts.emplace("#define OFFSET_X " + support::cpp11::to_string(_left_right));
+ build_opts.emplace("#define OFFSET_Y " + support::cpp11::to_string(_top_bottom));
// Create kernel
_kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts));
@@ -118,17 +116,24 @@ void GCDepthConcatenateLayerKernel::run(const Window &window)
_kernel.use();
- Window slice = window.first_slice_window_3D();
+ _output->set_needs_shifting(true);
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_in = window.first_slice_window_3D();
+ Window slice_out = window.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+ slice_out.set(Window::DimZ, Window::Dimension(_depth_offset));
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice);
- add_3D_tensor_argument(idx, _output, 2, slice);
+ add_3D_tensor_argument(idx, _input, 1, slice_in);
+ add_3D_tensor_argument(idx, _output, 2, slice_out);
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window.slide_window_slice_3D(slice));
+ while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
index 28b5bd2d62..9343268d9e 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -173,16 +173,20 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con
const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height;
// Calculate input right and bottom border
- const int input_width = input->info()->dimension(0);
- const int input_height = input->info()->dimension(1);
- const int padding_right = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + 2), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_left - input_width;
- const int padding_bottom = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + 2), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_top - input_height;
+ const int input_width = input->info()->dimension(0);
+ const int input_height = input->info()->dimension(1);
+
+ const int input_total_width = std::max(int(input->info()->padding().left), int(_conv_pad_left)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_left));
+ const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_top)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_top));
+
+ const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_left;
+ const int input_padding_bottom = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_top;
BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0);
Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border);
- AccessWindowStatic input_access(input->info(), -_conv_pad_left, -_conv_pad_top, input_width + padding_right, input_height + padding_bottom);
+ AccessWindowStatic input_access(input->info(), -_conv_pad_left, -_conv_pad_top, input_width + input_padding_right, input_height + input_padding_bottom);
AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0);
AccessWindowStatic bias_access = AccessWindowStatic(nullptr, 0, 0, 0, 1);
@@ -224,6 +228,8 @@ void GCDepthwiseConvolutionLayer3x3Kernel::run(const Window &window)
_kernel.use();
+ _output->set_needs_shifting(true);
+
// Create input window and adjust
Window win_in = window;
win_in.adjust(Window::DimX, -_conv_pad_left, true);
@@ -246,6 +252,8 @@ void GCDepthwiseConvolutionLayer3x3Kernel::run(const Window &window)
add_1D_tensor_argument(idx, _biases, 4, slice_biases);
}
+ slice_out.shift(Window::DimX, -(_output->info()->padding()).left);
+
do
{
unsigned int idx = 0;
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index 1b94626356..bef30d5042 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -394,6 +394,8 @@ void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window)
_kernel.use();
+ _output->set_needs_shifting(true);
+
// Get initial windows
Window slice = window.first_slice_window_3D();
Window win_in = window;
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
index bc9c7eb55a..fac29024e3 100644
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -89,6 +89,8 @@ void GCNormalizePlanarYUVLayerKernel::run(const Window &window)
_kernel.use();
+ _output->set_needs_shifting(true);
+
Window slice = window.first_slice_window_3D();
Window slice_in;
@@ -100,15 +102,19 @@ void GCNormalizePlanarYUVLayerKernel::run(const Window &window)
add_1D_tensor_argument(idx, _mean, 3, slice_in);
add_1D_tensor_argument(idx, _sd, 4, slice_in);
+ slice_in = window.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
do
{
idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice);
+ add_3D_tensor_argument(idx, _input, 1, slice_in);
add_3D_tensor_argument(idx, _output, 2, slice);
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window.slide_window_slice_3D(slice));
+ while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
index c688cd4567..3a0944cd48 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
@@ -198,11 +198,14 @@ std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITenso
const int output_height = output->dimension(1);
const int output_padding_right = ceil_to_multiple(output_width, num_elems_processed_per_iteration) - output_width;
const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height;
- const int input_padding_right = ceil_to_multiple(input_width + 2 * border_size.right, num_elems_processed_per_iteration) - (input_width + 2 * border_size.right);
- const int input_padding_bottom = ceil_to_multiple(input_height + 2 * border_size.bottom, 1) - (input_height + 2 * border_size.bottom);
+
+ const int input_total_width = std::max(int(input->padding().left), int(pool_pad_x)) + input_width + std::max(int(input->padding().right), int(pool_pad_x));
+ const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_processed_per_iteration) - input_width - pool_pad_x;
+ const int input_total_height = std::max(int(input->padding().top), int(pool_pad_y)) + input_height + std::max(int(input->padding().bottom), int(pool_pad_y));
+ const int input_padding_bottom = input_total_height - input_height - pool_pad_y;
// Configure kernel window
- AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right + input_padding_right, input_height + border_size.bottom + input_padding_bottom);
+ AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + input_padding_right, input_height + input_padding_bottom);
AccessWindowStatic output_access(output, 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
bool window_changed = update_window_and_padding(win, input_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
@@ -340,13 +343,19 @@ void GCPoolingLayerKernel::run(const Window &window)
_kernel.use();
+ _output->set_needs_shifting(true);
+
Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
- Window slice = window_collapsed.first_slice_window_3D();
+
+ Window slice = window_collapsed.first_slice_window_3D();
+ Window slice_in_orig = window_collapsed.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
do
{
// Upsample input by pool size
- Window in_slice(slice); // NOLINT
+ Window in_slice(slice_in_orig); // NOLINT
in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration));
in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y));
@@ -358,5 +367,5 @@ void GCPoolingLayerKernel::run(const Window &window)
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window_collapsed.slide_window_slice_3D(slice));
+ while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_in_orig));
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
index f307cfb239..46d7ff9172 100644
--- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -128,9 +128,34 @@ void GCScaleKernel::configure(const IGCTensor *input, IGCTensor *output, Interpo
IGCKernel::configure(win);
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the tensor parameters
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the tensor parameters
_kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(0)));
_kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(1)));
_kernel.set_argument<float>(idx++, wr);
_kernel.set_argument<float>(idx++, hr);
}
+
+void GCScaleKernel::run(const Window &window)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ _kernel.use();
+
+ _output->set_needs_shifting(true);
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_in = window.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, 1, slice_in);
+ add_3D_tensor_argument(idx, _output, 2, slice);
+ _kernel.update_shader_params();
+ enqueue(*this, slice);
+ }
+ while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
+}
diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
index c2182171a6..21946b7f8d 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
@@ -39,7 +39,7 @@ using namespace arm_compute;
using namespace arm_compute::gles_compute;
GCTensorShiftKernel::GCTensorShiftKernel()
- : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U))
+ : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U)), _left_padding(0)
{
}
@@ -59,18 +59,18 @@ void GCTensorShiftKernel::configure(IGCTensor *input)
options.emplace(("#define " + dt_name));
unsigned int num_elems_written_per_iteration_x = input->info()->dimension(0) + input->info()->padding().left + input->info()->padding().right;
- unsigned int num_elems_written_per_iteration_y = 1;
- unsigned int num_elems_written_per_iteration_z = 1;
std::stringstream kernel_name;
kernel_name << "tensorshift";
_kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options));
- Window win = calculate_max_enlarged_window(*input->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_written_per_iteration_x);
+ Window win;
+ win.set(Window::DimX, Window::Dimension(0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_x));
+ win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimY);
+ win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimZ);
- update_window_and_padding(win, input_access);
+ _left_padding = _input->info()->padding().left;
IGCKernel::configure(win);
}
@@ -80,6 +80,11 @@ void GCTensorShiftKernel::run(const Window &window)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+ if(int(_left_padding) == 0 || !_input->needs_shifting())
+ {
+ return;
+ }
+
_kernel.use();
// Get initial windows
@@ -92,14 +97,7 @@ void GCTensorShiftKernel::run(const Window &window)
add_3D_tensor_argument(idx, _input, 1, slice);
- const PaddingSize &padding1 = _input->info()->padding();
-
- if(int(padding1.left) == 0)
- {
- break;
- }
-
- _kernel.set_argument(idx++, static_cast<unsigned int>(padding1.left));
+ _kernel.set_argument(idx++, static_cast<unsigned int>(_left_padding));
_kernel.update_shader_params();
enqueue(*this, slice, _lws);