aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
diff options
context:
space:
mode:
author: Frank Lei <frank.lei@arm.com> 2018-02-01 14:47:14 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:40 +0000
commit: 4406fd6cc4abded564d3791324e1f48bdfd34273 (patch)
tree: 22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
parent: 898d399a0f62c15612a52df4bff5018e783214e4 (diff)
download: ComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz
APPBROWSER-391: Fix GLES COMPUTE alignment issues
APPBROWSER-402: Performance optimization for squeezenet/xray model

Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp')
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp 21
1 file changed, 15 insertions, 6 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
index c688cd4567..3a0944cd48 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
@@ -198,11 +198,14 @@ std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITenso
const int output_height = output->dimension(1);
const int output_padding_right = ceil_to_multiple(output_width, num_elems_processed_per_iteration) - output_width;
const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height;
- const int input_padding_right = ceil_to_multiple(input_width + 2 * border_size.right, num_elems_processed_per_iteration) - (input_width + 2 * border_size.right);
- const int input_padding_bottom = ceil_to_multiple(input_height + 2 * border_size.bottom, 1) - (input_height + 2 * border_size.bottom);
+
+ const int input_total_width = std::max(int(input->padding().left), int(pool_pad_x)) + input_width + std::max(int(input->padding().right), int(pool_pad_x));
+ const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_processed_per_iteration) - input_width - pool_pad_x;
+ const int input_total_height = std::max(int(input->padding().top), int(pool_pad_y)) + input_height + std::max(int(input->padding().bottom), int(pool_pad_y));
+ const int input_padding_bottom = input_total_height - input_height - pool_pad_y;
// Configure kernel window
- AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right + input_padding_right, input_height + border_size.bottom + input_padding_bottom);
+ AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + input_padding_right, input_height + input_padding_bottom);
AccessWindowStatic output_access(output, 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
bool window_changed = update_window_and_padding(win, input_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
@@ -340,13 +343,19 @@ void GCPoolingLayerKernel::run(const Window &window)
_kernel.use();
+ _output->set_needs_shifting(true);
+
Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
- Window slice = window_collapsed.first_slice_window_3D();
+
+ Window slice = window_collapsed.first_slice_window_3D();
+ Window slice_in_orig = window_collapsed.first_slice_window_3D();
+
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
do
{
// Upsample input by pool size
- Window in_slice(slice); // NOLINT
+ Window in_slice(slice_in_orig); // NOLINT
in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration));
in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y));
@@ -358,5 +367,5 @@ void GCPoolingLayerKernel::run(const Window &window)
_kernel.update_shader_params();
enqueue(*this, slice);
}
- while(window_collapsed.slide_window_slice_3D(slice));
+ while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_in_orig));
}