From 4406fd6cc4abded564d3791324e1f48bdfd34273 Mon Sep 17 00:00:00 2001 From: Frank Lei Date: Thu, 1 Feb 2018 14:47:14 +0800 Subject: APPBROWSER-391: Fix GLES COMPUTE alignment issues APPBROWSER-402: Performance optimization for squeezenet/xray model Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- .../GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp') diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp index c688cd4567..3a0944cd48 100644 --- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp @@ -198,11 +198,14 @@ std::tuple validate_and_configure_window(ITenso const int output_height = output->dimension(1); const int output_padding_right = ceil_to_multiple(output_width, num_elems_processed_per_iteration) - output_width; const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height; - const int input_padding_right = ceil_to_multiple(input_width + 2 * border_size.right, num_elems_processed_per_iteration) - (input_width + 2 * border_size.right); - const int input_padding_bottom = ceil_to_multiple(input_height + 2 * border_size.bottom, 1) - (input_height + 2 * border_size.bottom); + + const int input_total_width = std::max(int(input->padding().left), int(pool_pad_x)) + input_width + std::max(int(input->padding().right), int(pool_pad_x)); + const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_processed_per_iteration) - input_width - pool_pad_x; + const int input_total_height = std::max(int(input->padding().top), int(pool_pad_y)) + input_height + std::max(int(input->padding().bottom), int(pool_pad_y)); + const int input_padding_bottom = input_total_height - input_height - pool_pad_y; // Configure kernel window - AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right + input_padding_right, input_height + border_size.bottom + input_padding_bottom); + AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + input_padding_right, input_height + input_padding_bottom); AccessWindowStatic output_access(output, 0, 0, output_width + output_padding_right, output_height + output_padding_bottom); bool window_changed = update_window_and_padding(win, input_access, output_access); output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); @@ -340,13 +343,19 @@ void GCPoolingLayerKernel::run(const Window &window) _kernel.use(); + _output->set_needs_shifting(true); + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - Window slice = window_collapsed.first_slice_window_3D(); + + Window slice = window_collapsed.first_slice_window_3D(); + Window slice_in_orig = window_collapsed.first_slice_window_3D(); + + slice.shift(Window::DimX, -(_output->info()->padding()).left); do { // Upsample input by pool size - Window in_slice(slice); // NOLINT + Window in_slice(slice_in_orig); // NOLINT in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration)); in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y)); @@ -358,5 +367,5 @@ void GCPoolingLayerKernel::run(const Window &window) _kernel.update_shader_params(); enqueue(*this, slice); } - while(window_collapsed.slide_window_slice_3D(slice)); + while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_in_orig)); } -- cgit v1.2.1