diff options
author | Frank Lei <frank.lei@arm.com> | 2018-02-01 14:47:14 +0800 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:40 +0000 |
commit | 4406fd6cc4abded564d3791324e1f48bdfd34273 (patch) | |
tree | 22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp | |
parent | 898d399a0f62c15612a52df4bff5018e783214e4 (diff) | |
download | ComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz |
APPBROWSER-391: Fix GLES COMPUTE alignment issues
APPBROWSER-402: Performance optimization for squeezenet/xray model
Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp')
-rw-r--r-- | src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp | 18 |
1 file changed, 12 insertions, 6 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
index caec324de2..06cf40990c 100644
--- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -135,18 +135,24 @@ void GCArithmeticAdditionKernel::run(const Window &window)
 
     _kernel.use();
 
-    Window slice = window.first_slice_window_2D();
+    _output->set_needs_shifting(true);
+
+    Window slice = window.first_slice_window_3D();
+    Window slice_in = window.first_slice_window_3D();
+
+    slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
     do
     {
         unsigned int idx = 0;
         unsigned int binding = 1; // SSBO binding starts from 1.
-        add_2D_tensor_argument(idx, _input1, binding++, slice);
-        add_2D_tensor_argument(idx, _input2, binding++, slice);
-        add_2D_tensor_argument(idx, _output, binding++, slice);
+        add_3D_tensor_argument(idx, _input1, binding++, slice_in);
+        add_3D_tensor_argument(idx, _input2, binding++, slice_in);
+        add_3D_tensor_argument(idx, _output, binding++, slice);
 
         _kernel.update_shader_params();
 
         enqueue(*this, slice);
     }
-    while(window.slide_window_slice_2D(slice));
+    while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
 }