diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-05-04 18:52:25 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:51:17 +0000 |
commit | 535fedd992006b671ec194bafdc18246a27121b5 (patch) | |
tree | c6833dd68b25eda420d73e44cbcf5824c0be4f36 /src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | |
parent | 6cf188c33aa89d2dba5367ad43f1d5f07b46cef2 (diff) | |
download | ComputeLibrary-535fedd992006b671ec194bafdc18246a27121b5.tar.gz |
COMPMID-1117: TransposeAccessWindow leads to high padding
Switches CLGEMMMatrixMultiplyKernel and CLGEMMTranspose1xWKernel to use
AccessWindowStatic
Change-Id: I21533d4218215d5b8f84b23c603062678eccb1ed
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130244
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 3 |
1 file changed, 2 insertions, 1 deletion
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index f69a39e4ad..54abb1cde0 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
 
+#include "arm_compute/core/AccessWindowStatic.h"
 #include "arm_compute/core/AccessWindowTranspose.h"
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -80,7 +81,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     // Configure window in case of configured output
     if(output->total_size() != 0)
     {
-        AccessWindowTranspose output_access(output, 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x);
+        AccessWindowStatic output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), scale_x), output->dimension(1));
         window_changed = window_changed || update_window_and_padding(win, input_access, output_access);
         output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->tensor_shape()));
     }