From 535fedd992006b671ec194bafdc18246a27121b5 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Fri, 4 May 2018 18:52:25 +0100
Subject: COMPMID-1117: TransposeAccessWindow leads to high padding

Switches CLGEMMMatrixMultiplyKernel and CLGEMMTranspose1xWKernel to use
AccessWindowStatic

Change-Id: I21533d4218215d5b8f84b23c603062678eccb1ed
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130244
Tested-by: Jenkins
Reviewed-by: Gian Marco Iodice
---
 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 4 +++-
 src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/core/CL')

diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 805a594af6..2761247684 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -126,7 +126,9 @@ inline std::pair validate_and_configure_window(ITensorInfo *inpu
         win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
 
         AccessWindowRectangle input0_access(input0, 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f);
-        AccessWindowTranspose input1_access(input1, 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f);
+        AccessWindowStatic input1_access(input1, 0, 0,
+                                         ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x),
+                                         ceil_to_multiple(input1->dimension(1), num_elems_processed_per_iteration_y));
         AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
 
         window_changed = update_window_and_padding(win, input0_access, input1_access, output_access);
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index f69a39e4ad..54abb1cde0 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
 
+#include "arm_compute/core/AccessWindowStatic.h"
 #include "arm_compute/core/AccessWindowTranspose.h"
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -80,7 +81,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen
     // Configure window in case of configured output
     if(output->total_size() != 0)
     {
-        AccessWindowTranspose output_access(output, 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x);
+        AccessWindowStatic output_access(output, 0, 0, ceil_to_multiple(output->dimension(0), scale_x), output->dimension(1));
         window_changed = window_changed || update_window_and_padding(win, input_access, output_access);
         output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->tensor_shape()));
     }
-- 
cgit v1.2.1