From 2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 9 Apr 2019 12:03:05 +0100 Subject: COMPMID-2110: Enable CLGEMMLowpMatrixMultiplyReshapeOnlyRHSKernel in CLGEMMLowp Change-Id: Ic32c803c3e2a067de10a7e46c85c962a970957b6 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/969 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/gemmlowp.cl | 2 +- .../CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp | 14 +++----------- .../CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index cf377e1114..033b4b4942 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -4006,4 +4006,4 @@ __kernel void gemmlowp_output_stage_quantize_down_float(TENSOR3D_DECLARATION(src // Store the result vstore4(res, 0, dst_addr); } -#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET) +#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET) \ No newline at end of file diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp index f696f0b253..483bab832f 100644 --- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp @@ -133,21 +133,13 @@ std::pair CLGEMMReshapedOnlyRHSKernelConfi { if(m == 1) { - if(n > 2048) - { - const unsigned int h0 = std::max(n / 4, static_cast(1)); - return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true); - } - else - { - const unsigned int h0 = std::max(n / 2, static_cast(1)); - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true); - } + const unsigned int h0 = std::max(n / 2, static_cast(1)); + return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true); } else { const unsigned int h0 = std::max(n / 4, static_cast(1)); - return configure_lhs_rhs_info(m, n, 4, 1, 16, 1, h0, false, true, false, true); + return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, h0, false, true, false, true); } } } diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp index b1b0a16b5d..eca24169b9 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -152,7 +152,7 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe window_changed = update_window_and_padding(win, input0_access, input1_access) || // window used by the execute_window_loop update_window_and_padding(win_out, output_access); // window used to update the padding requirements of output tensor - output_access.set_valid_region(win_out, ValidRegion(Coordinates(0, 0), output->tensor_shape())); + output_access.set_valid_region(win_out, ValidRegion(Coordinates(), output->tensor_shape())); // Collapse along the Z direction // This collapse needs to be here in order to tune the Z dimension of LWS -- cgit v1.2.1