diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2021-12-10 16:17:56 +0000 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2022-07-13 14:47:44 +0000 |
commit | 4bfc70e31766587c951204c93a127a486e007d0c (patch) | |
tree | 198b2150c43b14c571c100b8dfa0d3aaa4c968d0 /src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp | |
parent | 29cab36ddd73c174bf6b2de453663aa49c1cc576 (diff) | |
download | ComputeLibrary-4bfc70e31766587c951204c93a127a486e007d0c.tar.gz |
Add Gemm MMUL Reshaped Only Rhs Support for FP32/FP16
This patch introduces a GEMM routine that is optimized for Arm(R) Mali(TM)-G715 and Arm(R) Mali(TM)-G615
Resolves: COMPMID-5216
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I2e5d7806f5904347185bb3e250f73d73d6669dba
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7914
Reviewed-by: SiCong Li <sicong.li@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp')
-rw-r--r-- | src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp | 19 |
1 files changed, 18 insertions, 1 deletions
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
index 1bf27ba277..67da06102d 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -110,6 +110,23 @@ Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info,
 
     return Status{};
 }
+
+bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
+                              const DataType data_type, unsigned int &best_m0, unsigned int &best_n0)
+{
+    ARM_COMPUTE_UNUSED(n, k, b, data_type);
+
+    const unsigned int mmul_k0 = 4;
+    best_m0 = 4;
+    best_n0 = 4;
+
+    const unsigned int ceil_to_multiple_m_m0 = ceil_to_multiple(m, best_m0);
+    const unsigned int m_div_m0 = ceil_to_multiple_m_m0 / best_m0;
+    const unsigned int ceil_to_multiple_m_div_m0_mmul_k0 = ceil_to_multiple(m_div_m0, mmul_k0);
+    const unsigned int gws_y = ceil_to_multiple_m_div_m0_mmul_k0 / mmul_k0;
+
+    return ((k % mmul_k0) == 0) && (gws_y > 4);
+}
 } // namespace gemm
 } // namespace kernels
 } // namespace opencl