From 85cafff0dd99b6f94a77a7d7933682fa7c6a4a70 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Mon, 18 Dec 2023 13:29:31 +0000 Subject: =?UTF-8?q?Add=20Mali=E2=84=A2-G720=20and=20Mali=E2=84=A2-G620=20a?= =?UTF-8?q?s=20GpuTargets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the latest GPUs as GPU targets and sets up kernel selection heuristics for MatMul to address some nightly issues. Resolves: COMPMID-6766 Change-Id: I29dbb08c5ecfb3fcd63230b0b1675ab557074aca Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10902 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Jakub Sujak Benchmark: Arm Jenkins --- .../kernels/gemm/native/ClGemmNativeKernelConfig.h | 9 ++-- .../gemm/reshaped/ClGemmReshapedKernelConfig.h | 9 ++-- .../ClGemmReshapedOnlyRhsKernelConfig.h | 9 ++-- src/gpu/cl/operators/ClMatMul.cpp | 62 ++++------------------ 4 files changed, 24 insertions(+), 65 deletions(-) (limited to 'src/gpu') diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h index 955bb3c01a..22aa1e2034 100644 --- a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h +++ b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CL_GEMM_NATIVE_KERNEL_CONFIGURATION_H -#define ARM_COMPUTE_CL_GEMM_NATIVE_KERNEL_CONFIGURATION_H +#ifndef ACL_SRC_GPU_CL_KERNELS_GEMM_NATIVE_CLGEMMNATIVEKERNELCONFIG_H +#define ACL_SRC_GPU_CL_KERNELS_GEMM_NATIVE_CLGEMMNATIVEKERNELCONFIG_H #include "src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h" #include "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h" @@ -58,6 +58,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -68,4 +69,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /*ARM_COMPUTE_CL_GEMM_NATIVE_KERNEL_CONFIGURATION_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_GEMM_NATIVE_CLGEMMNATIVEKERNELCONFIG_H diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h index 83928b3f4f..6327ee3027 100644 --- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h +++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CL_GEMM_RESHAPED_KERNEL_CONFIGURATION_H -#define ARM_COMPUTE_CL_GEMM_RESHAPED_KERNEL_CONFIGURATION_H +#ifndef ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_CLGEMMRESHAPEDKERNELCONFIG_H +#define ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_CLGEMMRESHAPEDKERNELCONFIG_H #include "src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h" #include "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h" @@ -56,6 +56,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -66,4 +67,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_RESHAPED_KERNEL_CONFIGURATION_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_CLGEMMRESHAPEDKERNELCONFIG_H diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h index e07ad993ed..1f0c5c2d87 100644 --- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h +++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CL_GEMM_RESHAPED_ONLY_RHS_KERNEL_CONFIGURATION_H -#define ARM_COMPUTE_CL_GEMM_RESHAPED_ONLY_RHS_KERNEL_CONFIGURATION_H +#ifndef ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_ONLY_RHS_CLGEMMRESHAPEDONLYRHSKERNELCONFIG_H +#define ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_ONLY_RHS_CLGEMMRESHAPEDONLYRHSKERNELCONFIG_H #include "src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h" #include "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h" @@ -56,6 +56,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -66,4 +67,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_RESHAPED_ONLY_RHS_KERNEL_CONFIGURATION_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_ONLY_RHS_CLGEMMRESHAPEDONLYRHSKERNELCONFIG_H diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp index 43303001d0..28a2aa2540 100644 --- a/src/gpu/cl/operators/ClMatMul.cpp +++ b/src/gpu/cl/operators/ClMatMul.cpp @@ -34,6 +34,7 @@ #include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h" #include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h" #include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h" +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h" #include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h" using namespace arm_compute::cl_matmul; @@ -42,57 +43,6 @@ namespace arm_compute { namespace opencl { -namespace -{ -enum class MatMulKernelType -{ - /** Native matrix multiplication for FP types */ - NATIVE_FP, - - /** Native matrix multiplication for quantized types */ - NATIVE_QUANTIZED, - - /** Native matrix multiplication using MMUL extension for FP types */ - NATIVE_MMUL_FP, - - /** Native matrix multiplication using MMUL extension 
for Quantized types */ - NATIVE_MMUL_QUANTIZED -}; - -MatMulKernelType get_matmul_kernel(const ITensorInfo *lhs, - const ITensorInfo *rhs, - const MatMulInfo &matmul_info, - const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_UNUSED(lhs, rhs, matmul_info, act_info); - - const bool is_quantized = is_data_type_quantized_asymmetric(lhs->data_type()); - const bool is_mmul_supported = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()); - - const int k = matmul_info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x(); - - if (is_quantized) - { - // MMUL kernel works only when K is a multiple of 16 - if (is_mmul_supported && !act_info.enabled() && k % 16 == 0) - { - return MatMulKernelType::NATIVE_MMUL_QUANTIZED; - } - - return MatMulKernelType::NATIVE_QUANTIZED; - } - else - { - // MMUL kernel works only when K is a multiple of 4 - if (is_mmul_supported && !act_info.enabled() && k % 4 == 0) - { - return MatMulKernelType::NATIVE_MMUL_FP; - } - - return MatMulKernelType::NATIVE_FP; - } -} -} // namespace using namespace arm_compute::opencl::kernels; ClMatMul::ClMatMul() @@ -117,7 +67,10 @@ Status ClMatMul::validate(const ITensorInfo *lhs, const MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info); - switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info)) + const auto kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target); + const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info); + + switch (kernel_type) { case MatMulKernelType::NATIVE_FP: return ClMatMulNativeKernel::validate(lhs, rhs, nullptr /* bias */, dst, kernel_info, act_info); @@ -149,7 +102,10 @@ void ClMatMul::configure(const CLCompileContext &compile_context, const auto kernel_config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target); const MatMulKernelInfo kernel_info = kernel_config->configure(lhs, rhs, matmul_info); - switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info)) + const auto 
kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target); + const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info); + + switch (kernel_type) { case MatMulKernelType::NATIVE_FP: { -- cgit v1.2.1