From 352c07ddd49842b5c3a8e5a2b5a90832bfb70091 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 3 May 2023 12:21:38 +0100 Subject: =?UTF-8?q?Implement=20OpenCL=20MatMul=20heuristic=20for=20Arm?= =?UTF-8?q?=C2=AE=20Mali=E2=84=A2-G710?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add heuristic for f32/f16 and int8 quantized data types - Include MatMul configuration selection in the CLMatMul operator Resolves COMPMID-5950, COMPMID-5957, COMPMID-5959, COMPMID-5925, COMPMID-5926, COMPMID-5927, COMPMID-5928 Signed-off-by: Gian Marco Iodice Change-Id: Ic222148da0337b88d4d8c960e3b6ac31003d8bcb Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9564 Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- .../ClMatMulNativeDefaultConfigValhall.cpp | 423 +++++++++++++++++++++ .../ClMatMulNativeDefaultConfigValhall.h | 53 +++ .../matmul_native/ClMatMulNativeHelpers.cpp | 116 ++++++ .../matmul_native/ClMatMulNativeHelpers.h | 72 ++++ .../matmul_native/ClMatMulNativeKernelConfig.h | 61 +++ .../matmul_native/IClMatMulNativeKernelConfig.h | 115 ++++++ 6 files changed, 840 insertions(+) create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h create mode 100644 src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h (limited to 'src/runtime') diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp new file mode 100644 index 0000000000..01102b3d60 --- /dev/null +++ 
b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/TensorInfo.h" +#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h" +#include + +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h" + +namespace arm_compute +{ +namespace cl_matmul +{ +ClMatMulNativeDefaultConfigValhall::ClMatMulNativeDefaultConfigValhall(GPUTarget gpu) + : IClMatMulNativeKernelConfig(gpu) +{ +} + +MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) +{ + using ConfigurationFunctionExecutorPtr = MatMulKernelInfo (ClMatMulNativeDefaultConfigValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo & info); + + ClMatMulNativeConfigArray configs_G710(&ClMatMulNativeDefaultConfigValhall::configure_G710_f32, + &ClMatMulNativeDefaultConfigValhall::configure_G710_f16, + &ClMatMulNativeDefaultConfigValhall::configure_G710_u8); + + ConfigurationFunctionExecutorPtr func = nullptr; + switch(_target) + { + case GPUTarget::G710: + default: + func = configs_G710.get_function(lhs->data_type()); + break; + } + + const bool adj_lhs = info.adj_lhs(); + const bool adj_rhs = info.adj_rhs(); + + TensorShape lhs_shape = lhs->tensor_shape(); + TensorShape rhs_shape = rhs->tensor_shape(); + + const bool is_batched = lhs_shape.num_dimensions() > 2; + + if(is_batched == true) + { + lhs_shape.collapse_from(2); + } + + const unsigned int m = adj_lhs ? lhs_shape.x() : lhs_shape.y(); + const unsigned int n = adj_rhs ? rhs_shape.y() : rhs_shape.x(); + const unsigned int k = adj_lhs ? 
lhs_shape.y() : lhs_shape.x(); + const unsigned int b = lhs_shape.z(); + + ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for matmul native"); + return (this->*func)(m, n, k, b, rhs->lock_paddings(), info); +} + +MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info) +{ + const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = + { + { 3136, 64, 64, 36, 4, 4, 16, 1 }, + { 4096, 48, 32, 36, 4, 4, 4, 1 }, + { 688, 92, 68, 32, 2, 8, 4, 1 }, + { 24, 464, 412, 24, 2, 8, 4, 1 }, + { 112, 184, 144, 28, 4, 4, 16, 1 }, + { 5776, 64, 32, 36, 2, 4, 16, 1 }, + { 1568, 64, 40, 36, 2, 8, 8, 1 }, + { 2920, 64, 64, 24, 4, 4, 16, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt = + { + { 3136, 64, 64, 36, 4, 4, 8, 0 }, + { 4096, 48, 32, 36, 4, 4, 8, 0 }, + { 688, 92, 68, 32, 5, 4, 4, 0 }, + { 24, 464, 412, 24, 6, 2, 8, 0 }, + { 112, 184, 144, 28, 6, 4, 4, 0 }, + { 5776, 64, 32, 36, 5, 4, 4, 0 }, + { 1568, 64, 40, 36, 4, 4, 8, 0 }, + { 2920, 64, 64, 24, 4, 4, 8, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = + { + { 3136, 64, 64, 36, 4, 4, 4, 1 }, + { 4096, 48, 32, 36, 2, 2, 16, 1 }, + { 688, 92, 68, 32, 4, 4, 4, 1 }, + { 24, 464, 412, 24, 6, 2, 8, 1 }, + { 112, 184, 144, 28, 4, 2, 16, 1 }, + { 5776, 64, 32, 36, 4, 4, 4, 1 }, + { 1568, 64, 40, 36, 4, 4, 8, 1 }, + { 2920, 64, 64, 24, 4, 4, 4, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t = + { + { 3136, 64, 64, 36, 5, 4, 4, 0 }, + { 4096, 48, 32, 36, 5, 4, 4, 0 }, + { 688, 92, 68, 32, 5, 4, 4, 0 }, + { 24, 464, 412, 24, 6, 2, 4, 0 }, + { 112, 184, 144, 28, 5, 4, 4, 0 }, + { 5776, 64, 32, 36, 5, 4, 4, 0 }, + { 1568, 64, 40, 36, 5, 4, 4, 0 }, + { 2920, 64, 64, 24, 6, 2, 4, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = + { + { 3136, 64, 64, 36, 4, 4, 16, 1 }, + { 4096, 48, 32, 36, 4, 4, 4, 1 }, 
+ { 688, 92, 68, 32, 2, 8, 4, 1 }, + { 24, 464, 412, 24, 2, 8, 4, 1 }, + { 112, 184, 144, 28, 4, 4, 16, 1 }, + { 5776, 64, 32, 36, 2, 8, 8, 1 }, + { 1568, 64, 40, 36, 4, 4, 8, 1 }, + { 2920, 64, 64, 24, 4, 4, 16, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt = + { + { 3136, 64, 64, 36, 4, 4, 4, 0 }, + { 4096, 48, 32, 36, 4, 4, 4, 0 }, + { 688, 92, 68, 32, 4, 4, 4, 0 }, + { 24, 464, 412, 24, 4, 4, 4, 0 }, + { 112, 184, 144, 28, 4, 4, 4, 0 }, + { 5776, 64, 32, 36, 4, 4, 8, 0 }, + { 1568, 64, 40, 36, 4, 4, 4, 0 }, + { 2920, 64, 64, 24, 4, 4, 4, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = + { + { 3136, 64, 64, 36, 4, 4, 4, 1 }, + { 4096, 48, 32, 36, 4, 4, 4, 1 }, + { 688, 92, 68, 32, 4, 4, 4, 1 }, + { 24, 464, 412, 24, 2, 2, 16, 1 }, + { 112, 184, 144, 28, 4, 4, 4, 1 }, + { 5776, 64, 32, 36, 4, 4, 4, 1 }, + { 1568, 64, 40, 36, 4, 4, 4, 1 }, + { 2920, 64, 64, 24, 4, 4, 4, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t = + { + { 3136, 64, 64, 36, 4, 4, 4, 0 }, + { 4096, 48, 32, 36, 4, 4, 4, 0 }, + { 688, 92, 68, 32, 4, 4, 4, 0 }, + { 24, 464, 412, 24, 4, 2, 8, 0 }, + { 112, 184, 144, 28, 4, 4, 4, 0 }, + { 5776, 64, 32, 36, 4, 4, 4, 0 }, + { 1568, 64, 40, 36, 4, 4, 4, 0 }, + { 2920, 64, 64, 24, 4, 4, 4, 0 } + }; + + const bool adj_lhs = info.adj_lhs(); + const bool adj_rhs = info.adj_rhs(); + + const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr; + const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr; + + if((adj_lhs == false) && (adj_rhs == false)) + { + configs_best_to_use = &configs_mnkb_best_nt_nt; + configs_fallback_to_use = &configs_mnkb_fallback_nt_nt; + } + else if((adj_lhs == false) && (adj_rhs == true)) + { + configs_best_to_use = &configs_mnkb_best_nt_t; + configs_fallback_to_use = &configs_mnkb_fallback_nt_t; + } + else if((adj_lhs == true) && (adj_rhs == false)) + { + configs_best_to_use = &configs_mnkb_best_t_nt; + configs_fallback_to_use = 
&configs_mnkb_fallback_t_nt; + } + else + { + configs_best_to_use = &configs_mnkb_best_t_t; + configs_fallback_to_use = &configs_mnkb_fallback_t_t; + } + + MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b); + MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b); + + return select_info(desc0, + desc1, + m, n, k, b, DataType::F32, rhs_lock_padding); +} + +MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info) +{ + const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = + { + { 3136, 64, 64, 36, 4, 4, 16, 1 }, + { 4096, 48, 32, 36, 4, 4, 8, 1 }, + { 688, 92, 68, 32, 4, 4, 16, 1 }, + { 24, 464, 412, 24, 4, 4, 4, 1 }, + { 112, 184, 144, 28, 4, 4, 16, 1 }, + { 5776, 64, 32, 36, 4, 4, 8, 1 }, + { 1568, 64, 40, 36, 4, 4, 8, 1 }, + { 2920, 64, 64, 24, 4, 4, 16, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt = + { + { 3136, 64, 64, 36, 6, 4, 8, 0 }, + { 4096, 48, 32, 36, 6, 4, 8, 0 }, + { 688, 92, 68, 32, 6, 4, 8, 0 }, + { 24, 464, 412, 24, 4, 4, 8, 0 }, + { 112, 184, 144, 28, 6, 4, 8, 0 }, + { 5776, 64, 32, 36, 6, 4, 8, 0 }, + { 1568, 64, 40, 36, 6, 4, 8, 0 }, + { 2920, 64, 64, 24, 6, 4, 8, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = + { + { 3136, 64, 64, 36, 6, 4, 8, 1 }, + { 4096, 48, 32, 36, 6, 4, 8, 1 }, + { 688, 92, 68, 32, 4, 4, 4, 1 }, + { 24, 464, 412, 24, 6, 2, 4, 1 }, + { 112, 184, 144, 28, 4, 2, 16, 1 }, + { 5776, 64, 32, 36, 6, 4, 8, 1 }, + { 1568, 64, 40, 36, 6, 4, 8, 1 }, + { 2920, 64, 64, 24, 6, 4, 8, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t = + { + { 3136, 64, 64, 36, 6, 2, 16, 0 }, + { 4096, 48, 32, 36, 5, 4, 8, 0 }, + { 688, 92, 68, 32, 6, 2, 16, 0 }, + { 24, 464, 412, 24, 6, 2, 16, 0 }, + { 112, 184, 144, 28, 6, 2, 16, 0 }, + { 5776, 64, 32, 36, 5, 4, 8, 0 }, + { 1568, 64, 
40, 36, 5, 4, 8, 0 }, + { 2920, 64, 64, 24, 6, 2, 16, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = + { + { 3136, 64, 64, 36, 4, 4, 16, 1 }, + { 4096, 48, 32, 36, 4, 4, 4, 1 }, + { 688, 92, 68, 32, 4, 4, 4, 1 }, + { 24, 464, 412, 24, 4, 4, 4, 1 }, + { 112, 184, 144, 28, 4, 4, 4, 1 }, + { 5776, 64, 32, 36, 4, 4, 4, 1 }, + { 1568, 64, 40, 36, 4, 4, 4, 1 }, + { 2920, 64, 64, 24, 4, 4, 4, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt = + { + { 3136, 64, 64, 36, 4, 4, 4, 0 }, + { 4096, 48, 32, 36, 4, 4, 4, 0 }, + { 688, 92, 68, 32, 4, 4, 4, 0 }, + { 24, 464, 412, 24, 4, 4, 4, 0 }, + { 112, 184, 144, 28, 4, 4, 4, 0 }, + { 5776, 64, 32, 36, 4, 4, 4, 0 }, + { 1568, 64, 40, 36, 4, 4, 4, 0 }, + { 2920, 64, 64, 24, 4, 4, 4, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = + { + { 3136, 64, 64, 36, 4, 4, 16, 1 }, + { 4096, 48, 32, 36, 4, 4, 8, 1 }, + { 688, 92, 68, 32, 4, 4, 4, 1 }, + { 24, 464, 412, 24, 4, 2, 8, 1 }, + { 112, 184, 144, 28, 4, 2, 16, 1 }, + { 5776, 64, 32, 36, 4, 4, 16, 1 }, + { 1568, 64, 40, 36, 4, 4, 8, 1 }, + { 2920, 64, 64, 24, 4, 4, 16, 1 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t = + { + { 3136, 64, 64, 36, 4, 4, 8, 0 }, + { 4096, 48, 32, 36, 4, 4, 8, 0 }, + { 688, 92, 68, 32, 4, 4, 8, 0 }, + { 24, 464, 412, 24, 4, 4, 8, 0 }, + { 112, 184, 144, 28, 4, 4, 8, 0 }, + { 5776, 64, 32, 36, 4, 4, 8, 0 }, + { 1568, 64, 40, 36, 4, 4, 8, 0 }, + { 2920, 64, 64, 24, 4, 4, 8, 0 } + }; + + const bool adj_lhs = info.adj_lhs(); + const bool adj_rhs = info.adj_rhs(); + + const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr; + const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr; + + if((adj_lhs == false) && (adj_rhs == false)) + { + configs_best_to_use = &configs_mnkb_best_nt_nt; + configs_fallback_to_use = &configs_mnkb_fallback_nt_nt; + } + else if((adj_lhs == false) && (adj_rhs == true)) + { + configs_best_to_use = &configs_mnkb_best_nt_t; + 
configs_fallback_to_use = &configs_mnkb_fallback_nt_t; + } + else if((adj_lhs == true) && (adj_rhs == false)) + { + configs_best_to_use = &configs_mnkb_best_t_nt; + configs_fallback_to_use = &configs_mnkb_fallback_t_nt; + } + else + { + configs_best_to_use = &configs_mnkb_best_t_t; + configs_fallback_to_use = &configs_mnkb_fallback_t_t; + } + + MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b); + MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b); + + return select_info(desc0, + desc1, + m, n, k, b, DataType::F16, rhs_lock_padding); +} + +MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info) +{ + ARM_COMPUTE_UNUSED(rhs_lock_padding); + + const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = + { + { 3136, 64, 64, 36, 6, 4, 4, 0 }, + { 4096, 48, 32, 36, 6, 4, 4, 0 }, + { 688, 92, 68, 32, 2, 8, 4, 0 }, + { 24, 464, 412, 24, 4, 4, 4, 0 }, + { 112, 184, 144, 28, 6, 4, 4, 0 }, + { 5776, 64, 32, 36, 6, 4, 4, 0 }, + { 1568, 64, 40, 36, 6, 4, 4, 0 }, + { 2920, 64, 64, 24, 5, 4, 4, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = + { + { 3136, 64, 64, 36, 4, 4, 16, 0 }, + { 4096, 48, 32, 36, 4, 4, 16, 0 }, + { 688, 92, 68, 32, 4, 4, 16, 0 }, + { 24, 464, 412, 24, 6, 2, 16, 0 }, + { 112, 184, 144, 28, 4, 4, 16, 0 }, + { 5776, 64, 32, 36, 4, 4, 16, 0 }, + { 1568, 64, 40, 36, 6, 4, 4, 0 }, + { 2920, 64, 64, 24, 4, 4, 16, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = + { + { 3136, 64, 64, 36, 4, 4, 8, 0 }, + { 4096, 48, 32, 36, 4, 4, 8, 0 }, + { 688, 92, 68, 32, 4, 4, 4, 0 }, + { 24, 464, 412, 24, 4, 4, 4, 0 }, + { 112, 184, 144, 28, 4, 4, 8, 0 }, + { 5776, 64, 32, 36, 4, 4, 8, 0 }, + { 1568, 64, 40, 36, 4, 4, 8, 0 }, + { 2920, 64, 64, 24, 4, 4, 8, 0 } + }; + + const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = + { + { 3136, 
64, 64, 36, 4, 2, 16, 0 }, + { 4096, 48, 32, 36, 4, 4, 4, 0 }, + { 688, 92, 68, 32, 4, 4, 8, 0 }, + { 24, 464, 412, 24, 4, 2, 16, 0 }, + { 112, 184, 144, 28, 4, 2, 16, 0 }, + { 5776, 64, 32, 36, 4, 4, 4, 0 }, + { 1568, 64, 40, 36, 4, 4, 8, 0 }, + { 2920, 64, 64, 24, 4, 2, 16, 0 } + }; + + const bool adj_lhs = info.adj_lhs(); + const bool adj_rhs = info.adj_rhs(); + + if((adj_lhs == false) && (adj_rhs == false)) + { + return find_info(configs_mnkb_best_nt_nt, adj_lhs, adj_rhs, m, n, k, b); + } + else if((adj_lhs == false) && (adj_rhs == true)) + { + return find_info(configs_mnkb_best_nt_t, adj_lhs, adj_rhs, m, n, k, b); + } + else if((adj_lhs == true) && (adj_rhs == false)) + { + return find_info(configs_mnkb_best_t_nt, adj_lhs, adj_rhs, m, n, k, b); + } + else + { + return find_info(configs_mnkb_best_t_t, adj_lhs, adj_rhs, m, n, k, b); + } +} +} // namespace cl_matmul +} // namespace arm_compute diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h new file mode 100644 index 0000000000..fe167d18dd --- /dev/null +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL +#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL + +#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h" + +namespace arm_compute +{ +namespace cl_matmul +{ +/** Valhall based OpenCL matmul configuration */ +class ClMatMulNativeDefaultConfigValhall final : public IClMatMulNativeKernelConfig +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + ClMatMulNativeDefaultConfigValhall(GPUTarget gpu); + + // Inherited overridden method + MatMulKernelInfo configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) override; + +private: + MatMulKernelInfo configure_G710_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info); + MatMulKernelInfo configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info); + MatMulKernelInfo configure_G710_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info); +}; +} // namespace cl_matmul +} // namespace arm_compute +#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL */ diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp new file mode 100644 index 0000000000..b9e0d5adf8 ---
/dev/null +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h" + +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h" + +#include +#include + +namespace arm_compute +{ +namespace cl_matmul +{ +MatMulKernelInfo select_info(const MatMulKernelInfo &info0, + const MatMulKernelInfo &info1, + unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool rhs_lock_padding) +{ + ARM_COMPUTE_ERROR_ON_MSG(info1.export_rhs_to_cl_image == true, "The fallback MatMul configuration cannot have export_to_cl_image = true"); + ARM_COMPUTE_ERROR_ON_MSG(info0.adj_lhs != info1.adj_lhs, "The MatMul configurations must have the same adj_lhs value"); + ARM_COMPUTE_ERROR_ON_MSG(info0.adj_rhs != info1.adj_rhs, "The MatMul configurations must have the same adj_rhs value"); + + const bool adj_lhs = info0.adj_lhs; + const bool adj_rhs = info0.adj_rhs; + + TensorInfo lhs_info = !adj_lhs ? TensorInfo(TensorShape(k, m, b), 1, data_type) : TensorInfo(TensorShape(m, k, b), 1, data_type); + TensorInfo rhs_info = !adj_rhs ? 
TensorInfo(TensorShape(n, k, b), 1, data_type) : TensorInfo(TensorShape(k, n, b), 1, data_type); + TensorInfo dst_info; + + if(rhs_lock_padding == false) + { + if(bool(opencl::kernels::ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, &dst_info, info0))) + { + return info0; + } + else + { + return info1; + } + } + else + { + return info1; + } +} + +MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs, bool adj_lhs, bool adj_rhs, unsigned int m, unsigned int n, unsigned int k, unsigned int b) +{ + size_t min_acc = std::numeric_limits<size_t>::max(); + size_t min_idx = 0; + + ARM_COMPUTE_ERROR_ON(configs.size() == 0); + const size_t num_rows = configs.size(); + const size_t num_cols = configs[0].size(); + + ARM_COMPUTE_ERROR_ON_MSG(num_cols != 8U, "The entry should have 8 integer values representing: M, N, K, B, M0, N0, K0, IMG_RHS"); + ARM_COMPUTE_UNUSED(num_cols); + + // Find nearest GeMM workload + // Note: the workload does not depend on the K dimension + for(size_t y = 0; y < num_rows; ++y) + { + size_t mc0 = static_cast<size_t>(configs[y][0]); + size_t nc0 = static_cast<size_t>(configs[y][1]); + size_t kc0 = static_cast<size_t>(configs[y][2]); + size_t bc0 = static_cast<size_t>(configs[y][3]); + + size_t acc = 0; + acc += (m - mc0) * (m - mc0); + acc += (n - nc0) * (n - nc0); + acc += (k - kc0) * (k - kc0); + acc += (b - bc0) * (b - bc0); + acc = std::sqrt(acc); + if(acc < min_acc) + { + min_acc = acc; + min_idx = y; + } + } + + // Get the configuration from the nearest GeMM shape + MatMulKernelInfo desc; + desc.adj_lhs = adj_lhs; + desc.adj_rhs = adj_rhs; + desc.m0 = configs[min_idx][4]; + desc.n0 = configs[min_idx][5]; + desc.k0 = configs[min_idx][6]; + desc.export_rhs_to_cl_image = configs[min_idx][7]; + + return desc; +} +} // namespace cl_matmul +} // namespace arm_compute diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h new file mode 100644 index 0000000000..3881617558 --- /dev/null +++ 
b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS +#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +// Forward declaration +struct MatMulKernelInfo; + +namespace cl_matmul +{ +using MatMulNativeConfigsMatrix = std::vector<std::vector<int32_t>>; + +/** This function accepts two MatMulKernelInfo objects where only the first can be with cl_image2d support enabled. + * The aim of this function is to check whether the first MatMulKernelInfo object is valid. If not, the function will + * return the second MatMulKernelInfo object. Otherwise, the first one. 
+ * + * @param[in] info0 MatMulKernelInfo with cl_image2d support + * @param[in] info1 MatMulKernelInfo to fall-back if cl_image2d cannot be used + * @param[in] m Number of rows (M) of the LHS matrix + * @param[in] n Number of columns (N) in the RHS matrix not reshaped + * @param[in] k Number of rows (K) in the RHS matrix not reshaped + * @param[in] b Batch size + * @param[in] data_type Data type + * @param[in] rhs_lock_padding Flag used to know whether the RHS paddings are locked + * + * @return @ref MatMulKernelInfo + */ +MatMulKernelInfo select_info(const MatMulKernelInfo &info0, + const MatMulKernelInfo &info1, + unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool rhs_lock_padding); + +/** Find the preferred configurations for the MatMul Native kernel using the MatMulNativeConfigsMatrix provided by the user + * + * @param[in] configs List of best configurations for a limited number of MatMul shapes + * @param[in] adj_lhs Adjoint LHS flag value + * @param[in] adj_rhs Adjoint RHS flag value + * @param[in] m Number of rows (M) of the LHS matrix + * @param[in] n Number of columns (N) in the RHS matrix not reshaped + * @param[in] k Number of rows (K) in the RHS matrix not reshaped + * @param[in] b Batch size + * + * @return @ref MatMulKernelInfo + */ +MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs, bool adj_lhs, bool adj_rhs, unsigned int m, unsigned int n, unsigned int k, unsigned int b); +} // namespace cl_matmul +} // namespace arm_compute +#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS */ diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h new file mode 100644 index 0000000000..a2dbfc7dd5 --- /dev/null +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG +#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG + +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h" +#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h" + +#include <memory> + +namespace arm_compute +{ +namespace cl_matmul +{ +/** ClMatMul configuration factory class */ +class ClMatMulNativeKernelConfigurationFactory final +{ +public: + /** Static method to call the ClMatMul configuration class accordingly with the GPU target + * + * @param[in] gpu GPU target + * + * @return IClMatMulNativeKernelConfig + */ + static std::unique_ptr<IClMatMulNativeKernelConfig> create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + case GPUTarget::BIFROST: + case GPUTarget::VALHALL: + return std::make_unique<ClMatMulNativeDefaultConfigValhall>(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_matmul +} // namespace arm_compute +#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG */ diff --git a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h new file mode 100644 index 0000000000..ee9b03e63b --- /dev/null +++ b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG +#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG + +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "src/core/common/Macros.h" + +namespace arm_compute +{ +namespace cl_matmul +{ +/** Basic container for the OpenCL MatMul Native configuration functions */ +template <typename T> +class ClMatMulNativeConfigArray +{ +public: + /** Alias for F32 index */ + static constexpr size_t DT_F32 = 0; + /** Alias for F16 index */ + static constexpr size_t DT_F16 = 1; + /** Alias for Int8 index */ + static constexpr size_t DT_INT8 = 2; + + /** Constructor + * + * @param[in] func_f32 Function to call for matmul native F32 + * @param[in] func_f16 Function to call for matmul native F16 + * @param[in] func_int8 Function to call for matmul native Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL) + * + */ + ClMatMulNativeConfigArray(T func_f32, T func_f16, T func_int8) + : _configs{ func_f32, func_f16, func_int8 } + { + } + + /** Method to return the matmul native configuration function based on data type + * + * @param[in] data_type Input data type + * + * @return the valid function otherwise it returns nullptr if the data type is not valid + */ + T get_function(DataType data_type) + { + switch(data_type) + { + case DataType::F32: + return _configs.at(DT_F32); + case DataType::F16: + return _configs.at(DT_F16); + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + return _configs.at(DT_INT8); + default: + return nullptr; + } + } + +private: + std::array<T, 3> _configs; +}; + +/** Basic interface for the matmul native kernel configuration */ +class IClMatMulNativeKernelConfig +{ +public: + /** Constructor + * + * @param[in] arch GPU target + */ + IClMatMulNativeKernelConfig(GPUTarget arch) + : _target(arch) + { + } + 
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClMatMulNativeKernelConfig); + /** Virtual destructor */ + virtual ~IClMatMulNativeKernelConfig() = default; + /** This method returns the @ref MatMulKernelInfo for the given inputs + * + * @param[in] lhs LHS tensor + * @param[in] rhs RHS tensor + * @param[in] info MatMul info + */ + virtual MatMulKernelInfo configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) = 0; + +protected: + GPUTarget _target; +}; +} // namespace cl_matmul +} // namespace arm_compute +#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG */ -- cgit v1.2.1