author    SiCong Li <sicong.li@arm.com>    2021-02-04 13:12:19 +0000
committer SiCong Li <sicong.li@arm.com>    2021-02-09 13:45:09 +0000
commit    bbd8fac8e0cd6a403ddb6262be84f15a25f5cb3e (patch)
tree      ec913ffd4e703ddb953bf3db5c41ee8bab2fc9d8
parent    bd8b1e2246226c665ec9f4cc36d9b63399f7ac4e (diff)
download  ComputeLibrary-bbd8fac8e0cd6a403ddb6262be84f15a25f5cb3e.tar.gz
Integrate MLGO into CLGEMM and CLGEMMLowpMatrixMultiplyCore: Part4
Apply cl_gemm::auto_heuristics to CLGEMMLowpMatrixMultiplyCore for the
selection of gemm config reshaped only rhs and gemm kernel type

Resolves: COMPMID-3843, COMPMID-3844

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I351c76b052a1e52acec23a217bb111da8e40518e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4992
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp  83
1 file changed, 67 insertions(+), 16 deletions(-)
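
For orientation before the diff: the pattern this patch applies is to query the MLGO-trained heuristics first, keep the suggested GEMM config only if it passes kernel validation, and otherwise fall back to the hand-tuned default heuristics. Below is a minimal, self-contained C++ sketch of that fallback pattern; every name in it is a hypothetical stand-in, not the ComputeLibrary API (the real entry points, visible in the diff, are select_mlgo_gemm_config_reshaped_only_rhs and select_default_gemm_config_reshaped_only_rhs):

    #include <functional>
    #include <optional>

    // Hypothetical stand-in for the (lhs_info, rhs_info) pair being selected.
    struct GemmConfig
    {
        int m0{ 0 }, n0{ 0 }, k0{ 0 }; // block sizes, illustration only
    };

    // "MLGO first, validated, default as fallback": the shape of
    // auto_select_gemm_config_reshaped_only_rhs in the diff, not its real API.
    GemmConfig select_config(const std::function<std::optional<GemmConfig>()> &mlgo,
                             const std::function<bool(const GemmConfig &)> &validate,
                             const std::function<GemmConfig()> &fallback)
    {
        // Trust the learned config only if one exists AND it passes the same
        // kernel validation that configure()/validate() would run anyway.
        if(const auto cfg = mlgo(); cfg && validate(*cfg))
        {
            return *cfg;
        }
        // Otherwise use the hand-tuned default heuristics.
        return fallback();
    }

    int main()
    {
        const GemmConfig cfg = select_config(
            [] { return std::optional<GemmConfig>{ GemmConfig{ 4, 4, 8 } }; }, // MLGO suggestion
            [](const GemmConfig &c) { return c.k0 % 4 == 0; },                 // kernel validation
            [] { return GemmConfig{ 2, 2, 4 }; });                             // default heuristics
        return cfg.m0 == 4 ? 0 : 1; // MLGO config validated, so it wins
    }

The key design choice, mirrored in auto_select_gemm_config_reshaped_only_rhs below, is that an MLGO suggestion is advisory: it must survive the same reshape-kernel and mm-kernel validation that configure() would perform before it is trusted.
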
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 4bf5bde61e..6c4d9ef54a 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Log.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
@@ -44,6 +45,8 @@
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
+#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
+#include "utils/TypePrinter.h"
namespace arm_compute
{
@@ -52,19 +55,61 @@ using namespace arm_compute::cl_gemm;
namespace
{
-inline bool is_gemm_reshaped(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run)
+inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output,
+ unsigned int m, unsigned int n, unsigned int k, bool reinterpret_input_as_3d, int depth_output_gemm3d)
{
- std::unique_ptr<ICLGEMMKernelSelection> gemm_kernel = CLGEMMKernelSelectionFactory::create(CLScheduler::get().target());
- ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_kernel.get());
+ // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
+ TensorInfo tmp_b_info{};
+ // Validate reshape RHS kernel
+ auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
+ if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
+ {
+ return false;
+ }
+ // Validate mm kernel
+ // NOTE: Ignore all other parameters (e.g. depth_output_gemm3d, output stage etc.) and only validate lhs and rhs info
+ // NOTE: This assumes:
+ // 1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_arguments).
+ // 2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_and_configure_window).
+ GEMMKernelInfo gemm_kernel_info;
+ gemm_kernel_info.m = m;
+ gemm_kernel_info.n = n;
+ gemm_kernel_info.k = k;
+ gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
+ gemm_kernel_info.depth_output_gemm3d = depth_output_gemm3d;
+ gemm_kernel_info.lhs_info = lhs_info;
+ gemm_kernel_info.rhs_info = rhs_info;
+ // Since we ignore the output stage, output data type has to be S32 to pass the validation
+ TensorInfo output_info_copy(*output);
+ output_info_copy.set_data_type(DataType::S32);
+ if(!bool(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, &output_info_copy, gemm_kernel_info)))
+ {
+ return false;
+ }
+ return true;
+}
- CLGEMMKernelSelectionParams params;
- params.m = m;
- params.n = n;
- params.k = k;
- params.is_rhs_constant = reshape_b_only_on_first_run;
- params.data_type = data_type;
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d, int depth_output_gemm3d,
+ const ITensorInfo *a,
+ const ITensorInfo *b, const ITensorInfo *output)
+{
+ auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
+ if(config)
+ {
+ if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, output, query.m, query.n, query.k, reinterpret_input_as_3d, depth_output_gemm3d))
+ {
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+ return { config.lhs_info, config.rhs_info };
+ }
+ }
+ config = select_default_gemm_config_reshaped_only_rhs(query);
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+ return { config.lhs_info, config.rhs_info };
+}
- switch(gemm_kernel->select_kernel(params))
+inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
+{
+ switch(kernel_type)
{
case CLGEMMKernelType::NATIVE:
return false;
@@ -151,7 +196,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
// Check if we need to reshape the matrix A and matrix B
- _is_gemm_reshaped = is_gemm_reshaped(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run);
+ _is_gemm_reshaped = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->info()->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run));
if(_convert_to_qasymm8)
{
@@ -173,8 +218,10 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
}
// Pick up the GEMM configuration
- // Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED doesn't matter, since it only affect the shape configuration
- std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
+ // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
+ std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, reinterpret_input_as_3d,
+ depth_output_gemm3d,
+ a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), output->info());
// Configure reshape RHS kernel
_mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
@@ -344,7 +391,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- bool reshape_matrix_b = is_gemm_reshaped(m, n, k, a->data_type(), gemm_info.reshape_b_only_on_first_run());
+ bool reshape_matrix_b = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, gemm_info.reshape_b_only_on_first_run()));
const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
@@ -363,7 +410,11 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
matrix_b_info = &tmp_b_info;
// Pick up the GEMM configuration
- std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
+ // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
+ // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
+ const auto res = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
+ lhs_info = res.lhs_info;
+ rhs_info = res.rhs_info;
// Validate reshape RHS kernel
auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info)));
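
To summarize the reworked control flow after the patch, here is a hypothetical condensed view; the enum, struct and selection rule are made-up stand-ins for CLGEMMKernelType, auto_heuristics::CommonQuery and auto_select_gemm_kernel, not the real definitions:

    #include <iostream>

    // Hypothetical stand-ins for CLGEMMKernelType and auto_heuristics::CommonQuery.
    enum class KernelType { NATIVE, RESHAPED_ONLY_RHS };
    struct Query { unsigned int m, n, k, batch; };

    // Toy stand-in for auto_select_gemm_kernel(): pick a kernel type from the
    // query. The real selection consults MLGO and the default heuristics.
    KernelType auto_select_kernel(const Query &q, bool rhs_constant)
    {
        // Illustrative rule only: reshape the RHS when it is constant and the
        // problem is large enough to amortise the reshape cost.
        return (rhs_constant && q.m * q.n >= 256) ? KernelType::RESHAPED_ONLY_RHS : KernelType::NATIVE;
    }

    // Mirrors the reworked is_gemm_reshaped(): it now merely inspects an
    // already-selected kernel type instead of re-running selection itself.
    bool is_gemm_reshaped(KernelType type)
    {
        return type == KernelType::RESHAPED_ONLY_RHS;
    }

    int main()
    {
        const Query q{ 64, 64, 64, 1 };
        std::cout << std::boolalpha
                  << is_gemm_reshaped(auto_select_kernel(q, /*rhs_constant=*/true)) << '\n'; // true
        return 0;
    }

The relevant refactor is that is_gemm_reshaped() no longer performs kernel selection itself: configure() and validate() each select the kernel type once, via auto_select_gemm_kernel with a CommonQuery built from the GEMM shape, and pass the result in.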