author    Gian Marco Iodice <gianmarco.iodice@arm.com>  2020-10-22 16:37:12 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com>  2020-10-26 14:46:44 +0000
commit    9ae06d4986bc3055f7786c1097b465bd321cf8eb (patch)
tree      adb50e965f860893fe83e3937026056bf1f054c9 /src/runtime/CL/functions/CLGEMM.cpp
parent    5f91041aef3eb7373d5d2cebcbe60f279da85904 (diff)
COMPMID-3925: Dispatch CLGEMM with no padding y requirement
- Add has_pad_y flag in GEMMKernelInfo
- Skip reinterpret as 3D in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel if has_pad_y = false
- Add test to validate CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false/true
- Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel to run with has_pad_y = false/true in CLGEMM
- Check whether the lhs/dst tensors have pad y. If not, run CLGEMMMatrixMultiplyReshapedOnlyRHSKernel without the padding requirement

Change-Id: I68bb43389789736d676b899ac7c77fd9138babaf
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4248
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
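The core of the change is a prepare-time padding check followed by a two-way kernel dispatch. Below is a minimal standalone sketch of that pattern; GemmDispatcher, PaddingSize, and the puts calls are illustrative stand-ins, not Compute Library API, but the cross-plane-pad computation and the dispatch condition mirror the diff:

```cpp
#include <cstdio>

// Stand-in for arm_compute::PaddingSize (top/bottom are the y-padding elements).
struct PaddingSize
{
    unsigned int top;
    unsigned int bottom;
};

// Hypothetical minimal dispatcher mirroring the CLGEMM logic in this commit:
// two pre-configured kernel variants, selected at run time by _has_pad_y.
class GemmDispatcher
{
public:
    // Called once in prepare(): lhs/dst y padding decides which variant runs.
    void prepare(const PaddingSize &lhs_pad, const PaddingSize &dst_pad)
    {
        const unsigned int cross_plane_pad_lhs = lhs_pad.top + lhs_pad.bottom;
        const unsigned int cross_plane_pad_dst = dst_pad.top + dst_pad.bottom;
        _has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);
    }

    // Called in run(): enqueue the fallback kernel only when y padding exists.
    void run() const
    {
        if(_has_pad_y)
        {
            std::puts("enqueue _mm_reshaped_only_rhs_fallback_kernel (has_pad_y = true)");
        }
        else
        {
            std::puts("enqueue _mm_reshaped_only_rhs_kernel (has_pad_y = false)");
        }
    }

private:
    bool _has_pad_y{ false };
};

int main()
{
    GemmDispatcher gemm;
    gemm.prepare({ 0, 0 }, { 0, 0 }); // no y padding -> padding-free kernel variant
    gemm.run();
    gemm.prepare({ 1, 0 }, { 0, 0 }); // lhs has top padding -> fallback kernel
    gemm.run();
    return 0;
}
```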
Diffstat (limited to 'src/runtime/CL/functions/CLGEMM.cpp')
-rw-r--r--  src/runtime/CL/functions/CLGEMM.cpp | 52
1 file changed, 39 insertions(+), 13 deletions(-)
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index ccae6713a6..80c5496ede 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -60,11 +60,15 @@ CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *
_reshape_rhs_kernel_managed(),
_mm_reshaped_kernel(),
_mm_reshaped_only_rhs_kernel(),
+ _mm_reshaped_only_rhs_fallback_kernel(),
_tmp_a(),
_tmp_b(),
_original_b(nullptr),
+ _lhs(nullptr),
+ _dst(nullptr),
_reshape_b_only_on_first_run(false),
_is_prepared(false),
+ _has_pad_y(false),
_gemm_kernel_type(CLGEMMKernelType::NATIVE_V1)
{
}
@@ -295,16 +299,8 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context
std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target);
ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_config.get());
- unsigned int m_internal = m;
- unsigned int b_internal = batch_size;
- if(reinterpret_input_as_3d)
- {
- m_internal = a->info()->dimension(1);
- b_internal = a->info()->dimension(2);
- }
-
// Configure lhs_info and rhs_info
- std::tie(lhs_info, rhs_info) = gemm_config->configure(m_internal, n, k, b_internal, data_type);
+ std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
@@ -317,9 +313,18 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context
_reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
}
- // Configure and tune matrix multiply kernel
+ // Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true)
+ // During the prepare stage we check the padding requirement for the lhs and dst tensors. If they do not have
+ // pad y, we dispatch CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false
+
+ // Configure matrix multiply kernel with no y padding support
+ kernel_info.has_pad_y = false;
_mm_reshaped_only_rhs_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ // Configure matrix multiply kernel with y padding support
+ kernel_info.has_pad_y = true;
+ _mm_reshaped_only_rhs_fallback_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+
if(!_reshape_b_only_on_first_run && use_mm_b)
{
_tmp_b.allocator()->allocate();
@@ -493,6 +498,10 @@ Status CLGEMM::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));
// Validate matrix multiply
+ kernel_info.has_pad_y = false;
+ ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
+
+ kernel_info.has_pad_y = true;
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
return Status{};
@@ -514,6 +523,8 @@ void CLGEMM::configure(const CLCompileContext &compile_context, const ICLTensor
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
_is_prepared = gemm_info.retain_internal_weights();
_original_b = b;
+ _lhs = a;
+ _dst = output;
// Get the GPU target
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
@@ -608,7 +619,6 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
void CLGEMM::run()
{
prepare();
-
MemoryGroupResourceScope scope_mg(_memory_group);
// Run matrix multiply kernel
@@ -675,8 +685,14 @@ void CLGEMM::run()
CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
}
}
-
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, true);
+ if(_has_pad_y)
+ {
+ CLScheduler::get().enqueue(_mm_reshaped_only_rhs_fallback_kernel, true);
+ }
+ else
+ {
+ CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, true);
+ }
break;
}
default:
@@ -690,6 +706,16 @@ void CLGEMM::prepare()
{
if(!_is_prepared)
{
+ // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
+ if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED_ONLY_RHS)
+ {
+ // Check if the lhs or dst tensors have padding
+ const unsigned int cross_plane_pad_lhs = _lhs->info()->padding().top + _lhs->info()->padding().bottom;
+ const unsigned int cross_plane_pad_dst = _dst->info()->padding().top + _dst->info()->padding().bottom;
+
+ _has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);
+ }
+
if(_gemm_kernel_type != CLGEMMKernelType::NATIVE_V1 && _reshape_b_only_on_first_run)
{
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
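For context beyond the diff: a caller-side sketch, assuming the standard CLGEMM public API at this revision; the tensor shapes are illustrative. The has_pad_y selection added by this commit happens inside prepare()/run(), so callers need no changes:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor a{}, b{}, dst{};
    a.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32));   // lhs: K x M
    b.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));  // rhs: N x K
    dst.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32)); // dst: N x M

    CLGEMM gemm{};
    gemm.configure(&a, &b, nullptr, &dst, 1.0f, 0.0f); // alpha = 1, beta = 0, no bias

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();

    gemm.run(); // prepare() runs on first call and picks the kernel variant
    return 0;
}
```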