aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/operators
diff options
context:
space:
mode:
author ramelg01 <ramy.elgammal@arm.com> 2021-11-11 10:05:00 +0000
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-11-20 17:38:07 +0000
commit 9cca592c13f1e688a35698641069bcd37a525f0c (patch)
tree 8f69b654c5f543d918ec5d61140af30bbadbd390 /src/gpu/cl/operators
parent e330fb41d85d7058f74902ce1d47b2dc00b10a52 (diff)
download ComputeLibrary-9cca592c13f1e688a35698641069bcd37a525f0c.tar.gz
Improve start-up time for GeMM (floating-point):
- Pass M,N,K at runtime as kernel parameters - Add a guard macro to compile only kernel of interest - Move reshaping kernels to gemm_utils.cl - Remove the fallback reshaping kernel with Y-Padding support Resolves: COMPMID-4888 Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com> Change-Id: Ida3851326f0b77e410633271de9ecca106e37931 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6662 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators')
-rw-r--r-- src/gpu/cl/operators/ClGemm.cpp | 8
-rw-r--r-- src/gpu/cl/operators/ClGemm.h | 1
2 files changed, 1 insertions, 8 deletions
diff --git a/src/gpu/cl/operators/ClGemm.cpp b/src/gpu/cl/operators/ClGemm.cpp
index 50ecb214e3..555738531a 100644
--- a/src/gpu/cl/operators/ClGemm.cpp
+++ b/src/gpu/cl/operators/ClGemm.cpp
@@ -191,7 +191,6 @@ ClGemm::ClGemm()
_mm_native_kernel(std::make_unique<ClGemmMatrixMultiplyNativeKernel>()),
_mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()),
_mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
- _mm_reshaped_only_rhs_fallback_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
_tmp_a(),
_tmp_b(),
_reshape_b_only_on_first_run(false),
@@ -303,7 +302,6 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context
// Set the target for the kernels
_mm_reshaped_only_rhs_kernel->set_target(gpu_target);
- _mm_reshaped_only_rhs_fallback_kernel->set_target(gpu_target);
GEMMLHSMatrixInfo lhs_info{};
GEMMRHSMatrixInfo rhs_info{};
@@ -322,10 +320,6 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context
kernel_info.has_pad_y = false;
_mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
- // Configure matrix multiply kernel with y padding support
- kernel_info.has_pad_y = true;
- _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
// Request memory for RHS reshape matrix
_aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}
@@ -625,7 +619,7 @@ void ClGemm::run(ITensorPack &tensors)
if(has_pad_y)
{
- CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true);
+ ARM_COMPUTE_ERROR_ON(has_pad_y);
}
else
{
diff --git a/src/gpu/cl/operators/ClGemm.h b/src/gpu/cl/operators/ClGemm.h
index e084e53fe4..3c0cad3ca4 100644
--- a/src/gpu/cl/operators/ClGemm.h
+++ b/src/gpu/cl/operators/ClGemm.h
@@ -121,7 +121,6 @@ private:
std::unique_ptr<kernels::ClGemmMatrixMultiplyNativeKernel> _mm_native_kernel;
std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedKernel> _mm_reshaped_kernel;
std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel;
- std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_fallback_kernel;
TensorInfo _tmp_a;
TensorInfo _tmp_b;
bool _reshape_b_only_on_first_run;