From 9cca592c13f1e688a35698641069bcd37a525f0c Mon Sep 17 00:00:00 2001 From: ramelg01 Date: Thu, 11 Nov 2021 10:05:00 +0000 Subject: Improve start-up timer for GeMM (floating-point): - Pass M,N,K at runtime as kernel parameters - Add a guard macro to compile only kernel of interest - Move reshaping kernels to gemm_utils.cl - Remove the fallback reshaping kernel with Y-Padding support Resolves: COMPMID-4888 Signed-off-by: Ramy Elgammal Change-Id: Ida3851326f0b77e410633271de9ecca106e37931 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6662 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/gpu/cl/operators/ClGemm.cpp | 8 +------- src/gpu/cl/operators/ClGemm.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'src/gpu/cl/operators') diff --git a/src/gpu/cl/operators/ClGemm.cpp b/src/gpu/cl/operators/ClGemm.cpp index 50ecb214e3..555738531a 100644 --- a/src/gpu/cl/operators/ClGemm.cpp +++ b/src/gpu/cl/operators/ClGemm.cpp @@ -191,7 +191,6 @@ ClGemm::ClGemm() _mm_native_kernel(std::make_unique()), _mm_reshaped_kernel(std::make_unique()), _mm_reshaped_only_rhs_kernel(std::make_unique()), - _mm_reshaped_only_rhs_fallback_kernel(std::make_unique()), _tmp_a(), _tmp_b(), _reshape_b_only_on_first_run(false), @@ -303,7 +302,6 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context // Set the target for the kernels _mm_reshaped_only_rhs_kernel->set_target(gpu_target); - _mm_reshaped_only_rhs_fallback_kernel->set_target(gpu_target); GEMMLHSMatrixInfo lhs_info{}; GEMMRHSMatrixInfo rhs_info{}; @@ -322,10 +320,6 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context kernel_info.has_pad_y = false; _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - // Configure matrix multiply kernel with y padding support - kernel_info.has_pad_y = true; - 
_mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - // Request memory for RHS reshape matrix _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); } @@ -625,7 +619,7 @@ void ClGemm::run(ITensorPack &tensors) if(has_pad_y) { - CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true); + ARM_COMPUTE_ERROR_ON(has_pad_y); } else { diff --git a/src/gpu/cl/operators/ClGemm.h b/src/gpu/cl/operators/ClGemm.h index e084e53fe4..3c0cad3ca4 100644 --- a/src/gpu/cl/operators/ClGemm.h +++ b/src/gpu/cl/operators/ClGemm.h @@ -121,7 +121,6 @@ private: std::unique_ptr _mm_native_kernel; std::unique_ptr _mm_reshaped_kernel; std::unique_ptr _mm_reshaped_only_rhs_kernel; - std::unique_ptr _mm_reshaped_only_rhs_fallback_kernel; TensorInfo _tmp_a; TensorInfo _tmp_b; bool _reshape_b_only_on_first_run; -- cgit v1.2.1