From 593c2425e6b94828fb486244e42c275a89a71aff Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 11 Aug 2021 14:06:28 +0100 Subject: Fix performance regression due to clFinish() - In ClGemmLowpMatrixMultiplyCore::prepare, clFinish() was called on every invocation, even when the workload was already prepared Resolves COMPMID-4707 Change-Id: Icdcee528590e2c5efb75325a80c2a45ec84993d1 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6082 Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena --- src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp b/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp index 64c8743f13..0c72912642 100644 --- a/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp +++ b/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp @@ -773,9 +773,9 @@ void ClGemmLowpMatrixMultiplyCore::prepare(ITensorPack &tensors) shifts_tensor->unmap(CLScheduler::get().queue()); } } + CLScheduler::get().queue().finish(); _is_prepared = true; } - CLScheduler::get().queue().finish(); } experimental::MemoryRequirements ClGemmLowpMatrixMultiplyCore::workspace() const -- cgit v1.2.1