From 1509e4bfcfd4b613e2f1ad584c51b80b5fb05a8c Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 28 Jan 2019 10:01:50 +0000
Subject: COMPMID-1823: Increase scheduling granularity in NEGemmInterleaved.

Change-Id: I68c6744885c8aa56a882cf5267061deeebcdc197
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/623
Tested-by: Arm Jenkins
Reviewed-by: Pablo Marquez
---
 src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/runtime/NEON/functions/assembly')

diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index 695fc859de..34aaea0ef1 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -207,7 +207,7 @@ void NEGEMMInterleavedWrapper::prepare()
 //Maximum number of workloads to create:
 const unsigned int num_threads = NEScheduler::get().num_threads();
- const unsigned int max_iterations = num_threads == 1 ? 1 : num_threads;
+ const unsigned int max_iterations = std::max(num_threads, _num_windows);
 //Maximum number of iterations the parameters allow:
 const unsigned int num_iterations = _batch_window.num_iterations_total();
 // Keep the smallest of the two:
@@ -357,6 +357,7 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe
 // Get strategy
 std::unique_ptr strategy = detail::create_strategy(gemm_kernel_info.name);
+ _num_windows = iceildiv(_params.M, strategy->out_height()) * _params.batches;
 ARM_COMPUTE_ERROR_ON(strategy == nullptr);
 if(!_pretranspose_b)
--
cgit v1.2.1