aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/assembly
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-28 10:01:50 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-02-04 11:54:20 +0000
commit 1509e4bfcfd4b613e2f1ad584c51b80b5fb05a8c (patch)
tree 6991f867fc487f08d68a0b0a5e3f06f5a21cba40 /src/runtime/NEON/functions/assembly
parent a41c54b7151edd9eb139c1080c79123d09f93037 (diff)
download ComputeLibrary-1509e4bfcfd4b613e2f1ad584c51b80b5fb05a8c.tar.gz
COMPMID-1823: Increase scheduling granularity in NEGemmInterleaved.
Change-Id: I68c6744885c8aa56a882cf5267061deeebcdc197
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/623
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/assembly')
-rw-r--r-- src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp 3
1 file changed, 2 insertions, 1 deletion
diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index 695fc859de..34aaea0ef1 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -207,7 +207,7 @@ void NEGEMMInterleavedWrapper::prepare()
//Maximum number of workloads to create:
const unsigned int num_threads = NEScheduler::get().num_threads();
- const unsigned int max_iterations = num_threads == 1 ? 1 : num_threads;
+ const unsigned int max_iterations = std::max(num_threads, _num_windows);
//Maximum number of iterations the parameters allow:
const unsigned int num_iterations = _batch_window.num_iterations_total();
// Keep the smallest of the two:
@@ -357,6 +357,7 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe
// Get strategy
std::unique_ptr<detail::IInterleavedStrategy> strategy = detail::create_strategy(gemm_kernel_info.name);
+ _num_windows = iceildiv(_params.M, strategy->out_height()) * _params.batches;
ARM_COMPUTE_ERROR_ON(strategy == nullptr);
if(!_pretranspose_b)