From 1509e4bfcfd4b613e2f1ad584c51b80b5fb05a8c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 28 Jan 2019 10:01:50 +0000 Subject: COMPMID-1823: Increase scheduling granularity in NEGemmInterleaved. Change-Id: I68c6744885c8aa56a882cf5267061deeebcdc197 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/623 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez --- .../runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h | 1 + src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h | 9 +++++++++ src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp | 3 ++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h index 3ccfbc512b..949564750b 100644 --- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h +++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h @@ -140,6 +140,7 @@ private: std::vector _mm_workloads{}; std::vector _workloads{}; std::string _tag{}; + unsigned int _num_windows{ 1 }; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */ diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h index da6ef2dea9..26d9e9999d 100644 --- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h +++ b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h @@ -52,6 +52,11 @@ class IInterleavedStrategy public: /** Virtual Destructor */ virtual ~IInterleavedStrategy() = default; + /** Return output height of the interleaved strategy + * + * @return Output height of strategy + */ + virtual unsigned int out_height() const = 0; /** Instantiate and configure a prepareB Kernel * * @param[in] b Input tensor B. 
@@ -117,6 +122,10 @@ public: public: // Inherited methods overridden + unsigned int out_height() const override + { + return strategy::out_height(); + } std::unique_ptr instantiate_prepareB(const ITensor *b, ITensor *transformed_b, const INEGEMMWrapperKernel::Params &params, diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp index 695fc859de..34aaea0ef1 100644 --- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp +++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp @@ -207,7 +207,7 @@ void NEGEMMInterleavedWrapper::prepare() //Maximum number of workloads to create: const unsigned int num_threads = NEScheduler::get().num_threads(); - const unsigned int max_iterations = num_threads == 1 ? 1 : num_threads; + const unsigned int max_iterations = std::max(num_threads, _num_windows); //Maximum number of iterations the parameters allow: const unsigned int num_iterations = _batch_window.num_iterations_total(); // Keep the smallest of the two: @@ -357,6 +357,7 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe // Get strategy std::unique_ptr strategy = detail::create_strategy(gemm_kernel_info.name); + _num_windows = iceildiv(_params.M, strategy->out_height()) * _params.batches; ARM_COMPUTE_ERROR_ON(strategy == nullptr); if(!_pretranspose_b) -- cgit v1.2.1