aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-01-28 10:01:50 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-02-04 11:54:20 +0000
commit1509e4bfcfd4b613e2f1ad584c51b80b5fb05a8c (patch)
tree6991f867fc487f08d68a0b0a5e3f06f5a21cba40
parenta41c54b7151edd9eb139c1080c79123d09f93037 (diff)
downloadComputeLibrary-1509e4bfcfd4b613e2f1ad584c51b80b5fb05a8c.tar.gz
COMPMID-1823: Increase scheduling granularity in NEGemmInterleaved.
Change-Id: I68c6744885c8aa56a882cf5267061deeebcdc197 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/623 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
-rw-r--r--arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h1
-rw-r--r--src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h9
-rw-r--r--src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp3
3 files changed, 12 insertions, 1 deletions
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
index 3ccfbc512b..949564750b 100644
--- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
+++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
@@ -140,6 +140,7 @@ private:
std::vector<MatrixMultiplyWorkload> _mm_workloads{};
std::vector<IScheduler::Workload> _workloads{};
std::string _tag{};
+ unsigned int _num_windows{ 1 };
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */
diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
index da6ef2dea9..26d9e9999d 100644
--- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
+++ b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
@@ -52,6 +52,11 @@ class IInterleavedStrategy
public:
/** Virtual Destructor */
virtual ~IInterleavedStrategy() = default;
+ /** Return output height of the interleaved strategy
+ *
+ * @return Output height of strategy
+ */
+ virtual unsigned int out_height() const = 0;
/** Instantiate and configure a prepareB Kernel
*
* @param[in] b Input tensor B.
@@ -117,6 +122,10 @@ public:
public:
// Inherited methods overridden
+ unsigned int out_height() const override
+ {
+ return strategy::out_height();
+ }
std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b,
ITensor *transformed_b,
const INEGEMMWrapperKernel::Params &params,
diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index 695fc859de..34aaea0ef1 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -207,7 +207,7 @@ void NEGEMMInterleavedWrapper::prepare()
//Maximum number of workloads to create:
const unsigned int num_threads = NEScheduler::get().num_threads();
- const unsigned int max_iterations = num_threads == 1 ? 1 : num_threads;
+ const unsigned int max_iterations = std::max(num_threads, _num_windows);
//Maximum number of iterations the parameters allow:
const unsigned int num_iterations = _batch_window.num_iterations_total();
// Keep the smallest of the two:
@@ -357,6 +357,7 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe
// Get strategy
std::unique_ptr<detail::IInterleavedStrategy> strategy = detail::create_strategy(gemm_kernel_info.name);
+ _num_windows = iceildiv(_params.M, strategy->out_height()) * _params.batches;
ARM_COMPUTE_ERROR_ON(strategy == nullptr);
if(!_pretranspose_b)