aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgiuros01 <giuseppe.rossini@arm.com>2019-10-01 12:17:49 +0100
committerGiuseppe Rossini <giuseppe.rossini@arm.com>2019-10-01 14:01:22 +0000
commit3d8fe497fc76ec6ad265c03fe02e29ed2ddf2d93 (patch)
tree1b5274641d01819b7f59c1834df9cfffb81629d6
parent2aa7fd011a4baff52dceb00a71b3674f819df8fc (diff)
downloadComputeLibrary-3d8fe497fc76ec6ad265c03fe02e29ed2ddf2d93.tar.gz
INFPRF-609: Performance Issue of the Latest ArmCL
We were creating too many small GEMM workloads. This affects performance when the number of threads is small and the matrices are bigger (especially when several single-threaded processes are running on the same machine). Change-Id: I807019a7b2d043ca72b4bca11eb0b1960da00694 Signed-off-by: giuros01 <giuseppe.rossini@arm.com> Reviewed-on: https://review.mlplatform.org/c/2012 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h1
-rw-r--r--src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp3
2 files changed, 1 insertions, 3 deletions
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
index d3dda9a95f..eeea0babf1 100644
--- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
+++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
@@ -141,7 +141,6 @@ private:
std::vector<MatrixMultiplyWorkload> _mm_workloads{};
std::vector<IScheduler::Workload> _workloads{};
std::string _tag{};
- unsigned int _num_windows{ 1 };
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */
diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index 41d7d1ff76..79e40a7181 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -212,7 +212,7 @@ void NEGEMMInterleavedWrapper::prepare()
//Maximum number of workloads to create:
const unsigned int num_threads = NEScheduler::get().num_threads();
- const unsigned int max_iterations = std::max(num_threads, _num_windows);
+ const unsigned int max_iterations = num_threads == 1 ? 1 : num_threads;
//Maximum number of iterations the parameters allow:
const unsigned int num_iterations = _batch_window.num_iterations_total();
// Keep the smallest of the two:
@@ -362,7 +362,6 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe
// Get strategy
std::unique_ptr<detail::IInterleavedStrategy> strategy = detail::create_strategy(gemm_kernel_info.name);
- _num_windows = iceildiv(_params.M, strategy->out_height()) * _params.batches;
ARM_COMPUTE_ERROR_ON(strategy == nullptr);
if(!_pretranspose_b)