diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-11-08 12:01:21 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-11-09 11:29:14 +0000 |
commit | d4a9cc00a666c7d4c2a35c49d71b322f27e369fc (patch) | |
tree | c46a58f9679aa1b7cfb14511a16f5052e0f50ca2 /src/cpu/kernels | |
parent | d158609e9ab13069a0a4d2d01d3f1a739a678dd0 (diff) | |
download | ComputeLibrary-d4a9cc00a666c7d4c2a35c49d71b322f27e369fc.tar.gz |
Fix CPU multiplication layer threading overhead
* When the tensors are reinterpreted as 1D, anything smaller than
10KB won't be split across different threads.
Resolves: COMPMID-5630
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Icff7089e37c85c8b325f099008a080a5805d36a2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8581
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels')
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 16 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 1 |
2 files changed, 17 insertions, 0 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp index 487954b889..35a9958f65 100644 --- a/src/cpu/kernels/CpuMulKernel.cpp +++ b/src/cpu/kernels/CpuMulKernel.cpp @@ -1941,10 +1941,26 @@ void CpuMulKernel::run_op(ITensorPack &tensors, const Window &window, const Thre (*_func_float)(src1, src2, dst, window, _scale); } } + const char *CpuMulKernel::name() const { return "CpuMulKernel"; } + +size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(platform, thread_count); + + if(_split_dimension == Window::DimX) + { + // Don't split the work load too small if the tensor has been reinterpreted as 1D. + // This number is loosely chosen as threading overhead in each platform varies wildly. + return 10240; + } + + return default_mws; +} + namespace { Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h index 5727b9d012..c92e1efdf4 100644 --- a/src/cpu/kernels/CpuMulKernel.h +++ b/src/cpu/kernels/CpuMulKernel.h @@ -79,6 +79,7 @@ public: // Inherited methods overridden void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; /** Get the preferred dimension in which the scheduler splits the work into multiple jobs. * |