diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 16 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 1 |
2 files changed, 17 insertions, 0 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp
index 487954b889..35a9958f65 100644
--- a/src/cpu/kernels/CpuMulKernel.cpp
+++ b/src/cpu/kernels/CpuMulKernel.cpp
@@ -1941,10 +1941,26 @@ void CpuMulKernel::run_op(ITensorPack &tensors, const Window &window, const Thre
         (*_func_float)(src1, src2, dst, window, _scale);
     }
 }
+
 const char *CpuMulKernel::name() const
 {
     return "CpuMulKernel";
 }
+
+size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
+{
+    ARM_COMPUTE_UNUSED(platform, thread_count);
+
+    if(_split_dimension == Window::DimX)
+    {
+        // Don't split the work load too small if the tensor has been reinterpreted as 1D.
+        // This number is loosely chosen as threading overhead in each platform varies wildly.
+        return 10240;
+    }
+
+    return default_mws;
+}
+
 namespace
 {
 Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index 5727b9d012..c92e1efdf4 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -79,6 +79,7 @@ public:
     // Inherited methods overridden
     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
     const char *name() const override;
+    size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
 
     /** Get the preferred dimension in which the scheduler splits the work into multiple jobs.
      *