diff options
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 31 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 1 |
2 files changed, 15 insertions, 17 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp index 81bb85c3dd..cc7efe0a1d 100644 --- a/src/cpu/kernels/CpuMulKernel.cpp +++ b/src/cpu/kernels/CpuMulKernel.cpp @@ -38,6 +38,7 @@ namespace { static constexpr size_t default_mws_N1_fp32_neon = 22447; static constexpr size_t default_mws_V1_fp32_neon = 38982; + static constexpr size_t default_mws_other_platforms_1d_tensor = 10240; } namespace arm_compute { @@ -1932,7 +1933,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const } else { - return ICPPKernel::default_mws; + if(_split_dimension == Window::DimX) + { + // Don't split the work load too small if the tensor has been reinterpreted as 1D. + // This number is loosely chosen as threading overhead in each platform varies wildly. + return default_mws_other_platforms_1d_tensor; + } + return default_mws; } // tensor is 1D or was re-interpreted as 1D @@ -1952,7 +1959,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const #else /* ENABLE_FP32_KERNELS */ ARM_COMPUTE_UNUSED(platform); #endif /* ENABLE_FP32_KERNELS */ - return ICPPKernel::default_mws; + if(_split_dimension == Window::DimX) + { + // Don't split the work load too small if the tensor has been reinterpreted as 1D. + // This number is loosely chosen as threading overhead in each platform varies wildly. + return default_mws_other_platforms_1d_tensor; + } + return default_mws; } Status CpuMulKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, @@ -1994,20 +2007,6 @@ const char *CpuMulKernel::name() const return "CpuMulKernel"; } -size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const -{ - ARM_COMPUTE_UNUSED(platform, thread_count); - - if(_split_dimension == Window::DimX) - { - // Don't split the work load too small if the tensor has been reinterpreted as 1D. - // This number is loosely chosen as threading overhead in each platform varies wildly. - return 10240; - } - - return default_mws; -} - namespace { Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h index 73ffc0dd2b..0d6f586117 100644 --- a/src/cpu/kernels/CpuMulKernel.h +++ b/src/cpu/kernels/CpuMulKernel.h @@ -79,7 +79,6 @@ public: // Inherited methods overridden void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; - size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; /** Return minimum workload size of the relevant kernel * |