diff options
author | fadara01 <fadi.arafeh@arm.com> | 2022-11-22 18:25:55 +0000 |
---|---|---|
committer | fadi.arafeh <fadi.arafeh@arm.com> | 2022-11-23 13:32:42 +0000 |
commit | e112ef1cc70bcdc52ded44350e61eb16d74559b3 (patch) | |
tree | 15c0af1c7696397f896ae71e9e02623422f79f74 /src/cpu/kernels | |
parent | 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 (diff) | |
download | ComputeLibrary-e112ef1cc70bcdc52ded44350e61eb16d74559b3.tar.gz |
ONCPUML-1072: Remove double definition of get_mws for Mul kernel
Signed-off-by: fadara01 <fadi.arafeh@arm.com>
Change-Id: Ieaa2fa4a6a69e7e0a48633967dabe91c786b42b7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8682
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels')
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 31 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 1 |
2 files changed, 15 insertions, 17 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp
index 81bb85c3dd..cc7efe0a1d 100644
--- a/src/cpu/kernels/CpuMulKernel.cpp
+++ b/src/cpu/kernels/CpuMulKernel.cpp
@@ -38,6 +38,7 @@ namespace
 {
     static constexpr size_t default_mws_N1_fp32_neon = 22447;
     static constexpr size_t default_mws_V1_fp32_neon = 38982;
+    static constexpr size_t default_mws_other_platforms_1d_tensor = 10240;
 }
 namespace arm_compute
 {
@@ -1932,7 +1933,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
         }
         else
         {
-            return ICPPKernel::default_mws;
+            if(_split_dimension == Window::DimX)
+            {
+                // Don't split the work load too small if the tensor has been reinterpreted as 1D.
+                // This number is loosely chosen as threading overhead in each platform varies wildly.
+                return default_mws_other_platforms_1d_tensor;
+            }
+            return default_mws;
         }
         // tensor is 1D or was re-interpreted as 1D
@@ -1952,7 +1959,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
 #else /* ENABLE_FP32_KERNELS */
     ARM_COMPUTE_UNUSED(platform);
 #endif /* ENABLE_FP32_KERNELS */
-    return ICPPKernel::default_mws;
+    if(_split_dimension == Window::DimX)
+    {
+        // Don't split the work load too small if the tensor has been reinterpreted as 1D.
+        // This number is loosely chosen as threading overhead in each platform varies wildly.
+        return default_mws_other_platforms_1d_tensor;
+    }
+    return default_mws;
 }
 Status CpuMulKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy,
@@ -1994,20 +2007,6 @@ const char *CpuMulKernel::name() const
     return "CpuMulKernel";
 }
-size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
-{
-    ARM_COMPUTE_UNUSED(platform, thread_count);
-
-    if(_split_dimension == Window::DimX)
-    {
-        // Don't split the work load too small if the tensor has been reinterpreted as 1D.
-        // This number is loosely chosen as threading overhead in each platform varies wildly.
-        return 10240;
-    }
-
-    return default_mws;
-}
-
 namespace
 {
 Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index 73ffc0dd2b..0d6f586117 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -79,7 +79,6 @@ public:
     // Inherited methods overridden
     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
     const char *name() const override;
-    size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
     /** Return minimum workload size of the relevant kernel
      * |