aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author fadara01 <fadi.arafeh@arm.com> 2022-11-22 18:25:55 +0000
committer fadi.arafeh <fadi.arafeh@arm.com> 2022-11-23 13:32:42 +0000
commit e112ef1cc70bcdc52ded44350e61eb16d74559b3 (patch)
tree 15c0af1c7696397f896ae71e9e02623422f79f74
parent 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 (diff)
download ComputeLibrary-e112ef1cc70bcdc52ded44350e61eb16d74559b3.tar.gz
ONCPUML-1072: Remove double definition of get_mws for Mul kernel
Signed-off-by: fadara01 <fadi.arafeh@arm.com>
Change-Id: Ieaa2fa4a6a69e7e0a48633967dabe91c786b42b7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8682
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/cpu/kernels/CpuMulKernel.cpp 31
-rw-r--r-- src/cpu/kernels/CpuMulKernel.h 1
2 files changed, 15 insertions, 17 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp
index 81bb85c3dd..cc7efe0a1d 100644
--- a/src/cpu/kernels/CpuMulKernel.cpp
+++ b/src/cpu/kernels/CpuMulKernel.cpp
@@ -38,6 +38,7 @@ namespace
{
static constexpr size_t default_mws_N1_fp32_neon = 22447;
static constexpr size_t default_mws_V1_fp32_neon = 38982;
+ static constexpr size_t default_mws_other_platforms_1d_tensor = 10240;
}
namespace arm_compute
{
@@ -1932,7 +1933,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
}
else
{
- return ICPPKernel::default_mws;
+ if(_split_dimension == Window::DimX)
+ {
+ // Don't split the work load too small if the tensor has been reinterpreted as 1D.
+ // This number is loosely chosen as threading overhead in each platform varies wildly.
+ return default_mws_other_platforms_1d_tensor;
+ }
+ return default_mws;
}
// tensor is 1D or was re-interpreted as 1D
@@ -1952,7 +1959,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
#else /* ENABLE_FP32_KERNELS */
ARM_COMPUTE_UNUSED(platform);
#endif /* ENABLE_FP32_KERNELS */
- return ICPPKernel::default_mws;
+ if(_split_dimension == Window::DimX)
+ {
+ // Don't split the work load too small if the tensor has been reinterpreted as 1D.
+ // This number is loosely chosen as threading overhead in each platform varies wildly.
+ return default_mws_other_platforms_1d_tensor;
+ }
+ return default_mws;
}
Status CpuMulKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy,
@@ -1994,20 +2007,6 @@ const char *CpuMulKernel::name() const
return "CpuMulKernel";
}
-size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
-{
- ARM_COMPUTE_UNUSED(platform, thread_count);
-
- if(_split_dimension == Window::DimX)
- {
- // Don't split the work load too small if the tensor has been reinterpreted as 1D.
- // This number is loosely chosen as threading overhead in each platform varies wildly.
- return 10240;
- }
-
- return default_mws;
-}
-
namespace
{
Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index 73ffc0dd2b..0d6f586117 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -79,7 +79,6 @@ public:
// Inherited methods overridden
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
- size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
/** Return minimum workload size of the relevant kernel
*