aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cpu/kernels/CpuMulKernel.cpp31
-rw-r--r--src/cpu/kernels/CpuMulKernel.h1
2 files changed, 15 insertions, 17 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp
index 81bb85c3dd..cc7efe0a1d 100644
--- a/src/cpu/kernels/CpuMulKernel.cpp
+++ b/src/cpu/kernels/CpuMulKernel.cpp
@@ -38,6 +38,7 @@ namespace
{
static constexpr size_t default_mws_N1_fp32_neon = 22447;
static constexpr size_t default_mws_V1_fp32_neon = 38982;
+ static constexpr size_t default_mws_other_platforms_1d_tensor = 10240;
}
namespace arm_compute
{
@@ -1932,7 +1933,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
}
else
{
- return ICPPKernel::default_mws;
+ if(_split_dimension == Window::DimX)
+ {
+ // Don't split the workload into pieces that are too small if the tensor has been reinterpreted as 1D.
+ // This threshold is loosely chosen, as threading overhead varies wildly between platforms.
+ return default_mws_other_platforms_1d_tensor;
+ }
+ return default_mws;
}
// tensor is 1D or was re-interpreted as 1D
@@ -1952,7 +1959,13 @@ size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
#else /* ENABLE_FP32_KERNELS */
ARM_COMPUTE_UNUSED(platform);
#endif /* ENABLE_FP32_KERNELS */
- return ICPPKernel::default_mws;
+ if(_split_dimension == Window::DimX)
+ {
+ // Don't split the workload into pieces that are too small if the tensor has been reinterpreted as 1D.
+ // This threshold is loosely chosen, as threading overhead varies wildly between platforms.
+ return default_mws_other_platforms_1d_tensor;
+ }
+ return default_mws;
}
Status CpuMulKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy,
@@ -1994,20 +2007,6 @@ const char *CpuMulKernel::name() const
return "CpuMulKernel";
}
-size_t CpuMulKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
-{
- ARM_COMPUTE_UNUSED(platform, thread_count);
-
- if(_split_dimension == Window::DimX)
- {
- // Don't split the work load too small if the tensor has been reinterpreted as 1D.
- // This number is loosely chosen as threading overhead in each platform varies wildly.
- return 10240;
- }
-
- return default_mws;
-}
-
namespace
{
Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h
index 73ffc0dd2b..0d6f586117 100644
--- a/src/cpu/kernels/CpuMulKernel.h
+++ b/src/cpu/kernels/CpuMulKernel.h
@@ -79,7 +79,6 @@ public:
// Inherited methods overridden
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
- size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
/** Return minimum workload size of the relevant kernel
*