about summary refs log tree commit diff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-05 07:21:28 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-05 09:56:39 +0000
commit 5ee0d95b2382cb4a85c5320a4e3aa441524af0be (patch)
tree b50364c5716e9c992724c3b31123192dd2552914
parent 8155c0253c00aa9e26651361460c66feb39829a6 (diff)
download ComputeLibrary-5ee0d95b2382cb4a85c5320a4e3aa441524af0be.tar.gz
Improve implementation selection speed of CpuElementwiseUnary

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I94f2b9135a7de78888418f0af33e3e5f78e2a1fa
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5901
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp 50
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.h 4
2 files changed, 32 insertions, 22 deletions
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 56e3297e73..b03c32f023 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -43,7 +43,12 @@ namespace kernels
{
namespace
{
-using ElementwiseUnarySelector = std::add_pointer<bool(DataType)>::type;
+struct ElementwiseUnarySelectorData
+{
+ DataType dt;
+ const CPUInfo &ci;
+};
+using ElementwiseUnarySelector = std::add_pointer<bool(const ElementwiseUnarySelectorData &)>::type;
struct ElementwiseUnaryKernel
{
@@ -56,37 +61,37 @@ static const ElementwiseUnaryKernel available_kernels[] =
{
#if defined(ARM_COMPUTE_ENABLE_SVE)
{
- "fp32_sve_elementwise_unary",
- [](DataType dt) { return dt == DataType::F32; },
+ "sve_fp32_elementwise_unary",
+ [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<float>),
},
{
- "fp16_sve_elementwise_unary",
- [](DataType dt) { return dt == DataType::F16; },
+ "sve_fp16_elementwise_unary",
+ [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16; },
REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op<__fp16>),
},
{
- "s32_sve_elementwise_unary",
- [](DataType dt) { return dt == DataType::S32; },
+ "sve_s32_elementwise_unary",
+ [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32; },
REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<int32_t>),
},
#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#if defined(ARM_COMPUTE_ENABLE_NEON)
{
- "fp32_neon_elementwise_unary",
- [](DataType dt) { return dt == DataType::F32; },
+ "neon_fp32_elementwise_unary",
+ [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<float>),
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
- "fp16_neon_elementwise_unary",
- [](DataType dt) { return dt == DataType::F16; },
+ "neon_fp16_elementwise_unary",
+ [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::F16; },
REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<__fp16>),
},
#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
- "s32_neon_elementwise_unary",
- [](DataType dt) { return dt == DataType::S32; },
+ "neon_s32_elementwise_unary",
+ [](const ElementwiseUnarySelectorData & data) { return data.dt == DataType::S32; },
REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<int32_t>),
},
#endif // defined(ARM_COMPUTE_ENABLE_NEON)
@@ -96,7 +101,7 @@ const ElementwiseUnaryKernel *get_implementation(DataType dt)
{
for(const auto &uk : available_kernels)
{
- if(uk.is_selected(dt))
+ if(uk.is_selected({ dt, CPUInfo::get() }))
{
return &uk;
}
@@ -108,8 +113,12 @@ const ElementwiseUnaryKernel *get_implementation(DataType dt)
void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
{
ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
+ const auto uk = get_implementation(src.data_type());
+ ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
- _op = op;
+ _op = op;
+ _run_method = uk->ukernel;
+ _name = std::string("CpuElementwiseUnaryKernel").append("/").append(uk->name);
// If input shape is dynamic, expect a configured window and dst at run-time.
if(src.is_dynamic())
@@ -158,16 +167,15 @@ void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &windo
{
ARM_COMPUTE_UNUSED(info);
- auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
- auto dst = tensors.get_tensor(TensorType::ACL_DST);
- auto func = get_implementation(src->info()->data_type())->ukernel;
- ARM_COMPUTE_ERROR_ON(func == nullptr);
- func(src, dst, window, _op);
+ auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST);
+
+ _run_method(src, dst, window, _op);
}
const char *CpuElementwiseUnaryKernel::name() const
{
- return "CpuElementwiseUnaryKernel";
+ return _name.c_str();
}
} // namespace kernels
} // namespace cpu
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
index 43c6eded60..bda65a35e0 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -71,7 +71,9 @@ public:
using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
private:
- ElementWiseUnary _op{};
+ ElementWiseUnary _op{};
+ ElementwiseUnaryUkernelPtr _run_method{ nullptr };
+ std::string _name{};
};
} // namespace kernels
} // namespace cpu