diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2021-06-07 14:23:57 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-06-23 12:25:50 +0000 |
commit | 20fca524baf99402f742ce38c538f2fd07d5fff9 (patch) | |
tree | b63d98383d1ba22bb3ca59d393e4ab9d47a9c762 /src/core/cpu/kernels | |
parent | 1d359279e22874121def2ce4bfdb633d94ea5ade (diff) | |
download | ComputeLibrary-20fca524baf99402f742ce38c538f2fd07d5fff9.tar.gz |
Create core library using high priority operators
A smaller core library is created using a subset of the operators.
Changed the structure of filelist.json in order to include more
information about the kernels and make the selection easier.
Resolves: COMPMID-4514
Change-Id: I079ca7d8e64346174eebdd13b834e1dd4dc36ca2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5786
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/cpu/kernels')
24 files changed, 343 insertions, 207 deletions
diff --git a/src/core/cpu/kernels/CpuActivationKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp index 8a57a3b529..24642f1efb 100644 --- a/src/core/cpu/kernels/CpuActivationKernel.cpp +++ b/src/core/cpu/kernels/CpuActivationKernel.cpp @@ -45,7 +45,8 @@ namespace { struct ActivationSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using ActivationSelectorPtr = std::add_pointer<bool(const ActivationSelectorData &data)>::type; @@ -60,19 +61,19 @@ struct ActivationKernel static const ActivationKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "fp16_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F16; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_activation) }, { "fp32_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F32; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_activation) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) { "fp16_neon_activation", [](const ActivationSelectorData & data) { return data.dt == DataType::F16; }, @@ -83,24 +84,24 @@ static const ActivationKernel available_kernels[] = [](const ActivationSelectorData & data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_activation) }, -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) { "qasymm8_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_activation) }, { "qasymm8_signed_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_activation) }, { "qsymm16_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16 && data.ci.has_sve2(); }, REGISTER_QSYMM16_SVE(arm_compute::cpu::qsymm16_sve_activation) }, -#else /* !defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ { "qasymm8_neon_activation", [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; }, @@ -116,7 +117,6 @@ static const ActivationKernel available_kernels[] = [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; }, REGISTER_QSYMM16_NEON(arm_compute::cpu::qsymm16_neon_activation) }, -#endif /* defined(__ARM_FEATURE_SVE2) */ }; const ActivationKernel *get_implementation(const ActivationSelectorData &data) @@ -155,7 +155,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); - const auto *uk = get_implementation(ActivationSelectorData{ src->data_type() }); + const auto *uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); const DataType data_type = src->data_type(); @@ -243,7 +243,7 @@ void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, con const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC); ITensor *dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation(ActivationSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation(ActivationSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, dst, _act_info, window); } diff --git a/src/core/cpu/kernels/CpuAddKernel.cpp b/src/core/cpu/kernels/CpuAddKernel.cpp index 7afdceae38..8d74b4027b 100644 --- a/src/core/cpu/kernels/CpuAddKernel.cpp +++ b/src/core/cpu/kernels/CpuAddKernel.cpp @@ -45,9 +45,15 @@ namespace { struct AddSelectorData { - DataType dt1; - DataType dt2; - DataType dt3; + /* Data types for all ITensorInfos: + dt1 -> src0 + dt2 -> src1 + dt3 -> dst + */ + DataType dt1; + DataType dt2; + DataType dt3; + const CPUInfo &ci; }; using AddSelectorPtr = std::add_pointer<bool(const AddSelectorData &data)>::type; @@ -61,49 +67,99 @@ struct AddKernel static const AddKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE2) + { + "add_qasymm8_sve", + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)) && data.ci.has_sve(); + }, + REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve) + }, + { + "add_qasymm8_signed_sve", + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8_SIGNED)) && data.ci.has_sve(); + }, + REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve) + }, + { + "add_qsymm16_sve", + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)) && data.ci.has_sve(); + }, + REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve) + }, +#endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)) && data.ci.has_sve(); + }, REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)) && data.ci.has_sve(); + }, REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>) }, { "add_u8_s16_s16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_s16_s16_sve) }, { "add_s16_u8_s16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_u8_s16_sve) }, { "add_u8_u8_s16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_u8_s16_sve) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) { "add_same_neon", [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); }, @@ -112,7 +168,10 @@ static const AddKernel available_kernels[] = #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "add_same_neon", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)) && data.ci.has_fp16(); + }, REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ @@ -146,24 +205,8 @@ static const AddKernel available_kernels[] = [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); }, REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_u8_s16_neon) }, -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) - { - "add_qasymm8_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); }, - REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve) - }, - { - "add_qasymm8_signed_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8_SIGNED)); }, - REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve) - }, - { - "add_qsymm16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); }, - REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve) - }, -#else /* !defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) { "add_qasymm8_neon", [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); }, @@ -179,8 +222,7 @@ static const AddKernel available_kernels[] = [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); }, REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon) }, -#endif /* defined(ENABLE_NEON) */ - +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ }; /** Micro-kernel selector @@ -189,11 +231,11 @@ static const AddKernel available_kernels[] = * * @return A matching micro-kernel else nullptr */ -const AddKernel *get_implementation(DataType dt1, DataType dt2, DataType dt3) +const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt1, DataType dt2, DataType dt3) { for(const auto &uk : available_kernels) { - if(uk.is_selected({ dt1, dt2, dt3 })) + if(uk.is_selected({ dt1, dt2, dt3, cpuinfo })) { return &uk; } @@ -241,7 +283,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons "Wrong shape for dst"); } - const auto *uk = get_implementation(src0.data_type(), src1.data_type(), dst.data_type()); + const auto *uk = get_implementation(CPUInfo::get(), src0.data_type(), src1.data_type(), dst.data_type()); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; @@ -327,7 +369,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre const ITensor *src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1); ITensor *dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation(src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type()); + const auto *uk = get_implementation(CPUInfo::get(), src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type()); ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); uk->ukernel(src0, src1, dst, _policy, window); diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp index 643a870540..dc0c5b210d 100644 --- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp @@ -43,7 +43,13 @@ namespace kernels { namespace { -using ElementwiseSelector = std::add_pointer<bool(DataType)>::type; +struct ElementwiseSelectorData +{ + DataType dt; + const CPUInfo &ci; +}; + +using ElementwiseSelector = std::add_pointer<bool(const ElementwiseSelectorData &)>::type; using UKernelType = CpuElementwiseKernel::ElementwiseFunction; struct ElementwiseKernel { @@ -52,23 +58,6 @@ struct ElementwiseKernel UKernelType *ukernel; }; -template <DataType dt> -inline bool is_selected(DataType data_type) -{ - return dt == data_type; -} - -template <DataType input_data_type, DataType output_data_type = input_data_type> -static ElementwiseKernel generate_kernel(UKernelType *ukernel) -{ - std::string kernel_name("op_"); - kernel_name += string_from_data_type(input_data_type) + "_"; - kernel_name += string_from_data_type(input_data_type) + "_"; - kernel_name += string_from_data_type(output_data_type); - - return { kernel_name.c_str(), is_selected<input_data_type>, ukernel }; -} - template <ArithmeticOperation op> std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) @@ -76,36 +65,85 @@ configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorI ARM_COMPUTE_UNUSED(src1, dst); static ElementwiseKernel kernels[] = { -#if defined(ENABLE_SVE) - generate_kernel<DataType::F32>(REGISTER_FP32_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float32_t>))), - generate_kernel<DataType::S32>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int32_t>))), - generate_kernel<DataType::S16>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int16_t>))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) - generate_kernel<DataType::F32>(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>>))), - generate_kernel<DataType::S32>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>>))), -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) - generate_kernel<DataType::QASYMM8>(REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, uint8_t>))), - generate_kernel<DataType::QASYMM8_SIGNED>(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, int8_t>))), -#else /* !defined(__ARM_FEATURE_SVE2) */ - generate_kernel<DataType::QASYMM8>(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized<op>))), - generate_kernel<DataType::QASYMM8_SIGNED>(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed<op>))), -#endif /* defined(__ARM_FEATURE_SVE2) */ -#if defined(ENABLE_SVE) - generate_kernel<DataType::F16>(REGISTER_FP16_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float16_t>))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_elementwise_fp32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, + REGISTER_FP32_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float32_t>)) + }, + { + "sve_elementwise_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int32_t>)) + }, + { + "sve_elementwise_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int16_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) + { + "neon_elementwise_f32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32; }, + REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>>)) + }, + { + "neon_elementwise_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) + { + "sve2_elementwise_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, uint8_t>)) + }, + { + "sve2_elementwise_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, int8_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) + { + "neon_elementwise_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized<op>)) + }, + { + "neon_elementwise_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed<op>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_elementwise_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, + REGISTER_FP16_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float16_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - generate_kernel<DataType::F16>(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float16_t, 8>>))), + { + "neon_elementwise_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, + REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float16_t, 8>>)) + }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ - generate_kernel<DataType::S16>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>>))), -#endif /* defined(ENABLE_NEON) */ + { + "neon_elementwise_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; for(const auto &uk : kernels) { - if(uk.is_selected(src0->data_type())) + if(uk.is_selected({ src0->data_type(), CPUInfo::get() })) { return uk.ukernel; } @@ -121,36 +159,93 @@ configure_comp_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInf ARM_COMPUTE_UNUSED(src1, dst); static ElementwiseKernel kernels[] = { -#if defined(ENABLE_SVE) - generate_kernel<DataType::U8, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, uint8_t>))), - generate_kernel<DataType::F32, DataType::U8>(REGISTER_FP32_SVE((arm_compute::cpu::elementwise_comparison_op<op, float>))), - generate_kernel<DataType::S16, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int16_t>))), - generate_kernel<DataType::S32, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int32_t>))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) - generate_kernel<DataType::U8, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8<op, uint8_t, uint8x16_t>))), - generate_kernel<DataType::F32, DataType::U8>(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32<op, float, float32x4_t>))), - generate_kernel<DataType::S16, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16<op, int16_t, int16x8_t>))), - generate_kernel<DataType::S32, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32<op, int32_t, int32x4_t>))), -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) - generate_kernel<DataType::QASYMM8_SIGNED, DataType::U8>(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, int8_t>))), - generate_kernel<DataType::QASYMM8, DataType::U8>(REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, uint8_t>))), -#else /* !defined(__ARM_FEATURE_SVE2) */ - generate_kernel<DataType::QASYMM8_SIGNED, DataType::U8>(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed<op>))), - generate_kernel<DataType::QASYMM8, DataType::U8>(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized<op>))), -#endif /* defined(__ARM_FEATURE_SVE2) */ -#if defined(ENABLE_SVE) - generate_kernel<DataType::F16, DataType::U8>(REGISTER_FP16_SVE((arm_compute::cpu::elementwise_comparison_op<op, float16_t>))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - generate_kernel<DataType::F16, DataType::U8>(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16<op, float16_t, float16x8_t>))), -#endif /* defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_comparison_u8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, uint8_t>)) + }, + { + "sve_comparison_f32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, + REGISTER_FP32_SVE((arm_compute::cpu::elementwise_comparison_op<op, float>)) + }, + { + "sve_comparison_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int16_t>)) + }, + { + "sve_comparison_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int32_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) + { + "neon_comparison_u8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::U8; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8<op, uint8_t, uint8x16_t>)) + }, + { + "neon_comparison_f32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32; }, + REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32<op, float, float32x4_t>)) + }, + { + "neon_comparison_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16<op, int16_t, int16x8_t>)) + }, + { + "neon_comparison_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32<op, int32_t, int32x4_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) + { + "sve_comparison_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, uint8_t>)) + }, + { + "sve_comparison_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, int8_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) + { + "neon_comparison_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized<op>)) + }, + { + "neon_comparison_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed<op>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_comparison_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, + REGISTER_FP16_SVE((arm_compute::cpu::elementwise_comparison_op<op, float16_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + { + "neon_comparison_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, + REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16<op, float16_t, float16x8_t>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ }; for(const auto &uk : kernels) { - if(uk.is_selected(src0->data_type())) + if(uk.is_selected({ src0->data_type(), CPUInfo::get() })) { return uk.ukernel; } diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp index 2600a49b70..91fa75ebaf 100644 --- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -54,7 +54,7 @@ struct ElementwiseUnaryKernel static const ElementwiseUnaryKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "fp32_sve_elementwise_unary", [](DataType dt) { return dt == DataType::F32; }, @@ -70,8 +70,8 @@ static const ElementwiseUnaryKernel available_kernels[] = [](DataType dt) { return dt == DataType::S32; }, REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<int32_t>), }, -#endif // defined(ENABLE_SVE) -#if defined(ENABLE_NEON) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_NEON) { "fp32_neon_elementwise_unary", [](DataType dt) { return dt == DataType::F32; }, @@ -89,7 +89,7 @@ static const ElementwiseUnaryKernel available_kernels[] = [](DataType dt) { return dt == DataType::S32; }, REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<int32_t>), }, -#endif // defined(ENABLE_NEON) +#endif // defined(ARM_COMPUTE_ENABLE_NEON) }; const ElementwiseUnaryKernel *get_implementation(DataType dt) diff --git a/src/core/cpu/kernels/CpuScaleKernel.cpp b/src/core/cpu/kernels/CpuScaleKernel.cpp index 29475fa63f..a072dbd896 100644 --- a/src/core/cpu/kernels/CpuScaleKernel.cpp +++ b/src/core/cpu/kernels/CpuScaleKernel.cpp @@ -50,7 +50,8 @@ namespace { struct ScaleSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using ScaleSelectorPtr = std::add_pointer<bool(const ScaleSelectorData &data)>::type; using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, @@ -64,43 +65,43 @@ struct ScaleKernel static const ScaleKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "fp16_sve_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F16; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale) }, { "f32_sve_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F32; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale) }, { "qasymm8_sve_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale) }, { "qasymm8_signed_sve_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale) }, { "u8_sve_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::U8; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); }, REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale) }, { "s16_sve_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::S16; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); }, REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "common_neon_scale", - [](const ScaleSelectorData & data) { return data.dt == DataType::F16; }, + [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>) }, #endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ @@ -129,7 +130,7 @@ static const ScaleKernel available_kernels[] = [](const ScaleSelectorData & data) { return data.dt == DataType::S16; }, REGISTER_INTEGER_NEON(arm_compute::cpu::common_neon_scale<int16_t>) }, -#endif /* defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; /** Micro-kernel selector @@ -153,7 +154,7 @@ const ScaleKernel *get_implementation(const ScaleSelectorData &data) Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info) { - const auto *uk = get_implementation(ScaleSelectorData{ src->data_type() }); + const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); @@ -607,7 +608,7 @@ void CpuScaleKernel::run_op(ITensorPack &tensors, const Window &window, const Th } else { - const auto *uk = get_implementation(ScaleSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation(ScaleSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, dst, offsets, dx, dy, _policy, _border_mode, _constant_border_value, _sampling_offset, _align_corners, window); } } diff --git a/src/core/cpu/kernels/CpuSoftmaxKernel.cpp b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp index 8ea186b16a..1e00e12050 100644 --- a/src/core/cpu/kernels/CpuSoftmaxKernel.cpp +++ b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp @@ -47,7 +47,8 @@ namespace { struct SoftmaxSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using SoftmaxSelectorPtr = std::add_pointer<bool(const SoftmaxSelectorData &data)>::type; using SoftmaxLogits1DMaxKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &)>::type; @@ -69,20 +70,20 @@ struct SoftmaxLogits1DMaxKernel static const SoftmaxLogits1DKernel available_logits_1d_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_softmax_logits_1d_float", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::sve_softmax_logits_1d_float<float>) }, { "sve_softmax_logits_1d_float", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::sve_softmax_logits_1d_float<float16_t>) }, -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_softmax_logits_1d_float", [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, @@ -95,20 +96,20 @@ static const SoftmaxLogits1DKernel available_logits_1d_kernels[] = REGISTER_FP16_NEON(arm_compute::cpu::neon_softmax_logits_1d_float<float16_t>) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ -#endif /* !defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) { "sve_softmax_logits_1d_quantized", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve2(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_softmax_logits_1d_quantized<qasymm8_t>) }, { "sve_softmax_logits_1d_quantized", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_softmax_logits_1d_quantized<qasymm8_signed_t>) }, -#else /* !defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ { "neon_softmax_logits_1d_quantized", [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); }, @@ -119,35 +120,33 @@ static const SoftmaxLogits1DKernel available_logits_1d_kernels[] = [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_softmax_logits_1d_quantized<qasymm8_signed_t>) }, -#endif /* defined(__ARM_FEATURE_SVE2) */ - }; static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::sve_logits_1d_max<float>) }, { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::sve_logits_1d_max<float16_t>) }, { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_logits_1d_max<qasymm8_t>) }, { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_logits_1d_max<qasymm8_signed_t>) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_logits_1d_max", [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, @@ -170,14 +169,14 @@ static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] = [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_logits_1d_max<qasymm8_signed_t>) }, -#endif /* defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; const SoftmaxLogits1DKernel *get_implementation_logits(const SoftmaxSelectorData &data) { for(const auto &uk : available_logits_1d_kernels) { - if(uk.is_selected({ data.dt })) + if(uk.is_selected({ data.dt, CPUInfo::get() })) { return &uk; } @@ -189,7 +188,7 @@ const SoftmaxLogits1DMaxKernel *get_implementation_logits_max(const SoftmaxSelec { for(const auto &uk : available_logits_1d_max_kernels) { - if(uk.is_selected({ data.dt })) + if(uk.is_selected({ data.dt, CPUInfo::get() })) { return &uk; } @@ -253,7 +252,7 @@ void CpuLogits1DMaxKernel::run_op(ITensorPack &tensors, const Window &window, co const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); auto dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, dst, window); } @@ -364,7 +363,7 @@ void CpuLogits1DSoftmaxKernel<IS_LOG>::run_op(ITensorPack &tensors, const Window void *tmp_for_thread = tmp->buffer() + (info.thread_id * tmp_size_for_thread); - const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, max, tmp_for_thread, dst, _beta, IS_LOG, window); } diff --git a/src/core/cpu/kernels/activation/sve/qasymm8.cpp b/src/core/cpu/kernels/activation/sve/qasymm8.cpp index 228b4ae530..69fffd96c5 100644 --- a/src/core/cpu/kernels/activation/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/activation/sve/qasymm8.cpp @@ -21,14 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" #include <cmath> #include <cstddef> -#if defined(__ARM_FEATURE_SVE2) #include "src/core/NEON/SVEAsymm.h" #include "src/core/NEON/SVEMath.h" #include <arm_sve.h> @@ -251,4 +250,4 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */
\ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file diff --git a/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp index 989f825eb9..53ee515ff9 100644 --- a/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp @@ -28,7 +28,7 @@ #include <cmath> #include <cstddef> -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEAsymm.h" #include "src/core/NEON/SVEMath.h" #include <arm_sve.h> @@ -250,4 +250,4 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ diff --git a/src/core/cpu/kernels/activation/sve/qsymm16.cpp b/src/core/cpu/kernels/activation/sve/qsymm16.cpp index 66974875da..ac549770a2 100644 --- a/src/core/cpu/kernels/activation/sve/qsymm16.cpp +++ b/src/core/cpu/kernels/activation/sve/qsymm16.cpp @@ -29,7 +29,7 @@ #include <cmath> #include <cstddef> -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEMath.h" #include "src/core/NEON/SVESymm.h" #include <arm_sve.h> @@ -117,4 +117,4 @@ void qsymm16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ diff --git a/src/core/cpu/kernels/add/sve/impl.h b/src/core/cpu/kernels/add/sve/impl.h index c38b1d47e0..32ff5d0496 100644 --- a/src/core/cpu/kernels/add/sve/impl.h +++ b/src/core/cpu/kernels/add/sve/impl.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H #define SRC_CORE_SVE_KERNELS_ADD_IMPL_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" @@ -36,5 +36,5 @@ template <typename ScalarType> void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/list.h b/src/core/cpu/kernels/add/sve/list.h index aebb43bb60..9e439497c9 100644 --- a/src/core/cpu/kernels/add/sve/list.h +++ b/src/core/cpu/kernels/add/sve/list.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_ADD_LIST_H #define SRC_CORE_SVE_KERNELS_ADD_LIST_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/NEON/SVEMath.h" @@ -50,5 +50,5 @@ DECLARE_ADD_KERNEL(add_u8_u8_s16_sve); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif // SRC_CORE_SVE_KERNELS_ADD_LIST_H
\ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/qasymm8.cpp b/src/core/cpu/kernels/add/sve/qasymm8.cpp index f6d1485e61..888ad878ca 100644 --- a/src/core/cpu/kernels/add/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/add/sve/qasymm8.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include <arm_sve.h> namespace arm_compute @@ -179,4 +179,4 @@ void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */
\ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp index 8102aa5c65..3b922c6c21 100644 --- a/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include <arm_sve.h> namespace arm_compute @@ -178,4 +178,4 @@ void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *d } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */
\ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/qsymm16.cpp b/src/core/cpu/kernels/add/sve/qsymm16.cpp index fb62257b0a..eef5d245d3 100644 --- a/src/core/cpu/kernels/add/sve/qsymm16.cpp +++ b/src/core/cpu/kernels/add/sve/qsymm16.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include <arm_sve.h> namespace arm_compute @@ -153,4 +153,4 @@ void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */
\ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h index a92a8648a8..fea38d2995 100644 --- a/src/core/cpu/kernels/elementwise/sve/elementwise_list.h +++ b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h @@ -23,7 +23,7 @@ */ #ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H #define SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" @@ -167,5 +167,5 @@ template <ComparisonOperation op, typename ScalarType, typename OutputScalarType void elementwise_comparison_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif /* SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H */ diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h index 6c5524e284..5e04128b44 100644 --- a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h +++ b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H #define SRC_CORE_SVE_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/wrapper/svtraits.h" #include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h" @@ -362,5 +362,5 @@ void elementwise_comparison_quantized_op(const ITensor *in1, const ITensor *in2, } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ #endif /* SRC_CORE_SVE_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H */
\ No newline at end of file diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h index 63490421e9..c2b495f27c 100644 --- a/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h +++ b/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h @@ -25,7 +25,7 @@ #define SRC_CORE_SVE_KERNELS_ELEMENTWISE_UNARY_LIST_H #include "arm_compute/core/Types.h" -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_compute { @@ -35,5 +35,5 @@ template <typename ScalarType> void elementwise_sve_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H
\ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/fp16.cpp b/src/core/cpu/kernels/scale/sve/fp16.cpp index 5b9377c6e6..76e7735b8a 100644 --- a/src/core/cpu/kernels/scale/sve/fp16.cpp +++ b/src/core/cpu/kernels/scale/sve/fp16.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -173,4 +173,4 @@ void fp16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE
\ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/fp32.cpp b/src/core/cpu/kernels/scale/sve/fp32.cpp index 05fbedf20d..030e109cdf 100644 --- a/src/core/cpu/kernels/scale/sve/fp32.cpp +++ b/src/core/cpu/kernels/scale/sve/fp32.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -171,4 +171,4 @@ void fp32_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE
\ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/integer.cpp b/src/core/cpu/kernels/scale/sve/integer.cpp index d7e270c661..486c674612 100644 --- a/src/core/cpu/kernels/scale/sve/integer.cpp +++ b/src/core/cpu/kernels/scale/sve/integer.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -297,4 +297,4 @@ void s16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, con } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE
\ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/qasymm8.cpp b/src/core/cpu/kernels/scale/sve/qasymm8.cpp index f747037938..c9122ad40b 100644 --- a/src/core/cpu/kernels/scale/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/scale/sve/qasymm8.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -204,4 +204,4 @@ void qasymm8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE)
\ No newline at end of file +#endif // defined(ARM_COMPUTE_ENABLE_SVE)
\ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp index 584ec7a0da..0843e61fd4 100644 --- a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -204,4 +204,4 @@ void qasymm8_signed_sve_scale(const ITensor *src, ITensor *dst, const ITensor *o } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE
\ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file diff --git a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp index 4ed5a4fbea..7a577fd565 100644 --- a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp +++ b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" @@ -182,4 +182,4 @@ template void sve_softmax_logits_1d_float<float16_t>(const ITensor *in, const IT ITensor *out, const float beta, bool is_log, const Window &window); } // namespace cpu } // namespace arm_compute -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ diff --git a/src/core/cpu/kernels/softmax/impl/sve/list.h b/src/core/cpu/kernels/softmax/impl/sve/list.h index 7ddb358b8e..b4e1e1b186 100644 --- a/src/core/cpu/kernels/softmax/impl/sve/list.h +++ b/src/core/cpu/kernels/softmax/impl/sve/list.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H #define SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/NEON/SVEMath.h" @@ -42,7 +42,7 @@ template <typename ScalarType> void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, const float beta, bool is_log, const Window &window); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) template <typename ScalarType> void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, float beta, bool is_log, const Window &window) @@ -215,9 +215,9 @@ void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void }, in_it, max_it, out_it); } -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } // namespace cpu } // namespace arm_compute -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #endif /* SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H */ |