diff options
21 files changed, 424 insertions, 82 deletions
diff --git a/Android.bp b/Android.bp index 413c388285..727e7c11a5 100644 --- a/Android.bp +++ b/Android.bp @@ -442,13 +442,20 @@ cc_library_static { "src/cpu/kernels/activation/sve/qasymm8.cpp", "src/cpu/kernels/activation/sve/qasymm8_signed.cpp", "src/cpu/kernels/activation/sve/qsymm16.cpp", - "src/cpu/kernels/add/neon/qasymm8.cpp", - "src/cpu/kernels/add/neon/qasymm8_signed.cpp", - "src/cpu/kernels/add/neon/qsymm16.cpp", - "src/cpu/kernels/add/sve/impl.cpp", - "src/cpu/kernels/add/sve/qasymm8.cpp", - "src/cpu/kernels/add/sve/qasymm8_signed.cpp", - "src/cpu/kernels/add/sve/qsymm16.cpp", + "src/cpu/kernels/add/generic/neon/fp16.cpp", + "src/cpu/kernels/add/generic/neon/fp32.cpp", + "src/cpu/kernels/add/generic/neon/impl.cpp", + "src/cpu/kernels/add/generic/neon/integer.cpp", + "src/cpu/kernels/add/generic/neon/qasymm8.cpp", + "src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/add/generic/neon/qsymm16.cpp", + "src/cpu/kernels/add/generic/sve/fp16.cpp", + "src/cpu/kernels/add/generic/sve/fp32.cpp", + "src/cpu/kernels/add/generic/sve/impl.cpp", + "src/cpu/kernels/add/generic/sve/integer.cpp", + "src/cpu/kernels/add/generic/sve2/qasymm8.cpp", + "src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp", + "src/cpu/kernels/add/generic/sve2/qsymm16.cpp", "src/cpu/kernels/elementwise/sve/elementwise.cpp", "src/cpu/kernels/elementwise/sve/elementwise_unary.cpp", "src/cpu/kernels/floor/neon/fp16.cpp", diff --git a/filelist.json b/filelist.json index 0e7dd4ac4f..3a6afcd237 100644 --- a/filelist.json +++ b/filelist.json @@ -874,15 +874,24 @@ "src/cpu/operators/CpuAdd.cpp", "src/cpu/kernels/CpuAddKernel.cpp", "src/runtime/NEON/functions/NEArithmeticAddition.cpp", - "src/cpu/kernels/add/neon/qasymm8.cpp", - "src/cpu/kernels/add/neon/qasymm8_signed.cpp", - "src/cpu/kernels/add/neon/qsymm16.cpp" + "src/cpu/kernels/add/generic/neon/qasymm8.cpp", + "src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/add/generic/neon/qsymm16.cpp" ], + "neon": { + "common": [ "src/cpu/kernels/add/generic/neon/impl.cpp" ], + "fp32":["src/cpu/kernels/add/generic/neon/fp32.cpp"], + "fp16":["src/cpu/kernels/add/generic/neon/fp16.cpp"], + "integer":["src/cpu/kernels/add/generic/neon/integer.cpp"] + }, "sve": { - "common": [ "src/cpu/kernels/add/sve/impl.cpp" ], - "qasymm8": [ "src/cpu/kernels/add/sve/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/add/sve/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/add/sve/qsymm16.cpp" ] + "common": [ "src/cpu/kernels/add/generic/sve/impl.cpp" ], + "fp32":["src/cpu/kernels/add/generic/sve/fp32.cpp"], + "fp16":["src/cpu/kernels/add/generic/sve/fp16.cpp"], + "integer":["src/cpu/kernels/add/generic/sve/integer.cpp"], + "qasymm8": [ "src/cpu/kernels/add/generic/sve2/qasymm8.cpp" ], + "qasymm8_signed": [ "src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp" ], + "qsymm16": [ "src/cpu/kernels/add/generic/sve2/qsymm16.cpp" ] } } }, diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h index 65f6c7093d..c7fbf7f831 100644 --- a/src/core/common/Registrars.h +++ b/src/core/common/Registrars.h @@ -32,6 +32,12 @@ #define REGISTER_FP16_SVE(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) +#define REGISTER_FP16_SVE2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#define REGISTER_FP16_SVE2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ + #if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) #define REGISTER_FP16_NEON(func_name) &(func_name) #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ @@ -41,6 +47,7 @@ #else /* !defined(ENABLE_FP16_KERNELS) */ #define REGISTER_FP16_NEON(func_name) nullptr #define REGISTER_FP16_SVE(func_name) nullptr +#define REGISTER_FP16_SVE2(func_name) nullptr #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ #if defined(ENABLE_FP32_KERNELS) @@ -51,6 +58,12 @@ #define REGISTER_FP32_SVE(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) +#define REGISTER_FP32_SVE2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#define REGISTER_FP32_SVE2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ + #if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_FP32_NEON(func_name) &(func_name) #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ @@ -60,6 +73,7 @@ #else /* defined(ENABLE_FP32_KERNELS) */ #define REGISTER_FP32_NEON(func_name) nullptr #define REGISTER_FP32_SVE(func_name) nullptr +#define REGISTER_FP32_SVE2(func_name) nullptr #endif /* defined(ENABLE_FP32_KERNELS) */ #if defined(ENABLE_QASYMM8_SIGNED_KERNELS) @@ -72,9 +86,16 @@ #define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) +#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ + #else /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */ #define REGISTER_QASYMM8_SIGNED_NEON(func_name) nullptr #define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr +#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) nullptr #endif /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */ #if defined(ENABLE_QASYMM8_KERNELS) @@ -86,9 +107,16 @@ #define REGISTER_QASYMM8_SVE(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) +#define REGISTER_QASYMM8_SVE2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#define REGISTER_QASYMM8_SVE2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ + #else /* defined(ENABLE_QASYMM8_KERNELS) */ #define REGISTER_QASYMM8_NEON(func_name) nullptr #define REGISTER_QASYMM8_SVE(func_name) nullptr +#define REGISTER_QASYMM8_SVE2(func_name) nullptr #endif /* defined(ENABLE_QASYMM8_KERNELS) */ #if defined(ENABLE_QSYMM16_KERNELS) @@ -101,9 +129,16 @@ #define REGISTER_QSYMM16_SVE(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) +#define REGISTER_QSYMM16_SVE2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#define REGISTER_QSYMM16_SVE2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ + #else /* defined(ENABLE_QSYMM16_KERNELS) */ #define REGISTER_QSYMM16_NEON(func_name) nullptr #define REGISTER_QSYMM16_SVE(func_name) nullptr +#define REGISTER_QSYMM16_SVE2(func_name) nullptr #endif /* defined(ENABLE_QSYMM16_KERNELS) */ #if defined(ENABLE_INTEGER_KERNELS) @@ -114,6 +149,12 @@ #define REGISTER_INTEGER_SVE(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) +#define REGISTER_INTEGER_SVE2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ +#define REGISTER_INTEGER_SVE2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ + #if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_INTEGER_NEON(func_name) &(func_name) #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ @@ -123,6 +164,7 @@ #else /* defined(ENABLE_INTEGER_KERNELS) */ #define REGISTER_INTEGER_NEON(func_name) nullptr #define REGISTER_INTEGER_SVE(func_name) nullptr +#define REGISTER_INTEGER_SVE2(func_name) nullptr #endif /* defined(ENABLE_INTEGER_KERNELS) */ #endif /* SRC_CORE_COMMON_REGISTRARS_H */ diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp index 73c1fda711..0c3540f0d4 100644 --- a/src/cpu/kernels/CpuAddKernel.cpp +++ b/src/cpu/kernels/CpuAddKernel.cpp @@ -30,9 +30,7 @@ #include "src/core/common/Registrars.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" -#include "src/cpu/kernels/add/neon/list.h" -#include "src/cpu/kernels/add/sve/list.h" - +#include "src/cpu/kernels/add/list.h" #include <array> namespace arm_compute @@ -67,7 +65,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::QASYMM8) && data.ci.has_sve2(); }, - REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve) + REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2) }, { "sve2_qs8_add", @@ -75,7 +73,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2(); }, - REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve) + REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2) }, { "sve2_qs16_add", @@ -83,7 +81,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::QSYMM16) && data.ci.has_sve2(); }, - REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve) + REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2) }, #endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */ #if defined(ARM_COMPUTE_ENABLE_SVE) @@ -93,7 +91,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::F32) && data.ci.has_sve(); }, - REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>) + REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve) }, { "sve_fp16_add", @@ -101,7 +99,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::F16) && data.ci.has_sve(); }, - REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>) + REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve) }, { "sve_u8_add", @@ -109,7 +107,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::U8) && data.ci.has_sve(); }, - REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>) + REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve) }, { "sve_s16_add", @@ -117,7 +115,7 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::S16) && data.ci.has_sve(); }, - REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>) + REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve) }, { "sve_s32_add", @@ -125,14 +123,14 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::S32) && data.ci.has_sve(); }, - REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>) + REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve) }, #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_fp32_add", [](const AddSelectorData & data) { return (data.dt == DataType::F32); }, - REGISTER_FP32_NEON(arm_compute::cpu::add_same_neon<float>) + REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon) }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { @@ -141,23 +139,23 @@ static const AddKernel available_kernels[] = { return (data.dt == DataType::F16) && data.ci.has_fp16(); }, - REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>) + REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "neon_u8_add", [](const AddSelectorData & data) { return (data.dt == DataType::U8); }, - REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<uint8_t>) + REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon) }, { "neon_s16_add", [](const AddSelectorData & data) { return (data.dt == DataType::S16); }, - REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int16_t>) + REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon) }, { "neon_s32_add", [](const AddSelectorData & data) { return (data.dt == DataType::S32); }, - REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int32_t>) + REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon) }, #endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ #if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) @@ -295,12 +293,12 @@ const char *CpuAddKernel::name() const size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const { ARM_COMPUTE_UNUSED(thread_count); - // Tuning results that gave optimized results in performance investigation - if (platform.get_cpu_model() == CPUModel::A73 ) + // Tuning results that gave optimized results in performance investigation + if(platform.get_cpu_model() == CPUModel::A73) { return 10240; } - else + else { return 9216; } diff --git a/src/cpu/kernels/add/generic/neon/fp16.cpp b/src/cpu/kernels/add/generic/neon/fp16.cpp new file mode 100644 index 0000000000..12d4a467b7 --- /dev/null +++ b/src/cpu/kernels/add/generic/neon/fp16.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +#include "src/cpu/kernels/add/generic/neon/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void add_fp16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_neon<float16_t>(src0, src1, dst, policy, window); +} +} +} // namespace arm_compute +#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/add/generic/neon/fp32.cpp b/src/cpu/kernels/add/generic/neon/fp32.cpp new file mode 100644 index 0000000000..3563162fce --- /dev/null +++ b/src/cpu/kernels/add/generic/neon/fp32.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/cpu/kernels/add/generic/neon/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void add_fp32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_neon<float>(src0, src1, dst, policy, window); +} +} +} // namespace arm_compute diff --git a/src/cpu/kernels/add/neon/list.h b/src/cpu/kernels/add/generic/neon/impl.cpp index 379bd32fb1..ad3e445ab0 100644 --- a/src/cpu/kernels/add/neon/list.h +++ b/src/cpu/kernels/add/generic/neon/impl.cpp @@ -21,26 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_NEON_KERNELS_ADD_LIST_H -#define SRC_CORE_NEON_KERNELS_ADD_LIST_H -#include "arm_compute/core/Types.h" +#include "src/cpu/kernels/add/generic/neon/impl.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/NEON/wrapper/wrapper.h" - namespace arm_compute { namespace cpu { -#define DECLARE_ADD_KERNEL(func_name) \ - void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) - -DECLARE_ADD_KERNEL(add_qasymm8_neon); -DECLARE_ADD_KERNEL(add_qasymm8_signed_neon); -DECLARE_ADD_KERNEL(add_qsymm16_neon); - -#undef DECLARE_ADD_KERNEL - template <typename ScalarType> void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) { @@ -138,6 +127,15 @@ void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const input1, input2, output); } } + +template void add_same_neon<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +template void add_same_neon<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +template void add_same_neon<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +template void add_same_neon<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); + +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) +template void add_same_neon<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ + } // namespace cpu } // namespace arm_compute -#endif // SRC_CORE_NEON_KERNELS_ADD_LIST_H diff --git a/src/cpu/kernels/add/generic/neon/impl.h b/src/cpu/kernels/add/generic/neon/impl.h new file mode 100644 index 0000000000..07afdda225 --- /dev/null +++ b/src/cpu/kernels/add/generic/neon/impl.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_KERNELS_ADD_IMPL_H +#define SRC_CORE_NEON_KERNELS_ADD_IMPL_H +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +namespace arm_compute +{ +namespace cpu +{ +template <typename ScalarType> +void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +} // namespace cpu +} // namespace arm_compute +#endif // SRC_CORE_NEON_KERNELS_ADD_IMPL_H
\ No newline at end of file diff --git a/src/cpu/kernels/add/generic/neon/integer.cpp b/src/cpu/kernels/add/generic/neon/integer.cpp new file mode 100644 index 0000000000..62c19e66b1 --- /dev/null +++ b/src/cpu/kernels/add/generic/neon/integer.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/cpu/kernels/add/generic/neon/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void add_u8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_neon<uint8_t>(src0, src1, dst, policy, window); +} + +void add_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_neon<int16_t>(src0, src1, dst, policy, window); +} + +void add_s32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_neon<int32_t>(src0, src1, dst, policy, window); +} +} +} // namespace arm_compute diff --git a/src/cpu/kernels/add/neon/qasymm8.cpp b/src/cpu/kernels/add/generic/neon/qasymm8.cpp index e357a7ef7f..e357a7ef7f 100644 --- a/src/cpu/kernels/add/neon/qasymm8.cpp +++ b/src/cpu/kernels/add/generic/neon/qasymm8.cpp diff --git a/src/cpu/kernels/add/neon/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp index d62d0739f5..d62d0739f5 100644 --- a/src/cpu/kernels/add/neon/qasymm8_signed.cpp +++ b/src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp diff --git a/src/cpu/kernels/add/neon/qsymm16.cpp b/src/cpu/kernels/add/generic/neon/qsymm16.cpp index e76e408d6e..e76e408d6e 100644 --- a/src/cpu/kernels/add/neon/qsymm16.cpp +++ b/src/cpu/kernels/add/generic/neon/qsymm16.cpp diff --git a/src/cpu/kernels/add/generic/sve/fp16.cpp b/src/cpu/kernels/add/generic/sve/fp16.cpp new file mode 100644 index 0000000000..71056a0a48 --- /dev/null +++ b/src/cpu/kernels/add/generic/sve/fp16.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +#include "src/cpu/kernels/add/generic/sve/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void add_fp16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_sve<float16_t>(src0, src1, dst, policy, window); +} +} +} // namespace arm_compute +#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/add/generic/sve/fp32.cpp b/src/cpu/kernels/add/generic/sve/fp32.cpp new file mode 100644 index 0000000000..8f651b3ed2 --- /dev/null +++ b/src/cpu/kernels/add/generic/sve/fp32.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(ARM_COMPUTE_ENABLE_SVE) +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "src/cpu/kernels/add/generic/sve/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void add_fp32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_sve<float>(src0, src1, dst, policy, window); +} +} +} // namespace arm_compute +#endif //ARM_COMPUTE_ENABLE_SVE diff --git a/src/cpu/kernels/add/sve/impl.cpp b/src/cpu/kernels/add/generic/sve/impl.cpp index f8e16a508c..52429bbe1e 100644 --- a/src/cpu/kernels/add/sve/impl.cpp +++ b/src/cpu/kernels/add/generic/sve/impl.cpp @@ -21,17 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) +#include "src/cpu/kernels/add/generic/sve/impl.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" - #include "src/core/NEON/SVEMath.h" -#include "src/cpu/kernels/add/sve/impl.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include <arm_sve.h> - namespace arm_compute { namespace cpu @@ -128,12 +124,13 @@ void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const input1, input2, output); } } - template void add_same_sve<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); -template void add_same_sve<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); template void add_same_sve<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); template void add_same_sve<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); template void add_same_sve<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) +template void add_same_sve<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); +#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE) */
\ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/impl.h index 32ff5d0496..59f39e90c9 100644 --- a/src/cpu/kernels/add/sve/impl.h +++ b/src/cpu/kernels/add/generic/sve/impl.h @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#if defined(ARM_COMPUTE_ENABLE_SVE) #ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H #define SRC_CORE_SVE_KERNELS_ADD_IMPL_H - -#if defined(ARM_COMPUTE_ENABLE_SVE) +#include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/core/Window.h" namespace arm_compute { @@ -36,5 +36,5 @@ template <typename ScalarType> void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); } // namespace cpu } // namespace arm_compute -#endif // defined(ARM_COMPUTE_ENABLE_SVE) -#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file +#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/cpu/kernels/add/generic/sve/integer.cpp b/src/cpu/kernels/add/generic/sve/integer.cpp new file mode 100644 index 0000000000..d197717cf0 --- /dev/null +++ b/src/cpu/kernels/add/generic/sve/integer.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(ARM_COMPUTE_ENABLE_SVE) +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "src/cpu/kernels/add/generic/sve/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void add_u8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_sve<uint8_t>(src0, src1, dst, policy, window); +} + +void add_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_sve<int16_t>(src0, src1, dst, policy, window); +} + +void add_s32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +{ + return add_same_sve<int32_t>(src0, src1, dst, policy, window); +} +} +} // namespace arm_compute +#endif //(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/cpu/kernels/add/sve/qasymm8.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp index 888ad878ca..c61089e937 100644 --- a/src/cpu/kernels/add/sve/qasymm8.cpp +++ b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #if defined(ARM_COMPUTE_ENABLE_SVE2) + #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" @@ -34,7 +35,7 @@ namespace arm_compute { namespace cpu { -void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +void add_qasymm8_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) { ARM_COMPUTE_UNUSED(policy); @@ -179,4 +180,4 @@ void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con } } // namespace cpu } // namespace arm_compute -#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file +#endif //ARM_COMPUTE_ENABLE_SVE2 diff --git a/src/cpu/kernels/add/sve/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp index 3b922c6c21..9ac138aaef 100644 --- a/src/cpu/kernels/add/sve/qasymm8_signed.cpp +++ b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp @@ -34,7 +34,7 @@ namespace arm_compute { namespace cpu { -void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +void add_qasymm8_signed_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) { ARM_COMPUTE_UNUSED(policy); @@ -178,4 +178,4 @@ void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *d } } // namespace cpu } // namespace arm_compute -#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file +#endif //ARM_COMPUTE_ENABLE_SVE2 diff --git a/src/cpu/kernels/add/sve/qsymm16.cpp b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp index eef5d245d3..f148872c17 100644 --- a/src/cpu/kernels/add/sve/qsymm16.cpp +++ b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp @@ -34,7 +34,7 @@ namespace arm_compute { namespace cpu { -void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) +void add_qsymm16_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) { ARM_COMPUTE_UNUSED(policy); @@ -153,4 +153,4 @@ void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con } } // namespace cpu } // namespace arm_compute -#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file +#endif //ARM_COMPUTE_ENABLE_SVE2 diff --git a/src/cpu/kernels/add/sve/list.h b/src/cpu/kernels/add/list.h index 4529a9f7c1..9d7c9a67ff 100644 --- a/src/cpu/kernels/add/sve/list.h +++ b/src/cpu/kernels/add/list.h @@ -21,16 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_SVE_KERNELS_ADD_LIST_H -#define SRC_CORE_SVE_KERNELS_ADD_LIST_H +#ifndef SRC_CORE_KERNELS_ADD_LIST_H +#define SRC_CORE_KERNELS_ADD_LIST_H -#if defined(ARM_COMPUTE_ENABLE_SVE) -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/SVEMath.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" -#include "src/cpu/kernels/add/sve/impl.h" -#include <arm_sve.h> +#include "src/cpu/kernels/add/generic/neon/impl.h" +#include "src/cpu/kernels/add/generic/sve/impl.h" namespace arm_compute { @@ -39,13 +34,25 @@ namespace cpu #define DECLARE_ADD_KERNEL(func_name) \ void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -DECLARE_ADD_KERNEL(add_qasymm8_sve); -DECLARE_ADD_KERNEL(add_qasymm8_signed_sve); -DECLARE_ADD_KERNEL(add_qsymm16_sve); +DECLARE_ADD_KERNEL(add_qasymm8_neon); +DECLARE_ADD_KERNEL(add_qasymm8_signed_neon); +DECLARE_ADD_KERNEL(add_qsymm16_neon); +DECLARE_ADD_KERNEL(add_fp32_neon); +DECLARE_ADD_KERNEL(add_fp16_neon); +DECLARE_ADD_KERNEL(add_u8_neon); +DECLARE_ADD_KERNEL(add_s16_neon); +DECLARE_ADD_KERNEL(add_s32_neon); +DECLARE_ADD_KERNEL(add_fp32_sve); +DECLARE_ADD_KERNEL(add_fp16_sve); +DECLARE_ADD_KERNEL(add_u8_sve); +DECLARE_ADD_KERNEL(add_s16_sve); +DECLARE_ADD_KERNEL(add_s32_sve); +DECLARE_ADD_KERNEL(add_qasymm8_sve2); +DECLARE_ADD_KERNEL(add_qasymm8_signed_sve2); +DECLARE_ADD_KERNEL(add_qsymm16_sve2); #undef DECLARE_ADD_KERNEL } // namespace cpu } // namespace arm_compute -#endif // defined(ARM_COMPUTE_ENABLE_SVE) -#endif // SRC_CORE_SVE_KERNELS_ADD_LIST_H
\ No newline at end of file +#endif // SRC_CORE_KERNELS_ADD_LIST_H
\ No newline at end of file |