diff options
author | SiCong Li <sicong.li@arm.com> | 2023-05-17 13:46:13 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2023-05-17 15:20:17 +0000 |
commit | c0463a2959c84e8aa28f39ed2faa035678e682d8 (patch) | |
tree | 3aadfd440b51a919c07517b14a0ae74c83c13033 | |
parent | f15c615e79eb13c7457288e79b8a906bc68dfa1a (diff) | |
download | ComputeLibrary-c0463a2959c84e8aa28f39ed2faa035678e682d8.tar.gz |
Move LUT kernels to the SVE2 category
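This specific LUT kernel uses SVE2 instructions, so it and the kernels that call it must be built with the SVE2 sources and selected only when SVE2 is available.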
Resolves: COMPMID-6268
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I44fa3812e96fa79b3d1e1e3a31d587581f59f0e1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9675
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | filelist.json | 22 | ||||
-rw-r--r-- | src/BUILD.bazel | 8 | ||||
-rw-r--r-- | src/CMakeLists.txt | 11 | ||||
-rw-r--r-- | src/cpu/kernels/CpuActivationKernel.cpp | 8 | ||||
-rw-r--r-- | src/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 6 | ||||
-rw-r--r-- | src/cpu/kernels/activation/generic/sve2/lut.cpp (renamed from src/cpu/kernels/activation/generic/sve/lut.cpp) | 4 | ||||
-rw-r--r-- | src/cpu/kernels/activation/list.h | 4 | ||||
-rw-r--r-- | src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp (renamed from src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp) | 12 | ||||
-rw-r--r-- | src/cpu/kernels/elementwise_unary/list.h | 2 | ||||
-rw-r--r-- | src/cpu/kernels/lut/generic/sve2/u8.cpp (renamed from src/cpu/kernels/lut/generic/sve/u8.cpp) | 3 | ||||
-rw-r--r-- | src/cpu/kernels/lut/list.h | 2 |
11 files changed, 42 insertions, 40 deletions
diff --git a/filelist.json b/filelist.json index d87373623f..f5966c53d1 100644 --- a/filelist.json +++ b/filelist.json @@ -891,11 +891,13 @@ }, "sve": { "fp16": [ "src/cpu/kernels/activation/generic/sve/fp16.cpp" ], - "fp32": [ "src/cpu/kernels/activation/generic/sve/fp32.cpp" ], - "qasymm8": ["src/cpu/kernels/activation/generic/sve/lut.cpp"] + "fp32": [ "src/cpu/kernels/activation/generic/sve/fp32.cpp" ] }, "sve2":{ - "qasymm8": [ "src/cpu/kernels/activation/generic/sve2/qasymm8.cpp" ], + "qasymm8": [ + "src/cpu/kernels/activation/generic/sve2/qasymm8.cpp", + "src/cpu/kernels/activation/generic/sve2/lut.cpp" + ], "qasymm8_signed": [ "src/cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp" ], "qsymm16": [ "src/cpu/kernels/activation/generic/sve2/qsymm16.cpp" ] } @@ -1454,9 +1456,11 @@ "common": ["src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp" ], "integer": ["src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp"], "fp32": ["src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp"], - "fp16": ["src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp"], - "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp"], - "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp"] + "fp16": ["src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp"] + }, + "sve2": { + "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp"], + "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp"] } } }, @@ -1802,9 +1806,9 @@ "qasymm8": ["src/cpu/kernels/lut/generic/neon/u8.cpp"], "qasymm8_signed": ["src/cpu/kernels/lut/generic/neon/u8.cpp"] }, - "sve": { - "qasymm8": ["src/cpu/kernels/lut/generic/sve/u8.cpp"], - "qasymm8_signed": ["src/cpu/kernels/lut/generic/sve/u8.cpp"] + "sve2": { + "qasymm8": ["src/cpu/kernels/lut/generic/sve2/u8.cpp"], + "qasymm8_signed": ["src/cpu/kernels/lut/generic/sve2/u8.cpp"] } } }, diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 8f35c7a659..12dc1ca340 100644 --- a/src/BUILD.bazel +++ 
b/src/BUILD.bazel @@ -107,7 +107,8 @@ filegroup( filegroup( name = "arm_compute_sve2_srcs", - srcs = ["cpu/kernels/activation/generic/sve2/qasymm8.cpp", + srcs = ["cpu/kernels/activation/generic/sve2/lut.cpp", + "cpu/kernels/activation/generic/sve2/qasymm8.cpp", "cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp", "cpu/kernels/activation/generic/sve2/qsymm16.cpp", "cpu/kernels/add/generic/sve2/qasymm8.cpp", @@ -115,6 +116,8 @@ filegroup( "cpu/kernels/add/generic/sve2/qsymm16.cpp", "cpu/kernels/elementwise_binary/generic/sve2/qasymm8.cpp", "cpu/kernels/elementwise_binary/generic/sve2/qasymm8_signed.cpp", + "cpu/kernels/elementwise_unary/generic/sve2/q8.cpp", + "cpu/kernels/lut/generic/sve2/u8.cpp", "cpu/kernels/softmax/generic/sve2/impl.cpp", "cpu/kernels/softmax/generic/sve2/qasymm8.cpp", "cpu/kernels/softmax/generic/sve2/qasymm8_signed.cpp"] + @@ -323,7 +326,6 @@ filegroup( "core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp", "cpu/kernels/activation/generic/sve/fp16.cpp", "cpu/kernels/activation/generic/sve/fp32.cpp", - "cpu/kernels/activation/generic/sve/lut.cpp", "cpu/kernels/add/generic/sve/fp16.cpp", "cpu/kernels/add/generic/sve/fp32.cpp", "cpu/kernels/add/generic/sve/impl.cpp", @@ -336,8 +338,6 @@ filegroup( "cpu/kernels/elementwise_unary/generic/sve/fp32.cpp", "cpu/kernels/elementwise_unary/generic/sve/impl.cpp", "cpu/kernels/elementwise_unary/generic/sve/integer.cpp", - "cpu/kernels/elementwise_unary/generic/sve/q8.cpp", - "cpu/kernels/lut/generic/sve/u8.cpp", "cpu/kernels/scale/sve/fp16.cpp", "cpu/kernels/scale/sve/fp32.cpp", "cpu/kernels/scale/sve/integer.cpp", diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cb48692a72..5d756da568 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -302,7 +302,6 @@ target_sources( core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp cpu/kernels/activation/generic/sve/fp16.cpp cpu/kernels/activation/generic/sve/fp32.cpp - 
cpu/kernels/activation/generic/sve/lut.cpp cpu/kernels/add/generic/sve/fp16.cpp cpu/kernels/add/generic/sve/fp32.cpp cpu/kernels/add/generic/sve/impl.cpp @@ -315,8 +314,6 @@ target_sources( cpu/kernels/elementwise_unary/generic/sve/fp32.cpp cpu/kernels/elementwise_unary/generic/sve/impl.cpp cpu/kernels/elementwise_unary/generic/sve/integer.cpp - cpu/kernels/elementwise_unary/generic/sve/q8.cpp - cpu/kernels/lut/generic/sve/u8.cpp cpu/kernels/scale/sve/fp16.cpp cpu/kernels/scale/sve/fp32.cpp cpu/kernels/scale/sve/integer.cpp @@ -332,7 +329,8 @@ target_sources( target_sources( arm_compute_sve2 PRIVATE - cpu/kernels/activation/generic/sve2/qasymm8.cpp + cpu/kernels/activation/generic/sve2/lut.cpp + cpu/kernels/activation/generic/sve2/qasymm8.cpp cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp cpu/kernels/activation/generic/sve2/qsymm16.cpp cpu/kernels/add/generic/sve2/qasymm8.cpp @@ -340,6 +338,8 @@ target_sources( cpu/kernels/add/generic/sve2/qsymm16.cpp cpu/kernels/elementwise_binary/generic/sve2/qasymm8.cpp cpu/kernels/elementwise_binary/generic/sve2/qasymm8_signed.cpp + cpu/kernels/elementwise_unary/generic/sve2/q8.cpp + cpu/kernels/lut/generic/sve2/u8.cpp cpu/kernels/softmax/generic/sve2/impl.cpp cpu/kernels/softmax/generic/sve2/qasymm8.cpp cpu/kernels/softmax/generic/sve2/qasymm8_signed.cpp @@ -976,5 +976,4 @@ target_sources( runtime/Tensor.cpp runtime/TensorAllocator.cpp runtime/Utils.cpp -) -
\ No newline at end of file +)
\ No newline at end of file diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp index 04a9731f4a..20a8489cdd 100644 --- a/src/cpu/kernels/CpuActivationKernel.cpp +++ b/src/cpu/kernels/CpuActivationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,9 +47,9 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel { #ifdef ARM_COMPUTE_ENABLE_SVE { - "sve_q8_activation_lut", - [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt) && data.cpumodel == CPUModel::A510 && data.isa.sve; }, - REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_q8_activation_lut) + "sve2_q8_activation_lut", + [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt) && data.cpumodel == CPUModel::A510 && data.isa.sve2; }, + REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_q8_activation_lut) }, #endif // ARM_COMPUTE_ENABLE_SVE #ifdef __aarch64__ diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp index dbb752aef3..04a7f15715 100644 --- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -167,12 +167,12 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai }, #ifdef __aarch64__ { - "sve_q8_elementwise_unary", + "sve2_q8_elementwise_unary", [](const DataTypeISASelectorData & data) { - return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve; + return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2; }, - REGISTER_QASYMM8_SVE(sve_q8_elementwise_unary), + REGISTER_QASYMM8_SVE2(sve2_q8_elementwise_unary), &q8_prepare_lut, }, { diff --git a/src/cpu/kernels/activation/generic/sve/lut.cpp 
b/src/cpu/kernels/activation/generic/sve2/lut.cpp index b4042662b9..2e5975744b 100644 --- a/src/cpu/kernels/activation/generic/sve/lut.cpp +++ b/src/cpu/kernels/activation/generic/sve2/lut.cpp @@ -30,7 +30,7 @@ namespace arm_compute namespace cpu { #ifdef __aarch64__ -void sve_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) +void sve2_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) { ARM_COMPUTE_ERROR_ON(!ActivationLayerInfo::is_lut_supported(act_info.activation(), src->info()->data_type())); const auto window_end_x = window.x().end(); @@ -42,7 +42,7 @@ void sve_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLay { const auto input_ptr = input.ptr(); auto output_ptr = output.ptr(); - lut_u8_sve(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr); + lut_u8_sve2(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr); }, input, output); } diff --git a/src/cpu/kernels/activation/list.h b/src/cpu/kernels/activation/list.h index c2149b38ff..6550ddfeca 100644 --- a/src/cpu/kernels/activation/list.h +++ b/src/cpu/kernels/activation/list.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Arm Limited. + * Copyright (c) 2020-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace cpu #ifdef __aarch64__ DECLARE_ACTIVATION_KERNEL(neon_q8_activation_lut); #endif // __aarch64__ -DECLARE_ACTIVATION_KERNEL(sve_q8_activation_lut); +DECLARE_ACTIVATION_KERNEL(sve2_q8_activation_lut); DECLARE_ACTIVATION_KERNEL(neon_qasymm8_activation); DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_activation); DECLARE_ACTIVATION_KERNEL(neon_qasymm8_signed_activation); diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp b/src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp index b68f691086..7e32f50132 100644 --- a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp +++ b/src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp @@ -29,23 +29,23 @@ namespace arm_compute { namespace cpu { - -void sve_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) +void sve2_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) { ARM_COMPUTE_UNUSED(op); - auto win = window; + auto win = window; const auto window_end_x = window.x().end(); win.set(0, Window::Dimension(0, 1, 1)); Iterator src_it(in, win); Iterator dst_it(out, win); - execute_window_loop(win, [&](const Coordinates &) { + execute_window_loop(win, [&](const Coordinates &) + { const auto src_ptr = src_it.ptr(); - auto dst_ptr = dst_it.ptr(); + auto dst_ptr = dst_it.ptr(); - lut_u8_sve(lut, 1, window_end_x, &src_ptr, &dst_ptr); + lut_u8_sve2(lut, 1, window_end_x, &src_ptr, &dst_ptr); }, src_it, dst_it); } diff --git a/src/cpu/kernels/elementwise_unary/list.h b/src/cpu/kernels/elementwise_unary/list.h index 432fabf4af..a9701afdd8 100644 --- a/src/cpu/kernels/elementwise_unary/list.h +++ b/src/cpu/kernels/elementwise_unary/list.h @@ -37,7 +37,7 @@ namespace cpu DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp32_elementwise_unary); DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp16_elementwise_unary); 
DECLARE_ELEMETWISE_UNARY_KERNEL(sve_s32_elementwise_unary); -DECLARE_ELEMETWISE_UNARY_KERNEL(sve_q8_elementwise_unary); +DECLARE_ELEMETWISE_UNARY_KERNEL(sve2_q8_elementwise_unary); DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp32_elementwise_unary); DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp16_elementwise_unary); DECLARE_ELEMETWISE_UNARY_KERNEL(neon_s32_elementwise_unary); diff --git a/src/cpu/kernels/lut/generic/sve/u8.cpp b/src/cpu/kernels/lut/generic/sve2/u8.cpp index 70f3a2e6fb..b80d75326e 100644 --- a/src/cpu/kernels/lut/generic/sve/u8.cpp +++ b/src/cpu/kernels/lut/generic/sve2/u8.cpp @@ -31,8 +31,7 @@ namespace arm_compute { namespace cpu { - -void lut_u8_sve( +void lut_u8_sve2( const uint8_t *table, size_t num_strings, size_t string_length, diff --git a/src/cpu/kernels/lut/list.h b/src/cpu/kernels/lut/list.h index 9749b91cfe..7a2afc6927 100644 --- a/src/cpu/kernels/lut/list.h +++ b/src/cpu/kernels/lut/list.h @@ -43,7 +43,7 @@ namespace cpu uint8_t *const *output) DECLARE_LUT_KERNEL(lut_u8_neon); -DECLARE_LUT_KERNEL(lut_u8_sve); +DECLARE_LUT_KERNEL(lut_u8_sve2); #undef DECLARE_LUT_KERNEL #endif // __aarch64__ |