aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: SiCong Li <sicong.li@arm.com> 2023-05-17 13:46:13 +0100
committer: SiCong Li <sicong.li@arm.com> 2023-05-17 16:21:31 +0100
commit: 3f70cd053573cb6140990ab619ead865f55f3139 (patch)
tree: 6ed9cf0222c4b0ab1def1c9276bef41767accd36
parent: 81ca48606cea2220a83ae8d736d3935bcc17f854 (diff)
download: ComputeLibrary-branches/arm_compute_23_05.tar.gz
Move lut kernel to sve2 category [tag: v23.05] [branch: branches/arm_compute_23_05]
This specific Lut kernel uses sve2 instructions

Resolves: COMPMID-6268
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I44fa3812e96fa79b3d1e1e3a31d587581f59f0e1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9675
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- filelist.json 22
-rw-r--r-- src/BUILD.bazel 8
-rw-r--r-- src/CMakeLists.txt 11
-rw-r--r-- src/cpu/kernels/CpuActivationKernel.cpp 8
-rw-r--r-- src/cpu/kernels/CpuElementwiseUnaryKernel.cpp 6
-rw-r--r-- src/cpu/kernels/activation/generic/sve2/lut.cpp (renamed from src/cpu/kernels/activation/generic/sve/lut.cpp) 4
-rw-r--r-- src/cpu/kernels/activation/list.h 4
-rw-r--r-- src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp (renamed from src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp) 12
-rw-r--r-- src/cpu/kernels/elementwise_unary/list.h 2
-rw-r--r-- src/cpu/kernels/lut/generic/sve2/u8.cpp (renamed from src/cpu/kernels/lut/generic/sve/u8.cpp) 3
-rw-r--r-- src/cpu/kernels/lut/list.h 2
11 files changed, 42 insertions, 40 deletions
diff --git a/filelist.json b/filelist.json
index 807000a623..c12873ad63 100644
--- a/filelist.json
+++ b/filelist.json
@@ -891,11 +891,13 @@
},
"sve": {
"fp16": [ "src/cpu/kernels/activation/generic/sve/fp16.cpp" ],
- "fp32": [ "src/cpu/kernels/activation/generic/sve/fp32.cpp" ],
- "qasymm8": ["src/cpu/kernels/activation/generic/sve/lut.cpp"]
+ "fp32": [ "src/cpu/kernels/activation/generic/sve/fp32.cpp" ]
},
"sve2":{
- "qasymm8": [ "src/cpu/kernels/activation/generic/sve2/qasymm8.cpp" ],
+ "qasymm8": [
+ "src/cpu/kernels/activation/generic/sve2/qasymm8.cpp",
+ "src/cpu/kernels/activation/generic/sve2/lut.cpp"
+ ],
"qasymm8_signed": [ "src/cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp" ],
"qsymm16": [ "src/cpu/kernels/activation/generic/sve2/qsymm16.cpp" ]
}
@@ -1454,9 +1456,11 @@
"common": ["src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp" ],
"integer": ["src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp"],
"fp32": ["src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp"],
- "fp16": ["src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp"],
- "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp"],
- "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp"]
+ "fp16": ["src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp"]
+ },
+ "sve2": {
+ "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp"],
+ "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp"]
}
}
},
@@ -1802,9 +1806,9 @@
"qasymm8": ["src/cpu/kernels/lut/generic/neon/u8.cpp"],
"qasymm8_signed": ["src/cpu/kernels/lut/generic/neon/u8.cpp"]
},
- "sve": {
- "qasymm8": ["src/cpu/kernels/lut/generic/sve/u8.cpp"],
- "qasymm8_signed": ["src/cpu/kernels/lut/generic/sve/u8.cpp"]
+ "sve2": {
+ "qasymm8": ["src/cpu/kernels/lut/generic/sve2/u8.cpp"],
+ "qasymm8_signed": ["src/cpu/kernels/lut/generic/sve2/u8.cpp"]
}
}
},
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index 8f35c7a659..12dc1ca340 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -107,7 +107,8 @@ filegroup(
filegroup(
name = "arm_compute_sve2_srcs",
- srcs = ["cpu/kernels/activation/generic/sve2/qasymm8.cpp",
+ srcs = ["cpu/kernels/activation/generic/sve2/lut.cpp",
+ "cpu/kernels/activation/generic/sve2/qasymm8.cpp",
"cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp",
"cpu/kernels/activation/generic/sve2/qsymm16.cpp",
"cpu/kernels/add/generic/sve2/qasymm8.cpp",
@@ -115,6 +116,8 @@ filegroup(
"cpu/kernels/add/generic/sve2/qsymm16.cpp",
"cpu/kernels/elementwise_binary/generic/sve2/qasymm8.cpp",
"cpu/kernels/elementwise_binary/generic/sve2/qasymm8_signed.cpp",
+ "cpu/kernels/elementwise_unary/generic/sve2/q8.cpp",
+ "cpu/kernels/lut/generic/sve2/u8.cpp",
"cpu/kernels/softmax/generic/sve2/impl.cpp",
"cpu/kernels/softmax/generic/sve2/qasymm8.cpp",
"cpu/kernels/softmax/generic/sve2/qasymm8_signed.cpp"] +
@@ -323,7 +326,6 @@ filegroup(
"core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp",
"cpu/kernels/activation/generic/sve/fp16.cpp",
"cpu/kernels/activation/generic/sve/fp32.cpp",
- "cpu/kernels/activation/generic/sve/lut.cpp",
"cpu/kernels/add/generic/sve/fp16.cpp",
"cpu/kernels/add/generic/sve/fp32.cpp",
"cpu/kernels/add/generic/sve/impl.cpp",
@@ -336,8 +338,6 @@ filegroup(
"cpu/kernels/elementwise_unary/generic/sve/fp32.cpp",
"cpu/kernels/elementwise_unary/generic/sve/impl.cpp",
"cpu/kernels/elementwise_unary/generic/sve/integer.cpp",
- "cpu/kernels/elementwise_unary/generic/sve/q8.cpp",
- "cpu/kernels/lut/generic/sve/u8.cpp",
"cpu/kernels/scale/sve/fp16.cpp",
"cpu/kernels/scale/sve/fp32.cpp",
"cpu/kernels/scale/sve/integer.cpp",
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cb48692a72..5d756da568 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -302,7 +302,6 @@ target_sources(
core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp
cpu/kernels/activation/generic/sve/fp16.cpp
cpu/kernels/activation/generic/sve/fp32.cpp
- cpu/kernels/activation/generic/sve/lut.cpp
cpu/kernels/add/generic/sve/fp16.cpp
cpu/kernels/add/generic/sve/fp32.cpp
cpu/kernels/add/generic/sve/impl.cpp
@@ -315,8 +314,6 @@ target_sources(
cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
cpu/kernels/elementwise_unary/generic/sve/impl.cpp
cpu/kernels/elementwise_unary/generic/sve/integer.cpp
- cpu/kernels/elementwise_unary/generic/sve/q8.cpp
- cpu/kernels/lut/generic/sve/u8.cpp
cpu/kernels/scale/sve/fp16.cpp
cpu/kernels/scale/sve/fp32.cpp
cpu/kernels/scale/sve/integer.cpp
@@ -332,7 +329,8 @@ target_sources(
target_sources(
arm_compute_sve2
PRIVATE
- cpu/kernels/activation/generic/sve2/qasymm8.cpp
+ cpu/kernels/activation/generic/sve2/lut.cpp
+ cpu/kernels/activation/generic/sve2/qasymm8.cpp
cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp
cpu/kernels/activation/generic/sve2/qsymm16.cpp
cpu/kernels/add/generic/sve2/qasymm8.cpp
@@ -340,6 +338,8 @@ target_sources(
cpu/kernels/add/generic/sve2/qsymm16.cpp
cpu/kernels/elementwise_binary/generic/sve2/qasymm8.cpp
cpu/kernels/elementwise_binary/generic/sve2/qasymm8_signed.cpp
+ cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
+ cpu/kernels/lut/generic/sve2/u8.cpp
cpu/kernels/softmax/generic/sve2/impl.cpp
cpu/kernels/softmax/generic/sve2/qasymm8.cpp
cpu/kernels/softmax/generic/sve2/qasymm8_signed.cpp
@@ -976,5 +976,4 @@ target_sources(
runtime/Tensor.cpp
runtime/TensorAllocator.cpp
runtime/Utils.cpp
-)
- \ No newline at end of file
+) \ No newline at end of file
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 04a9731f4a..20a8489cdd 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,9 +47,9 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
{
#ifdef ARM_COMPUTE_ENABLE_SVE
{
- "sve_q8_activation_lut",
- [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt) && data.cpumodel == CPUModel::A510 && data.isa.sve; },
- REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_q8_activation_lut)
+ "sve2_q8_activation_lut",
+ [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt) && data.cpumodel == CPUModel::A510 && data.isa.sve2; },
+ REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_q8_activation_lut)
},
#endif // ARM_COMPUTE_ENABLE_SVE
#ifdef __aarch64__
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index dbb752aef3..04a7f15715 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -167,12 +167,12 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai
},
#ifdef __aarch64__
{
- "sve_q8_elementwise_unary",
+ "sve2_q8_elementwise_unary",
[](const DataTypeISASelectorData & data)
{
- return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve;
+ return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2;
},
- REGISTER_QASYMM8_SVE(sve_q8_elementwise_unary),
+ REGISTER_QASYMM8_SVE2(sve2_q8_elementwise_unary),
&q8_prepare_lut,
},
{
diff --git a/src/cpu/kernels/activation/generic/sve/lut.cpp b/src/cpu/kernels/activation/generic/sve2/lut.cpp
index b4042662b9..2e5975744b 100644
--- a/src/cpu/kernels/activation/generic/sve/lut.cpp
+++ b/src/cpu/kernels/activation/generic/sve2/lut.cpp
@@ -30,7 +30,7 @@ namespace arm_compute
namespace cpu
{
#ifdef __aarch64__
-void sve_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+void sve2_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
ARM_COMPUTE_ERROR_ON(!ActivationLayerInfo::is_lut_supported(act_info.activation(), src->info()->data_type()));
const auto window_end_x = window.x().end();
@@ -42,7 +42,7 @@ void sve_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLay
{
const auto input_ptr = input.ptr();
auto output_ptr = output.ptr();
- lut_u8_sve(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr);
+ lut_u8_sve2(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr);
},
input, output);
}
diff --git a/src/cpu/kernels/activation/list.h b/src/cpu/kernels/activation/list.h
index c2149b38ff..6550ddfeca 100644
--- a/src/cpu/kernels/activation/list.h
+++ b/src/cpu/kernels/activation/list.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ namespace cpu
#ifdef __aarch64__
DECLARE_ACTIVATION_KERNEL(neon_q8_activation_lut);
#endif // __aarch64__
-DECLARE_ACTIVATION_KERNEL(sve_q8_activation_lut);
+DECLARE_ACTIVATION_KERNEL(sve2_q8_activation_lut);
DECLARE_ACTIVATION_KERNEL(neon_qasymm8_activation);
DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_activation);
DECLARE_ACTIVATION_KERNEL(neon_qasymm8_signed_activation);
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp b/src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
index b68f691086..7e32f50132 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
@@ -29,23 +29,23 @@ namespace arm_compute
{
namespace cpu
{
-
-void sve_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
+void sve2_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
{
ARM_COMPUTE_UNUSED(op);
- auto win = window;
+ auto win = window;
const auto window_end_x = window.x().end();
win.set(0, Window::Dimension(0, 1, 1));
Iterator src_it(in, win);
Iterator dst_it(out, win);
- execute_window_loop(win, [&](const Coordinates &) {
+ execute_window_loop(win, [&](const Coordinates &)
+ {
const auto src_ptr = src_it.ptr();
- auto dst_ptr = dst_it.ptr();
+ auto dst_ptr = dst_it.ptr();
- lut_u8_sve(lut, 1, window_end_x, &src_ptr, &dst_ptr);
+ lut_u8_sve2(lut, 1, window_end_x, &src_ptr, &dst_ptr);
},
src_it, dst_it);
}
diff --git a/src/cpu/kernels/elementwise_unary/list.h b/src/cpu/kernels/elementwise_unary/list.h
index 432fabf4af..a9701afdd8 100644
--- a/src/cpu/kernels/elementwise_unary/list.h
+++ b/src/cpu/kernels/elementwise_unary/list.h
@@ -37,7 +37,7 @@ namespace cpu
DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp32_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp16_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(sve_s32_elementwise_unary);
-DECLARE_ELEMETWISE_UNARY_KERNEL(sve_q8_elementwise_unary);
+DECLARE_ELEMETWISE_UNARY_KERNEL(sve2_q8_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp32_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp16_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(neon_s32_elementwise_unary);
diff --git a/src/cpu/kernels/lut/generic/sve/u8.cpp b/src/cpu/kernels/lut/generic/sve2/u8.cpp
index 70f3a2e6fb..b80d75326e 100644
--- a/src/cpu/kernels/lut/generic/sve/u8.cpp
+++ b/src/cpu/kernels/lut/generic/sve2/u8.cpp
@@ -31,8 +31,7 @@ namespace arm_compute
{
namespace cpu
{
-
-void lut_u8_sve(
+void lut_u8_sve2(
const uint8_t *table,
size_t num_strings,
size_t string_length,
diff --git a/src/cpu/kernels/lut/list.h b/src/cpu/kernels/lut/list.h
index 9749b91cfe..7a2afc6927 100644
--- a/src/cpu/kernels/lut/list.h
+++ b/src/cpu/kernels/lut/list.h
@@ -43,7 +43,7 @@ namespace cpu
uint8_t *const *output)
DECLARE_LUT_KERNEL(lut_u8_neon);
-DECLARE_LUT_KERNEL(lut_u8_sve);
+DECLARE_LUT_KERNEL(lut_u8_sve2);
#undef DECLARE_LUT_KERNEL
#endif // __aarch64__