about | summary | refs | log | tree | commit | diff
path: root/src/cpu/kernels/CpuSoftmaxKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/kernels/CpuSoftmaxKernel.cpp')
-rw-r--r-- src/cpu/kernels/CpuSoftmaxKernel.cpp 60
1 files changed, 56 insertions, 4 deletions
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.cpp b/src/cpu/kernels/CpuSoftmaxKernel.cpp
index 54ff858eeb..b7e395fb79 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.cpp
+++ b/src/cpu/kernels/CpuSoftmaxKernel.cpp
@@ -48,18 +48,41 @@ namespace kernels
{
namespace
{
+
/* Softmax */
static const std::vector<typename CpuSoftmaxKernel::SoftmaxKernel> available_kernels = {
+ {"sme2_fp32_softmax",
+ [](const SoftmaxKernelDataTypeISASelectorData &data)
+ { return (!data.is_log && data.dt == DataType::F32 && data.isa.sme2 && data.axis == 0); },
+ REGISTER_FP32_SME2(sme2_fp32_softmax)},
{"neon_fp32_softmax",
[](const SoftmaxKernelDataTypeISASelectorData &data) { return (!data.is_log && data.dt == DataType::F32); },
REGISTER_FP32_NEON(neon_fp32_softmax<false>)},
+ {"sme2_fp16_softmax",
+ [](const SoftmaxKernelDataTypeISASelectorData &data)
+ { return (!data.is_log && data.dt == DataType::F16 && data.isa.sme2 && data.axis == 0); },
+ REGISTER_FP16_SME2(sme2_fp16_softmax)},
{"neon_fp16_softmax",
[](const SoftmaxKernelDataTypeISASelectorData &data)
{ return (!data.is_log && data.dt == DataType::F16) && data.isa.fp16; },
REGISTER_FP16_NEON(neon_fp16_softmax<false>)},
+ {"sme2_qu8_softmax_lut_512VL",
+ [](const SoftmaxKernelDataTypeISASelectorData &data)
+ {
+ return (!data.is_log && data.dt == DataType::QASYMM8 && data.isa.sme2 && data.axis == 0 &&
+ data.sme2_vector_length == 512);
+ },
+ REGISTER_QASYMM8_SME2(sme2_qasymm8_softmax_lut_512VL)},
{"neon_qu8_softmax",
[](const SoftmaxKernelDataTypeISASelectorData &data) { return (!data.is_log && data.dt == DataType::QASYMM8); },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_softmax<false>)},
+ {"sme2_qs8_softmax_lut_512VL",
+ [](const SoftmaxKernelDataTypeISASelectorData &data)
+ {
+ return (!data.is_log && data.dt == DataType::QASYMM8_SIGNED && data.isa.sme2 && data.axis == 0 &&
+ data.sme2_vector_length == 512);
+ },
+ REGISTER_QASYMM8_SIGNED_SME2(sme2_qasymm8_signed_softmax_lut_512VL)},
{"neon_qs8_softmax",
[](const SoftmaxKernelDataTypeISASelectorData &data)
{ return (!data.is_log && data.dt == DataType::QASYMM8_SIGNED); },
@@ -80,6 +103,28 @@ static const std::vector<typename CpuSoftmaxKernel::SoftmaxKernel> available_ker
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qasymm8_signed_softmax<true>)},
};
+/** Build the 256-entry exponential lookup table for the quantized LUT softmax kernels.
+ *
+ * Each entry holds std::exp(-scale * beta * i), stored in increasing order of i:
+ *  - QASYMM8:        i = 0 .. 255
+ *  - QASYMM8_SIGNED: i = -128 .. 127
+ *
+ * Previous contents of @p lut are discarded before filling, so calling this again
+ * (for example when the owning kernel is configured a second time) replaces the
+ * table instead of appending a second one after stale entries.
+ *
+ * @param[out] lut   Table to (re)build; holds exactly 256 entries after a successful call.
+ * @param[in]  type  Must be DataType::QASYMM8 or DataType::QASYMM8_SIGNED; any other
+ *                   value is reported through ARM_COMPUTE_ERROR and @p lut is left untouched.
+ * @param[in]  scale Factor multiplied with @p beta and the index inside the exponent.
+ * @param[in]  beta  Factor multiplied with @p scale and the index inside the exponent.
+ */
+void init_lut(std::vector<float> &lut, DataType type, float scale, float beta)
+{
+    constexpr int num_entries = 256;
+
+    // Rebuild the table from scratch starting at index value `first`.
+    const auto rebuild_from = [&lut, scale, beta](int first)
+    {
+        lut.clear();
+        lut.reserve(num_entries);
+        for (int i = first; i < first + num_entries; ++i)
+        {
+            lut.push_back(std::exp(-scale * beta * i));
+        }
+    };
+
+    if (type == DataType::QASYMM8)
+    {
+        rebuild_from(0);
+    }
+    else if (type == DataType::QASYMM8_SIGNED)
+    {
+        rebuild_from(-128);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Invalid datatype for QASYMM8/QASYMM8_SIGNED softmax");
+    }
+}
+
Status validate_arguments_softmax(
const ITensorInfo &src, const ITensorInfo &dst, float beta, int axis, const ITensorInfo &tmp, bool is_log)
{
@@ -149,8 +194,8 @@ void CpuSoftmaxKernel::configure(
auto_init_if_empty(*tmp, TensorInfo(*src).set_data_type(DataType::F32).reset_padding());
}
- const auto *uk = CpuSoftmaxKernel::get_implementation(
- SoftmaxKernelDataTypeISASelectorData{src->data_type(), CPUInfo::get().get_isa(), is_log});
+ const auto *uk = CpuSoftmaxKernel::get_implementation(SoftmaxKernelDataTypeISASelectorData{
+ src->data_type(), CPUInfo::get().get_isa(), is_log, axis, CPUInfo::get().get_sme2_vector_length()});
ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
std::string kernel_name = is_log ? std::string("CpuLogSoftmaxKernel") : std::string("CpuSoftmaxKernel");
@@ -186,6 +231,13 @@ void CpuSoftmaxKernel::configure(
win.set(_axis, Window::Dimension(0, 1, 1));
ICpuKernel<CpuSoftmaxKernel>::configure(win);
+
+ const std::string uk_name = uk->name;
+ if (uk_name == "sme2_qu8_softmax_lut_512VL" || uk_name == "sme2_qs8_softmax_lut_512VL")
+ {
+ const float scale = src->quantization_info().uniform().scale;
+ init_lut(_lut, src->data_type(), scale, beta);
+ }
}
Status CpuSoftmaxKernel::validate(
@@ -222,11 +274,11 @@ void CpuSoftmaxKernel::run_op(ITensorPack &tensors, const Window &window, const
const unsigned int tmp_size_for_thread = tmp->info()->element_size() * num_elems_processed_per_iteration;
void *tmp_for_thread = tmp->buffer() + (info.thread_id * tmp_size_for_thread);
- _run_method(src, tmp_for_thread, dst, _beta, _axis, window);
+ _run_method(src, tmp_for_thread, dst, _beta, _axis, window, _lut.data());
}
else
{
- _run_method(src, nullptr, dst, _beta, _axis, window);
+ _run_method(src, nullptr, dst, _beta, _axis, window, nullptr);
}
}