about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/cpu/kernels/CpuActivationKernel.cpp 16
-rw-r--r-- src/cpu/kernels/activation/generic/neon/qasymm8.cpp 46
-rw-r--r-- src/cpu/kernels/activation/list.h 2
3 files changed, 45 insertions, 19 deletions
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 330de1ece2..9eaf44af51 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -45,11 +45,13 @@ namespace
{
static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels =
{
- { // neon LUT implementantion of HARD_SWISH takes precedence
- "neon_qu8_activation_hardswish_lut",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f == ActivationLayerInfo::ActivationFunction::HARD_SWISH; },
- REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_hardswish_lut)
+#ifdef __aarch64__
+ { // Neon LUT implementation takes precedence
+ "neon_qu8_activation_lut",
+ [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt); },
+ REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation_lut)
},
+#endif // __aarch64__
{
"sve2_qu8_activation",
[](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; },
@@ -87,7 +89,7 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
},
{
"neon_qu8_activation",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.f != ActivationLayerInfo::ActivationFunction::HARD_SWISH; },
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_activation)
},
{
@@ -188,10 +190,12 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
_run_method = uk->ukernel;
_name = std::string("CpuActivationKernel").append("/").append(uk->name);
- if(activation_info.activation() == ActivationLayerInfo::ActivationFunction::HARD_SWISH && src->data_type() == DataType::QASYMM8)
+#ifdef __aarch64__
+ if(ActivationLayerInfo::is_lut_supported(activation_info.activation(), src->data_type()))
{
activation_info.init_lut(src->quantization_info().uniform(),(dst)?dst->quantization_info().uniform():src->quantization_info().uniform());
}
+#endif // __aarch64__
_act_info = activation_info;
// Configure kernel window
diff --git a/src/cpu/kernels/activation/generic/neon/qasymm8.cpp b/src/cpu/kernels/activation/generic/neon/qasymm8.cpp
index 29f5e6b376..5095ecf5bd 100644
--- a/src/cpu/kernels/activation/generic/neon/qasymm8.cpp
+++ b/src/cpu/kernels/activation/generic/neon/qasymm8.cpp
@@ -417,9 +417,9 @@ void substitute_bytes_neon(
#endif // __aarch64__
} // namespace
-void neon_qasymm8_hardswish_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+void neon_qasymm8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
- ARM_COMPUTE_ERROR_ON(act_info.activation() != ActivationLayerInfo::ActivationFunction::HARD_SWISH);
+ ARM_COMPUTE_ERROR_ON(!ActivationLayerInfo::is_lut_supported(act_info.activation(), src->info()->data_type()));
#ifdef __aarch64__
const int window_step_x = src->info()->tensor_shape().x();
Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
@@ -472,6 +472,13 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
const float a_f32 = act_info.a();
const float b_f32 = act_info.b();
+#ifndef __aarch64__
+ const auto const_6_f32 = vdupq_n_f32(6.f);
+ const auto const_0_f32 = vdupq_n_f32(0.f);
+ const auto const_3_f32 = vdupq_n_f32(3.f);
+ const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
+#endif // __aarch64__
+
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
float o = -qi_in.offset * s + qi_out.offset;
@@ -545,21 +552,28 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
// Re-quantize to new output space
tmp = vquantize(tmp_dep, qi_out);
}
- else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+#ifndef __aarch64__ // LUT-based implementation is used for aarch64 instead.
+ else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
{
+ // De-quantize
const auto vin_deq = vdequantize(vin, qi_in);
-
-#ifdef __aarch64__
- const uint32x4x4_t pos_mask =
+ // Perform activation
+ const float32x4x4_t tmp_dep =
{
{
- wrapper::vcgtz(vin_deq.val[0]),
- wrapper::vcgtz(vin_deq.val[1]),
- wrapper::vcgtz(vin_deq.val[2]),
- wrapper::vcgtz(vin_deq.val[3]),
+ wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
+ wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
}
};
-#else // __aarch64__
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ {
+ const auto vin_deq = vdequantize(vin, qi_in);
+
const uint32x4x4_t pos_mask =
{
{
@@ -569,7 +583,6 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
wrapper::vcgt(vin_deq.val[3], vconst_0_f32),
}
};
-#endif // __aarch64__
const float32x4x4_t tmp_dep =
{
@@ -583,6 +596,7 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
tmp = vquantize(tmp_dep, qi_out);
}
+#endif // __aarch64__
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -622,12 +636,20 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+#ifndef __aarch64__ // LUT-based implementation is used for aarch64 instead.
+ else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
else if(act == ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
{
float tmp_f = dequantize_qasymm8(in, qi_in);
tmp_f = tmp_f > 0 ? tmp_f : tmp_f * a_f32;
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+#endif // __aarch64__
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
diff --git a/src/cpu/kernels/activation/list.h b/src/cpu/kernels/activation/list.h
index 7220d6cce1..b2322a6477 100644
--- a/src/cpu/kernels/activation/list.h
+++ b/src/cpu/kernels/activation/list.h
@@ -32,7 +32,7 @@ namespace cpu
void func_name(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
DECLARE_ACTIVATION_KERNEL(neon_qasymm8_activation);
-DECLARE_ACTIVATION_KERNEL(neon_qasymm8_hardswish_lut);
+DECLARE_ACTIVATION_KERNEL(neon_qasymm8_activation_lut);
DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_activation);
DECLARE_ACTIVATION_KERNEL(neon_qasymm8_signed_activation);
DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_signed_activation);