path: root/src/cpu/kernels/CpuActivationKernel.cpp
author     Murray Kornelsen <murray.kornelsen@mail.mcgill.ca>    2022-07-13 21:22:39 -0400
committer  Pablo Marquez Tello <pablo.tello@arm.com>             2022-09-14 09:15:03 +0000
commit     926f502ca731fa49bcdf949408ce25728616e5f2 (patch)
tree       7e221103a9c0c5c0e4c054abc07cbdf11c7c7b4e /src/cpu/kernels/CpuActivationKernel.cpp
parent     6e09e1404c635d948cf20eb6b4b5747dfb6656f2 (diff)
download   ComputeLibrary-926f502ca731fa49bcdf949408ce25728616e5f2.tar.gz
Adding GELU activation
The OpenCL implementation uses the built-in erf. The NEON implementation requires a new vectorized erf, which uses the following approximation:

erf(x) ≈ 1 - 1 / (1 + a1*x + a2*x^2 + a3*x^3 + a4*x^4)^4
a1 = 0.278393, a2 = 0.230389, a3 = 0.000972, a4 = 0.078108

From https://en.wikipedia.org/wiki/Error_function#Numerical_approximations

Signed-off-by: Murray Kornelsen <murray.kornelsen@mail.mcgill.ca>
Change-Id: I2d3964b2c26a4334166b17135f9104bc6324fad2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7921
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
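For reference, a minimal scalar sketch of the approximation quoted above, together with GELU written in terms of erf (GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2)))). This is illustrative only: the function names are hypothetical and this is not the vectorized NEON kernel added by the patch.

#include <cmath>

// Scalar reference for the quoted erf approximation (valid for x >= 0;
// the odd symmetry erf(-x) = -erf(x) covers negative inputs).
static float erf_approx(float x)
{
    const float a1 = 0.278393f, a2 = 0.230389f, a3 = 0.000972f, a4 = 0.078108f;
    const float ax = std::fabs(x);
    float d = 1.f + ax * (a1 + ax * (a2 + ax * (a3 + ax * a4))); // 1 + a1*x + a2*x^2 + a3*x^3 + a4*x^4
    d *= d; // ^2
    d *= d; // ^4
    const float e = 1.f - 1.f / d;
    return (x < 0.f) ? -e : e;
}

// GELU expressed through erf.
static float gelu_ref(float x)
{
    return 0.5f * x * (1.f + erf_approx(x / std::sqrt(2.f)));
}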
Diffstat (limited to 'src/cpu/kernels/CpuActivationKernel.cpp')
-rw-r--r--  src/cpu/kernels/CpuActivationKernel.cpp  18
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index ee9db99080..61efcb2dd6 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -46,7 +46,8 @@ namespace
static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels =
{
#ifdef __aarch64__
- { // Neon LUT implementation takes precedence
+ {
+ // Neon LUT implementation takes precedence
"neon_q8_activation_lut",
[](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt); },
REGISTER_Q8_NEON(arm_compute::cpu::neon_q8_activation_lut)
@@ -54,27 +55,27 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
#endif // __aarch64__
{
"sve2_qu8_activation",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2; },
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve2 && data.f != ActivationLayerInfo::ActivationFunction::GELU; },
REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_qasymm8_activation)
},
{
"sve2_qs8_activation",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve2; },
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve2 && data.f != ActivationLayerInfo::ActivationFunction::GELU; },
REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::sve2_qasymm8_signed_activation)
},
{
"sve2_qs16_activation",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16 && data.isa.sve2; },
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::QSYMM16 && data.isa.sve2 && data.f != ActivationLayerInfo::ActivationFunction::GELU; },
REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation)
},
{
"sve_fp16_activation",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve && data.isa.fp16; },
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve && data.isa.fp16 && data.f != ActivationLayerInfo::ActivationFunction::GELU; },
REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation)
},
{
"sve_fp32_activation",
- [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
+ [](const ActivationDataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve && data.f != ActivationLayerInfo::ActivationFunction::GELU; },
REGISTER_FP32_SVE(arm_compute::cpu::sve_fp32_activation)
},
{
@@ -105,7 +106,7 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
};
/* Supported activation in the 8-bit integer domain */
-static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_activations =
+static const std::array<ActivationLayerInfo::ActivationFunction, 8> qasymm8_activations =
{
ActivationLayerInfo::ActivationFunction::RELU,
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
@@ -114,6 +115,7 @@ static const std::array<ActivationLayerInfo::ActivationFunction, 7> qasymm8_acti
ActivationLayerInfo::ActivationFunction::TANH,
ActivationLayerInfo::ActivationFunction::HARD_SWISH,
ActivationLayerInfo::ActivationFunction::LEAKY_RELU,
+ ActivationLayerInfo::ActivationFunction::GELU,
};
/* Supported activation in the 16-bit integer domain */
static const std::array<ActivationLayerInfo::ActivationFunction, 4> qsymm16_activations =
@@ -193,7 +195,7 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
#ifdef __aarch64__
if(ActivationLayerInfo::is_lut_supported(activation_info.activation(), src->data_type()))
{
- activation_info.init_lut(src->data_type(), src->quantization_info().uniform(), (dst)?dst->quantization_info().uniform():src->quantization_info().uniform());
+ activation_info.init_lut(src->data_type(), src->quantization_info().uniform(), (dst) ? dst->quantization_info().uniform() : src->quantization_info().uniform());
}
#endif // __aarch64__
_act_info = activation_info;
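For context, the GELU exclusions above follow the kernel-selection pattern used throughout this file: each table entry pairs a name, a selector predicate, and a registered micro-kernel, and (as the "takes precedence" comment suggests) the first entry whose predicate matches is chosen. Below is a condensed, self-contained sketch of that pattern; the types, names, and entries are simplified stand-ins, not the actual ComputeLibrary definitions.

#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Simplified stand-ins for the real selector data (hypothetical types).
enum class DataType { QASYMM8, F32 };
enum class ActivationFunction { RELU, GELU };
struct ISA { bool sve2; };
struct SelectorData { DataType dt; ActivationFunction f; ISA isa; };

struct ActivationKernel
{
    std::string name;
    std::function<bool(const SelectorData &)> is_selected;
    // The real entries also carry a registered function pointer (REGISTER_* macros).
};

// Entries are tried in order, so an SVE2 entry that rejects GELU lets a
// later (plain NEON) entry handle it instead.
static const std::vector<ActivationKernel> kernels =
{
    { "sve2_qu8_activation",
      [](const SelectorData &d) { return d.dt == DataType::QASYMM8 && d.isa.sve2 && d.f != ActivationFunction::GELU; } },
    { "neon_qu8_activation",
      [](const SelectorData &d) { return d.dt == DataType::QASYMM8; } },
};

static const ActivationKernel *select_kernel(const SelectorData &data)
{
    for(const auto &k : kernels)
    {
        if(k.is_selected(data))
        {
            return &k;
        }
    }
    return nullptr;
}

int main()
{
    const SelectorData data{ DataType::QASYMM8, ActivationFunction::GELU, ISA{ true } };
    const ActivationKernel *k = select_kernel(data);
    std::printf("selected: %s\n", k != nullptr ? k->name.c_str() : "none"); // -> neon_qu8_activation
    return 0;
}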