aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Gunes Bayir <gunes.bayir@arm.com> 2024-05-21 15:39:54 +0100
committer Gunes Bayir <gunes.bayir@arm.com> 2024-05-22 13:09:26 +0000
commit ab538a211d1e8a3504512ceb6a778b3a0fc058fc (patch)
tree 8ba432f59db5af1e630c0c801641eae737d876c1 /src
parent 4c4b48fc5b88876a24fa04192b6fd9cdd660737e (diff)
download ComputeLibrary-ab538a211d1e8a3504512ceb6a778b3a0fc058fc.tar.gz
Use lookup table for Fp16 Tanh activation in hardware with SVE
Resolves: COMPMID-6901
Change-Id: Idcd3f5f5d90f4073aaf116c0586e46013fbd64f7
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11605
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/helpers/LUTManager.cpp 27
-rw-r--r-- src/core/helpers/LUTManager.h 18
-rw-r--r-- src/cpu/kernels/CpuActivationKernel.cpp 18
3 files changed, 46 insertions, 17 deletions
diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp
index 06e35eed8c..2effffbe92 100644
--- a/src/core/helpers/LUTManager.cpp
+++ b/src/core/helpers/LUTManager.cpp
@@ -30,17 +30,38 @@ namespace arm_compute
namespace
{
-void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut)
+float16_t activation(float16_t x, const LUTInfo &info)
+{
+ float16_t out = 0.f;
+ switch (info.act)
+ {
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ out = 1.f / (1.f + std::exp(-x));
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ {
+ out = static_cast<float16_t>(info.alpha * std::tanh(info.beta * x));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unsupported Activation for 16-bit LUT table");
+ break;
+ }
+ return out;
+}
+
+void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut, const LUTInfo &info)
{
union Element
{
uint16_t i = 0;
float16_t fp;
} item;
+
// Fill lut by iterating over all 16 bit values using the union.
while (true)
{
- (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp));
+ (*lut)[item.i] = activation(item.fp, info);
if (item.i == 65535)
break;
item.i++;
@@ -62,7 +83,7 @@ std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table
// Not found, or pointer not valid
// We do not use make_shared to prevent the weak_ptr keeping the control block alive
std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536);
- init_lut_fp16(ptr.get());
+ init_lut_fp16(ptr.get(), info);
map_fp16[info] = ptr;
return ptr;
}
diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h
index 4e13ead7e3..f3f4bf2832 100644
--- a/src/core/helpers/LUTManager.h
+++ b/src/core/helpers/LUTManager.h
@@ -38,19 +38,23 @@ namespace arm_compute
struct LUTInfo
{
ActivationLayerInfo::ActivationFunction act;
+ float alpha;
+ float beta;
DataType dt;
- QuantizationInfo qinfo;
+ UniformQuantizationInfo qinfo;
+
// Operators enable use of map with Lutinfo as key
friend bool operator<(const LUTInfo &l, const LUTInfo &r)
{
- return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) ||
- ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) ||
- ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) &&
- (l.qinfo.offset() < l.qinfo.offset()));
+ const auto l_tup = std::make_tuple(l.act, l.alpha, l.beta, l.dt, l.qinfo.scale, l.qinfo.offset);
+ const auto r_tup = std::make_tuple(r.act, r.alpha, r.beta, r.dt, r.qinfo.scale, r.qinfo.offset);
+
+ return l_tup < r_tup;
}
- bool operator==(const LUTInfo &l)
+ bool operator==(const LUTInfo &l) const
{
- return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo;
+ return this->act == l.act && this->alpha == l.alpha && this->beta == l.beta && this->dt == l.dt &&
+ this->qinfo == l.qinfo;
}
};
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 7cfa39b286..4253027231 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -43,6 +43,13 @@ namespace kernels
{
namespace
{
+
+bool is_fp16_lut_supported(ActivationLayerInfo::ActivationFunction func)
+{
+ return func == ActivationLayerInfo::ActivationFunction::LOGISTIC ||
+ func == ActivationLayerInfo::ActivationFunction::TANH;
+}
+
static const std::vector<CpuActivationKernel::ActivationKernel> available_kernels = {
#ifdef ARM_COMPUTE_ENABLE_SVE
{"sve2_q8_activation_lut",
@@ -85,10 +92,7 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
REGISTER_QSYMM16_SVE2(arm_compute::cpu::sve2_qsymm16_activation)},
{"sve_fp16_activation_lut",
[](const ActivationDataTypeISASelectorData &data)
- {
- return data.dt == DataType::F16 && data.isa.fp16 && data.isa.sve &&
- data.f == ActivationLayerInfo::ActivationFunction::LOGISTIC;
- },
+ { return data.dt == DataType::F16 && data.isa.fp16 && data.isa.sve && is_fp16_lut_supported(data.f); },
REGISTER_FP16_SVE(arm_compute::cpu::sve_fp16_activation_lut)},
{"sve_fp16_activation",
[](const ActivationDataTypeISASelectorData &data)
@@ -299,10 +303,10 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
activation_info.setLookupTable256(tmp_lut);
}
- if (src->data_type() == DataType::F16 &&
- activation_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ if (std::string(uk->name) == "sve_fp16_activation_lut")
{
- const LUTInfo info = {activation_info.activation(), src->data_type(), src->quantization_info()};
+ const LUTInfo info = {activation_info.activation(), activation_info.a(), activation_info.b(), src->data_type(),
+ src->quantization_info().uniform()};
activation_info.setLookupTable65536((lut_manager.get_lut_table(info)));
}
#endif // __aarch64__