diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CPP/CPPTypes.cpp | 14 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp | 6 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.cpp | 27 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.h | 18 |
8 files changed, 62 insertions, 27 deletions
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index f6761f27b0..ee39210fa5 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -140,10 +140,20 @@ unsigned int CPUInfo::get_L2_cache_size() const unsigned long CPUInfo::get_sme2_vector_length() const { #ifdef ARM_COMPUTE_ENABLE_SME2 - return arm_gemm::utils::sme::get_vector_length<int8_t>(); + if (this->has_sme2()) + return arm_gemm::utils::sme::get_vector_length<int8_t>(); + else + return 0; #else // ARM_COMPUTE_ENABLE_SME2 return 0; #endif // ARM_COMPUTE_ENABLE_SME2 } - +unsigned int CPUInfo::get_cpu_num_excluding_little() const +{ +#if defined(__ANDROID__) + return _impl->info.not_little_num_cpus(); +#else /* defined(__ANDROID__) */ + return get_cpu_num(); +#endif /* defined(__ANDROID__) */ +} } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 717fd11485..153c36052a 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -78,11 +78,11 @@ static const BatchNormalizationKernel available_kernels[] = { REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)}, #endif /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #if defined(ARM_COMPUTE_ENABLE_NEON) -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if ARM_COMPUTE_ENABLE_FP16 {"neon_fp16_batch_normalization", [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::fp16_neon_batch_normalization)}, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ {"neon_fp32_batch_normalization", [](const BatchNormalizationSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)}, diff --git a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp index cb869838e2..694def1a3a 100644 --- a/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp +++ b/src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,11 +63,11 @@ static const BoundingBoxTransformKernel available_kernels[] = { {"fp32_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_boundingboxtransform)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_boundingboxtransform)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_NEON) {"qu16_neon_boundingboxtransform", [](const BoundingBoxTransformSelectorData &data) { return data.dt == DataType::QASYMM16; }, diff --git a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp index 549319e49f..e23e3d020f 100644 --- a/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp +++ b/src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,10 +61,10 @@ static const ComputeAllAnchorsKernel available_kernels[] = { {"neon_qu16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::QSYMM16; }, REGISTER_QSYMM16_NEON(arm_compute::cpu::neon_qu16_computeallanchors)}, #endif //defined(ARM_COMPUTE_ENABLE_NEON) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"neon_fp16_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_computeallanchors)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 {"neon_fp32_computeallanchors", [](const ComputeAllAnchorsData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_computeallanchors)}, }; diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp index 0a1780f6ee..5883731088 100644 --- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -70,10 +70,10 @@ struct InstanceNormKernel static const InstanceNormKernel available_kernels[] = { {"fp32_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_instancenorm)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_instancenorm", [](const InstanceNormSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_instancenorm)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 }; /** Micro-kernel selector diff --git a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp index 451031d696..cfe4ac9a4c 100644 --- a/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,10 +60,10 @@ struct MeanStdDevNormKernel static const std::vector<MeanStdDevNormKernel> available_kernels = { {"fp32_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_meanstddevnorm)}, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 {"fp16_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::F16; }, REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_meanstddevnorm)}, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 {"qasymm8_neon_meanstddevnorm", [](const MeanStdDevNormSelectorData &data) { return data.dt == DataType::QASYMM8; }, REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qasymm8_meanstddevnorm)}, }; diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp index 06e35eed8c..2effffbe92 100644 --- a/src/core/helpers/LUTManager.cpp +++ b/src/core/helpers/LUTManager.cpp @@ -30,17 +30,38 @@ namespace arm_compute namespace { -void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut) +float16_t activation(float16_t x, const LUTInfo &info) +{ + float16_t out = 0.f; + switch (info.act) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + out = 1.f / (1.f + std::exp(-x)); + break; + case ActivationLayerInfo::ActivationFunction::TANH: + { + out = static_cast<float16_t>(info.alpha * std::tanh(info.beta * x)); + break; + } + default: + ARM_COMPUTE_ERROR("Unsupported Activation for 16-bit LUT table"); + break; + } + return out; +} + +void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut, const LUTInfo &info) { union Element { uint16_t i = 0; float16_t fp; } item; + // Fill lut by iterating over all 16 bit values using the union. while (true) { - (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp)); + (*lut)[item.i] = activation(item.fp, info); if (item.i == 65535) break; item.i++; @@ -62,7 +83,7 @@ std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table // Not found, or pointer not valid // We do not use make_shared to prevent the weak_ptr keeping the control block alive std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536); - init_lut_fp16(ptr.get()); + init_lut_fp16(ptr.get(), info); map_fp16[info] = ptr; return ptr; } diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h index 4e13ead7e3..f3f4bf2832 100644 --- a/src/core/helpers/LUTManager.h +++ b/src/core/helpers/LUTManager.h @@ -38,19 +38,23 @@ namespace arm_compute struct LUTInfo { ActivationLayerInfo::ActivationFunction act; + float alpha; + float beta; DataType dt; - QuantizationInfo qinfo; + UniformQuantizationInfo qinfo; + // Operators enable use of map with Lutinfo as key friend bool operator<(const LUTInfo &l, const LUTInfo &r) { - return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) || - ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) || - ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) && - (l.qinfo.offset() < l.qinfo.offset())); + const auto l_tup = std::make_tuple(l.act, l.alpha, l.beta, l.dt, l.qinfo.scale, l.qinfo.offset); + const auto r_tup = std::make_tuple(r.act, r.alpha, r.beta, r.dt, r.qinfo.scale, r.qinfo.offset); + + return l_tup < r_tup; } - bool operator==(const LUTInfo &l) + bool operator==(const LUTInfo &l) const { - return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo; + return this->act == l.act && this->alpha == l.alpha && this->beta == l.beta && this->dt == l.dt && + this->qinfo == l.qinfo; } }; |