author    Georgios Pinitas <georgios.pinitas@arm.com>  2019-06-04 17:31:46 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>  2019-06-05 14:14:48 +0000
commit    4b3fba1850fdf84ba3f9a0c98acf3de672330b34 (patch)
tree      1b65639ec7387c474903583ff0927918c8c7d837 /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent    c625acd2a60a4fe34633c5cecef85c230933f772 (diff)
download  ComputeLibrary-4b3fba1850fdf84ba3f9a0c98acf3de672330b34.tar.gz
COMPMID-2372: Add support for QASYMM8 for Tanh
- Perform calculations in the floating point domain
- Extend checks for Logistic, as the scale should be 1/256 and the offset 0

Change-Id: I90ef4a042f053976936f5d28f8e09b54eec196a2
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1287
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
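The patch routes QASYMM8 tanh through the floating-point domain: the input is de-quantized, a * tanh(b * x) is evaluated in float, and the result is re-quantized. Because tanh maps into [-1, 1], the 256 output levels are pinned to scale 1/128 with offset 128, while logistic maps into [0, 1] and keeps scale 1/256 with offset 0; the extended validate_arguments() check rejects any other output quantization. The scalar sketch below illustrates the idea only; the helper names are hypothetical, since the kernel itself uses the library's dequantize_qasymm8/quantize_qasymm8 helpers and the vectorised NEON path shown in the diff.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Hypothetical scalar model of the QASYMM8 tanh path (not the library's implementation).
    static float dequantize_u8(uint8_t q, float scale, int offset)
    {
        return (static_cast<int>(q) - offset) * scale;
    }

    static uint8_t quantize_u8(float v, float scale, int offset)
    {
        const int q = offset + static_cast<int>(std::lround(v / scale));
        return static_cast<uint8_t>(std::min(std::max(q, 0), 255)); // clamp to the 8-bit range
    }

    static uint8_t qasymm8_tanh(uint8_t in, float in_scale, int in_offset, float a, float b)
    {
        const float x = dequantize_u8(in, in_scale, in_offset); // de-quantize
        const float y = a * std::tanh(b * x);                   // activation in float
        return quantize_u8(y, 1.f / 128.f, 128);                // re-quantize into the fixed tanh output space
    }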
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEActivationLayerKernel.cpp  57
1 file changed, 49 insertions(+), 8 deletions(-)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 3f71553926..64342512a0 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -39,15 +39,33 @@
#include <array>
#include <cmath>
#include <map>
+#include <set>
using namespace arm_compute;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::F16, DataType::F32);
+ static std::set<ActivationLayerInfo::ActivationFunction> qs8_supported_activations =
+ {
+ ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH
+ };
+ const DataType data_type = input->data_type();
+ const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
+ const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qs8_supported_activations.count(f_act) == 0),
+ "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
{
@@ -96,12 +114,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
_output = output;
}
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr));
-
- ARM_COMPUTE_ERROR_ON_MSG((input->info()->data_type() == DataType::QASYMM8) && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU)
- && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
- && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC),
- "For QASYMM8 only logistic, relu and lower/upper bounded relu are supported");
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));
// Activation functions : FP32
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
@@ -146,6 +159,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t> },
{ ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
{ ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
+ { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
{ ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
};
@@ -328,6 +342,10 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
// Initialise scale/offset for re-quantization
float s = qi_in.scale / qi_out.scale;
@@ -385,6 +403,23 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
// Re-quantize to new output space
tmp = vquantize(tmp_dep, qi_out);
}
+ else if(act == ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -418,6 +453,12 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
tmp_f = 1.f / (1.f + std::exp(-tmp_f));
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+ else if(act == ActivationFunction::TANH)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
else
{
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -431,7 +472,7 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat
Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
return Status{};
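
For completeness, a minimal usage sketch against the public runtime API, assuming the standard NEActivationLayer function wrapper; the shape and input quantization are arbitrary, but the output quantization of (1/128, 128) is the one the updated validation requires for tanh.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative shape and input quantization; only the output quantization is mandated by the new check.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.0625f, 100)));
        // Tanh output must be quantized with scale 1/128 and offset 128, otherwise validation fails.
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 128.f, 128)));

        NEActivationLayer tanh_layer{};
        tanh_layer.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with quantized input data, then:
        tanh_layer.run();
        return 0;
    }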