path: root/src/core/NEON/kernels/NEActivationLayerKernel.cpp
author    Georgios Pinitas <georgios.pinitas@arm.com>  2017-06-27 17:39:11 +0100
committer Anthony Barbier <anthony.barbier@arm.com>    2018-09-17 14:15:39 +0100
commit    ccc65d44a53eaa61c718cbc4d826c811e2ccebda (patch)
tree      a5394c05c43ece384d629ceae644f5c7b5d23280 /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent    0745a980c6a5e2d294bcd09f3c704e6cf4fe316d (diff)
download  ComputeLibrary-ccc65d44a53eaa61c718cbc4d826c811e2ccebda.tar.gz
COMPMID-427: Port NEActivationLayer in 16bit fixed point.
Change-Id: Iebd61807f7b597c6bd990673bc7655c68ee16f4b
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79085
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
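
Background for the QS16 additions in this patch: QS8 and QS16 are the library's saturated Q-format fixed-point types, where fixed_point_position gives the number of fractional bits. The following is a minimal scalar sketch of the float-to-QS16 conversion that sqcvt_qs16_f32 performs in the diff; qs16_from_float and float_from_qs16 are illustrative names for this sketch, not library functions.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Scalar model of the saturating float -> QS16 conversion (sqcvt_qs16_f32):
    // scale by 2^fixed_point_position, round, and clamp to the int16_t range.
    int16_t qs16_from_float(float value, int fixed_point_position)
    {
        const float scaled = std::round(value * static_cast<float>(1 << fixed_point_position));
        return static_cast<int16_t>(std::max(-32768.f, std::min(32767.f, scaled)));
    }

    // Inverse mapping, useful for checking kernel outputs against a float reference.
    float float_from_qs16(int16_t value, int fixed_point_position)
    {
        return static_cast<float>(value) / static_cast<float>(1 << fixed_point_position);
    }

With 8 fractional bits, for example, qs16_from_float(1.f, 8) yields 256, which is the value that vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position)) broadcasts as CONST_1 in the kernel below.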
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEActivationLayerKernel.cpp  149
1 file changed, 138 insertions(+), 11 deletions(-)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 492d197925..f530413453 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -47,7 +47,7 @@ NEActivationLayerKernel::NEActivationLayerKernel()
void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
_input = input;
_act_info = activation_info;
@@ -78,7 +78,6 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
};
-
// Activation functions : QS8
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
{
@@ -92,15 +91,31 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
};
+ // Activation functions : QS16
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
+ {
+ { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
+ { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
+ { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
+ { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
+ { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
+ { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
+ { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
+ { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
+ { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
+ };
switch(input->info()->data_type())
{
- case DataType::F32:
- _func = act_map_f32[activation_info.activation()];
- break;
case DataType::QS8:
_func = act_map_qs8[activation_info.activation()];
break;
+ case DataType::QS16:
+ _func = act_map_qs16[activation_info.activation()];
+ break;
+ case DataType::F32:
+ _func = act_map_f32[activation_info.activation()];
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
}
@@ -262,9 +277,9 @@ typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationL
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
- Iterator input(_input, window);
- Iterator output(_output, window);
- int fixed_point_position = _input->info()->fixed_point_position();
+ Iterator input(_input, window);
+ Iterator output(_output, window);
+ const int fixed_point_position = _input->info()->fixed_point_position();
static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
const qint8x16_t CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
@@ -291,7 +306,7 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation
tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
break;
case ActivationFunction::LOGISTIC:
- tmp = vrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
+ tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
break;
case ActivationFunction::RELU:
tmp = vmaxq_qs8(CONST_0, in);
@@ -300,13 +315,13 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation
tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
break;
case ActivationFunction::SQRT:
- tmp = vrecipq_qs8(vinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
+ tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
break;
case ActivationFunction::SQUARE:
tmp = vqmulq_qs8(in, in, fixed_point_position);
break;
case ActivationFunction::TANH:
- tmp = vtanhq_qs8(in, fixed_point_position);
+ tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
break;
default:
break;
@@ -317,6 +332,118 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation
input, output);
}
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, int16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+{
+ Iterator input(_input, window);
+ Iterator output(_output, window);
+ const int fixed_point_position = _input->info()->fixed_point_position();
+
+ static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
+ const qint16x8_t CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
+ const qint16x8_t a = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
+ const qint16x8_t b = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+
+ const qint16x8x2_t in = vld2q_s16(input_ptr);
+ qint16x8x2_t tmp = { {} };
+
+ switch(F)
+ {
+ case ActivationFunction::ABS:
+ tmp =
+ {
+ {
+ vqabsq_qs16(in.val[0]),
+ vqabsq_qs16(in.val[1]),
+ }
+ };
+ break;
+ case ActivationFunction::BOUNDED_RELU:
+ tmp =
+ {
+ {
+ vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
+ vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
+ }
+ };
+ break;
+ case ActivationFunction::LINEAR:
+ tmp =
+ {
+ {
+ vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
+ vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
+ }
+ };
+ break;
+ case ActivationFunction::LOGISTIC:
+ tmp =
+ {
+ {
+ vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
+ vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
+ }
+ };
+ break;
+ case ActivationFunction::RELU:
+ tmp =
+ {
+ {
+ vmaxq_qs16(CONST_0, in.val[0]),
+ vmaxq_qs16(CONST_0, in.val[1]),
+ }
+ };
+ break;
+ case ActivationFunction::SOFT_RELU:
+ tmp =
+ {
+ {
+ vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
+ vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
+ }
+ };
+ break;
+ case ActivationFunction::SQRT:
+ tmp =
+ {
+ {
+ vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
+ vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
+ }
+ };
+ break;
+ case ActivationFunction::SQUARE:
+ tmp =
+ {
+ {
+ vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
+ vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
+ }
+ };
+ break;
+ case ActivationFunction::TANH:
+ tmp =
+ {
+ {
+ vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
+ vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
+ }
+ };
+ break;
+ default:
+ break;
+ }
+
+ vst2q_qs16(output_ptr, tmp);
+ },
+ input, output);
+}
+
void NEActivationLayerKernel::run(const Window &window)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
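
Two behavioural notes on the hunks above: the LOGISTIC and SQRT cases switch from the vrecipq_*/vinvsqrtq_* helpers to their saturating vq-prefixed variants, and TANH now evaluates the scaled form a * tanh(b * x) that ActivationLayerInfo parameterises, rather than a bare tanh(x). A scalar sketch of what the TANH chain computes, reusing the hypothetical qs16_from_float/float_from_qs16 helpers from the earlier sketch:

    #include <cmath>
    #include <cstdint>

    // Scalar model of the QS16 TANH path: out = a * tanh(b * x). The kernel
    // computes the same expression with saturating fixed-point multiplies
    // (vqmulq_qs16) around a fixed-point tanh (vqtanhq_qs16); this sketch
    // round-trips through float for clarity.
    int16_t qs16_tanh_activation(int16_t x, int16_t a, int16_t b, int fixed_point_position)
    {
        const float xf = float_from_qs16(x, fixed_point_position);
        const float af = float_from_qs16(a, fixed_point_position);
        const float bf = float_from_qs16(b, fixed_point_position);
        return qs16_from_float(af * std::tanh(bf * xf), fixed_point_position);
    }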