diff options
-rw-r--r-- | arm_compute/core/FixedPoint.h | 11 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.h | 32 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.inl | 61 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEActivationLayerKernel.h | 8 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEActivationLayer.h | 2 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEActivationLayerKernel.cpp | 149 | ||||
-rw-r--r-- | src/core/Utils.cpp | 2 | ||||
-rw-r--r-- | tests/validation/Helpers.h | 2 | ||||
-rw-r--r-- | tests/validation/NEON/ActivationLayer.cpp | 38 | ||||
-rw-r--r-- | tests/validation/TensorOperations.h | 2 |
10 files changed, 244 insertions, 63 deletions
diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h index 5eb4c55c41..774125ec7d 100644 --- a/arm_compute/core/FixedPoint.h +++ b/arm_compute/core/FixedPoint.h @@ -251,7 +251,16 @@ qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position); * * @return The result of the 8 bit fixed point exponential. */ -qint8_t sexp_qs8(qint8_t a, int fixed_point_position); +qint8_t sqexp_qs8(qint8_t a, int fixed_point_position); + +/** 16 bit fixed point scalar exponential +* +* @param[in] a 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point exponential. +*/ +qint16_t sqexp_qs16(qint16_t a, int fixed_point_position); /** 16 bit fixed point scalar exponential * diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h index e3eb5d4638..e30509cd0a 100644 --- a/arm_compute/core/NEON/NEFixedPoint.h +++ b/arm_compute/core/NEON/NEFixedPoint.h @@ -176,6 +176,14 @@ void vst1q_qs8(qint8_t *addr, qint8x16_t b); */ void vst1q_qs16(qint16_t *addr, qint16x8_t b); +/** Store two 16 bit fixed point vector to memory (8x2 elements) +* +* @param[in] addr Memory address where the 16 bit fixed point vectors should be stored +* @param[in] b 16 bit fixed point vectors to store +* +*/ +void vst2q_qs16(qint16_t *addr, qint16x8x2_t b); + /** 16 bit fixed point vector saturating narrow (8 elements) * * @param[in] a 16 bit fixed point vector to convert @@ -1122,7 +1130,7 @@ qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position); * * @return The calculated Hyperbolic Tangent. */ -qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position); +qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position); /** Calculate hyperbolic tangent for fixed point 16 bit (4 elements) * @@ -1131,7 +1139,7 @@ qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position); * * @return The calculated Hyperbolic Tangent. */ -qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position); +qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position); /** Calculate hyperbolic tangent for fixed point 8bit (16 elements) * @@ -1140,7 +1148,16 @@ qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position); * * @return The calculated Hyperbolic Tangent. */ -qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position); +qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate hyperbolic tangent for fixed point 16bit (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. + */ +qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position); /** Calculate saturating n power for fixed point 8bit (16 elements). * @@ -1162,15 +1179,6 @@ qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position); * @return The lane-by-lane maximum -> float32x4x2 */ float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b); - -/** Calculate hyperbolic tangent for fixed point 8bit (8 elements) - * - * @param[in] a 16 bit fixed point input vector - * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number - * - * @return The calculated Hyperbolic Tangent. - */ -qint16x8_t vtanhq_qs16(qint16x8_t a, int fixed_point_position); } #include "arm_compute/core/NEON/NEFixedPoint.inl" #endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */ diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index 92af82cf71..b241dd5069 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -200,6 +200,11 @@ inline void vst1q_qs16(qint16_t *addr, qint16x8_t b) vst1q_s16(addr, b); } +inline void vst2q_qs16(qint16_t *addr, qint16x8x2_t b) +{ + vst2q_s16(addr, b); +} + inline qint8x8_t vqmovn_qs16(qint16x8_t a) { return vqmovn_s16(a); @@ -1641,15 +1646,15 @@ inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); // Find shift value. Number must be in (0.5, 2) range. - qint8x8_t shift_value = vneg_s8(vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + qint8x8_t shift_value = vqneg_s8(vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); // Add one when the shift value is negative in order to get the correct result when we shift right with 1 qint8x8_t temp = vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); temp = vbsl_s8(temp_ltz, vqadd_s8(temp, vdup_n_s8(1)), temp); - qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + qint8x8_t shift_value2 = vqneg_s8(vshr_n_s8(temp, 1)); - temp = vshl_s8(a, shift_value); + temp = vqshl_s8(a, shift_value); // Initial guess qint8x8_t x = temp; @@ -1660,7 +1665,7 @@ inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); - return vshl_s8(x, shift_value2); + return vqshl_s8(x, shift_value2); } inline qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position) @@ -1668,15 +1673,15 @@ inline qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position) const qint16x4_t const_three = vdup_n_s16(3 << fixed_point_position); // Find shift value. Number must be in (0.5, 2) range. - qint16x4_t shift_value = vneg_s16(vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)))); + qint16x4_t shift_value = vqneg_s16(vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)))); // Add one when the shift value is negative in order to get the correct result when we shift right with 1 qint16x4_t temp = vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))); uint16x4_t temp_ltz = vclt_s16(temp, vdup_n_qs16(0)); temp = vbsl_s16(temp_ltz, vqadd_s16(temp, vdup_n_s16(1)), temp); - qint16x4_t shift_value2 = vneg_s16(vshr_n_s16(temp, 1)); + qint16x4_t shift_value2 = vqneg_s16(vshr_n_s16(temp, 1)); - temp = vshl_s16(a, shift_value); + temp = vqshl_s16(a, shift_value); // Initial guess qint16x4_t x = temp; @@ -1753,15 +1758,15 @@ inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); // Find shift value. Number must be in (0.5, 2) range. - qint8x16_t shift_value = vnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); // Add one when the shift value is negative in order to get the correct result when we shift right with 1 qint8x16_t temp = vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); temp = vbslq_s8(temp_ltz, vqaddq_s8(temp, vdupq_n_s8(1)), temp); - qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + qint8x16_t shift_value2 = vqnegq_s8(vshrq_n_s8(temp, 1)); - temp = vshlq_s8(a, shift_value); + temp = vqshlq_s8(a, shift_value); // Initial guess qint8x16_t x = temp; @@ -1780,13 +1785,13 @@ inline qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position) const qint16x8_t const_three = vdupq_n_s16(3 << fixed_point_position); // Find shift value. Number must be in (0.5, 2) range. - qint16x8_t shift_value = vnegq_s16(vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)))); + qint16x8_t shift_value = vqnegq_s16(vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)))); // Add one when the shift value is negative in order to get the correct result when we shift right with 1 qint16x8_t temp = vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))); uint16x8_t temp_ltz = vcltq_s16(temp, vdupq_n_qs16(0)); temp = vbslq_s16(temp_ltz, vqaddq_s16(temp, vdupq_n_s16(1)), temp); - qint16x8_t shift_value2 = vnegq_s16(vshrq_n_s16(temp, 1)); + qint16x8_t shift_value2 = vqnegq_s16(vshrq_n_s16(temp, 1)); temp = vqshlq_s16(a, shift_value); @@ -1804,7 +1809,7 @@ inline qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position) return vqshlq_s16(x, shift_value2); } -inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) +inline qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position) { const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position); @@ -1817,7 +1822,7 @@ inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) return tanh; } -inline qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position) +inline qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position) { const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position); const qint16x4_t const_two = vdup_n_s16(2 << fixed_point_position); @@ -1830,7 +1835,7 @@ inline qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position) return tanh; } -inline qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position) +inline qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position) { const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position); @@ -1843,6 +1848,19 @@ inline qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position) return tanh; } +inline qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); + const qint16x8_t const_two = vdupq_n_s16(2 << fixed_point_position); + + qint16x8_t exp2x = vqexpq_qs16(vqmulq_qs16(const_two, a, fixed_point_position), fixed_point_position); + qint16x8_t num = vqsubq_qs16(exp2x, const_one); + qint16x8_t den = vqaddq_qs16(exp2x, const_one); + qint16x8_t tanh = vqmulq_qs16(num, vqrecipq_qs16(den, fixed_point_position), fixed_point_position); + + return tanh; +} + inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) { return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position); @@ -1859,17 +1877,4 @@ inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b) }; return res; } - -inline qint16x8_t vtanhq_qs16(qint16x8_t a, int fixed_point_position) -{ - const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); - const qint16x8_t const_two = vdupq_n_s16(2 << fixed_point_position); - - qint16x8_t exp2x = vqexpq_qs16(vqmulq_qs16(const_two, a, fixed_point_position), fixed_point_position); - qint16x8_t num = vqsubq_qs16(exp2x, const_one); - qint16x8_t den = vqaddq_qs16(exp2x, const_one); - qint16x8_t tanh = vqmulq_qs16(num, vqrecipq_qs16(den, fixed_point_position), fixed_point_position); - - return tanh; -} } diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 539bca587a..e995f1e5e0 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -50,7 +50,7 @@ public: * @note If the output tensor is a nullptr, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QS8/F32. + * of the activation function. Data types supported: QS8/QS16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] activation_info Activation layer information. */ @@ -78,6 +78,12 @@ private: */ template <ActivationLayerInfo::ActivationFunction F, typename T> typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type activation(const Window &window); + /** Function to apply an activation function on a tensor. + * + * @param[in] window Region on which to execute the kernel + */ + template <ActivationLayerInfo::ActivationFunction F, typename T> + typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type activation(const Window &window); private: ITensor *_input; diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index b1a211553d..f3cd305910 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -44,7 +44,7 @@ public: * @note If the output tensor is a nullptr, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QS8/F32. + * of the activation function. Data types supported: QS8/QS16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] activation_info Activation layer parameters. */ diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 492d197925..f530413453 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -47,7 +47,7 @@ NEActivationLayerKernel::NEActivationLayerKernel() void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); _input = input; _act_info = activation_info; @@ -78,7 +78,6 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> }, }; - // Activation functions : QS8 static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 = { @@ -92,15 +91,31 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> }, }; + // Activation functions : QS16 + static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 = + { + { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> }, + { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> }, + { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> }, + { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> }, + { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> }, + { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> }, + { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> }, + { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> }, + { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> }, + }; switch(input->info()->data_type()) { - case DataType::F32: - _func = act_map_f32[activation_info.activation()]; - break; case DataType::QS8: _func = act_map_qs8[activation_info.activation()]; break; + case DataType::QS16: + _func = act_map_qs16[activation_info.activation()]; + break; + case DataType::F32: + _func = act_map_f32[activation_info.activation()]; + break; default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -262,9 +277,9 @@ typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationL template <ActivationLayerInfo::ActivationFunction F, typename T> typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window) { - Iterator input(_input, window); - Iterator output(_output, window); - int fixed_point_position = _input->info()->fixed_point_position(); + Iterator input(_input, window); + Iterator output(_output, window); + const int fixed_point_position = _input->info()->fixed_point_position(); static const qint8x16_t CONST_0 = vdupq_n_qs8(0); const qint8x16_t CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position)); @@ -291,7 +306,7 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation tmp = vqmlaq_qs8(b, a, in, fixed_point_position); break; case ActivationFunction::LOGISTIC: - tmp = vrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position); + tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position); break; case ActivationFunction::RELU: tmp = vmaxq_qs8(CONST_0, in); @@ -300,13 +315,13 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position); break; case ActivationFunction::SQRT: - tmp = vrecipq_qs8(vinvsqrtq_qs8(in, fixed_point_position), fixed_point_position); + tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position); break; case ActivationFunction::SQUARE: tmp = vqmulq_qs8(in, in, fixed_point_position); break; case ActivationFunction::TANH: - tmp = vtanhq_qs8(in, fixed_point_position); + tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position); break; default: break; @@ -317,6 +332,118 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation input, output); } +template <ActivationLayerInfo::ActivationFunction F, typename T> +typename std::enable_if<std::is_same<T, int16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window) +{ + Iterator input(_input, window); + Iterator output(_output, window); + const int fixed_point_position = _input->info()->fixed_point_position(); + + static const qint16x8_t CONST_0 = vdupq_n_qs16(0); + const qint16x8_t CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position)); + const qint16x8_t a = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position)); + const qint16x8_t b = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position)); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr()); + const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); + + const qint16x8x2_t in = vld2q_s16(input_ptr); + qint16x8x2_t tmp = { {} }; + + switch(F) + { + case ActivationFunction::ABS: + tmp = + { + { + vqabsq_qs16(in.val[0]), + vqabsq_qs16(in.val[1]), + } + }; + break; + case ActivationFunction::BOUNDED_RELU: + tmp = + { + { + vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])), + vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])), + } + }; + break; + case ActivationFunction::LINEAR: + tmp = + { + { + vqmlaq_qs16(b, a, in.val[0], fixed_point_position), + vqmlaq_qs16(b, a, in.val[1], fixed_point_position), + } + }; + break; + case ActivationFunction::LOGISTIC: + tmp = + { + { + vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position), + vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position), + } + }; + break; + case ActivationFunction::RELU: + tmp = + { + { + vmaxq_qs16(CONST_0, in.val[0]), + vmaxq_qs16(CONST_0, in.val[1]), + } + }; + break; + case ActivationFunction::SOFT_RELU: + tmp = + { + { + vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position), + vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position), + } + }; + break; + case ActivationFunction::SQRT: + tmp = + { + { + vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position), + vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position), + } + }; + break; + case ActivationFunction::SQUARE: + tmp = + { + { + vqmulq_qs16(in.val[0], in.val[0], fixed_point_position), + vqmulq_qs16(in.val[1], in.val[1], fixed_point_position), + } + }; + break; + case ActivationFunction::TANH: + tmp = + { + { + vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position), + vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position), + } + }; + break; + default: + break; + } + + vst2q_qs16(output_ptr, tmp); + }, + input, output); +} + void NEActivationLayerKernel::run(const Window &window) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index f6230c0199..11b41aa178 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -286,6 +286,7 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const case DataType::U16: print_consecutive_elements_impl<uint16_t>(s, reinterpret_cast<const uint16_t *>(ptr), n, stream_width, element_delim); break; + case DataType::QS16: case DataType::S16: print_consecutive_elements_impl<int16_t>(s, reinterpret_cast<const int16_t *>(ptr), n, stream_width, element_delim); break; @@ -316,6 +317,7 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp return max_consecutive_elements_display_width_impl<int8_t>(s, reinterpret_cast<const int8_t *>(ptr), n); case DataType::U16: return max_consecutive_elements_display_width_impl<uint16_t>(s, reinterpret_cast<const uint16_t *>(ptr), n); + case DataType::QS16: case DataType::S16: return max_consecutive_elements_display_width_impl<int16_t>(s, reinterpret_cast<const int16_t *>(ptr), n); case DataType::U32: diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index d92699d93e..a551da731e 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -90,7 +90,7 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation break; case ActivationLayerInfo::ActivationFunction::SQRT: // Reduce range as sqrt should take a non-negative number - bounds.first = (is_float) ? 0 : 1 << (fixed_point_position); + bounds.first = (is_float) ? 0 : 1; break; default: break; diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp index 40be32278d..71dfcdc4e2 100644 --- a/tests/validation/NEON/ActivationLayer.cpp +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -193,10 +193,11 @@ BOOST_DATA_TEST_CASE(Configuration, boost::unit_test::data::make({ false, true } BOOST_AUTO_TEST_SUITE(Float) BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) -BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * SmallShapes() * CNNFloatDataTypes() * ActivationFunctions(), in_place, shape, dt, act_function) +BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * SmallShapes() * CNNFloatDataTypes() * ActivationFunctions() * boost::unit_test::data::make({ 0.5f, 1.f }), + in_place, shape, dt, act_function, alpha_beta) { // Create activation layer info - ActivationLayerInfo act_info(act_function, 1.f, 1.f); + ActivationLayerInfo act_info(act_function, alpha_beta, alpha_beta); // Compute function Tensor dst = compute_activation_layer(in_place, shape, dt, act_info); @@ -209,10 +210,11 @@ BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * S } BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) -BOOST_DATA_TEST_CASE(RunLarge, boost::unit_test::data::make({ false, true }) * LargeShapes() * CNNFloatDataTypes() * ActivationFunctions(), in_place, shape, dt, act_function) +BOOST_DATA_TEST_CASE(RunLarge, boost::unit_test::data::make({ false, true }) * LargeShapes() * CNNFloatDataTypes() * ActivationFunctions() * boost::unit_test::data::make({ 0.5f, 1.f }), + in_place, shape, dt, act_function, alpha_beta) { // Create activation layer info - ActivationLayerInfo act_info(act_function, 1.f, 1.f); + ActivationLayerInfo act_info(act_function, alpha_beta, alpha_beta); // Compute function Tensor dst = compute_activation_layer(in_place, shape, dt, act_info); @@ -229,12 +231,13 @@ BOOST_AUTO_TEST_SUITE_END() * cause overflowing issues in most of the transcendentals functions. */ BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_AUTO_TEST_SUITE(QS8) BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) -BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * SmallShapes() * ActivationFunctions() * boost::unit_test::data::xrange(3, 6, 1), - in_place, shape, act_function, fixed_point_position) +BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * SmallShapes() * ActivationFunctions() * boost::unit_test::data::xrange(3, 6, 1) * boost::unit_test::data::make({ 0.5f, 1.f }), + in_place, shape, act_function, fixed_point_position, alpha_beta) { // Create activation layer info - ActivationLayerInfo act_info(act_function, 1.f, 1.f); + ActivationLayerInfo act_info(act_function, alpha_beta, alpha_beta); // Compute function Tensor dst = compute_activation_layer(in_place, shape, DataType::QS8, act_info, fixed_point_position); @@ -247,6 +250,27 @@ BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * S } BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, boost::unit_test::data::make({ false, true }) * SmallShapes() * ActivationFunctions() * boost::unit_test::data::xrange(3, 6, 1) * boost::unit_test::data::make({ 0.5f, 1.f }), + in_place, shape, act_function, fixed_point_position, alpha_beta) +{ + // Create activation layer info + ActivationLayerInfo act_info(act_function, alpha_beta, alpha_beta); + + // Compute function + Tensor dst = compute_activation_layer(in_place, shape, DataType::QS16, act_info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_activation_layer(shape, DataType::QS16, act_info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, activation_layer_tolerance(act_function, fixed_point_position)); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() + BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END() #endif /* DOXYGEN_SKIP_THIS */ diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h index 0d752ee6fc..adac70901d 100644 --- a/tests/validation/TensorOperations.h +++ b/tests/validation/TensorOperations.h @@ -930,7 +930,7 @@ void activation_layer(const Tensor<T> &in, Tensor<T> &out, ActivationLayerInfo a out[i] = mul(x, x).raw(); break; case ActivationLayerInfo::ActivationFunction::TANH: - out[i] = tanh(x).raw(); + out[i] = mul(a, tanh(mul(b, x))).raw(); break; default: ARM_COMPUTE_ERROR("Activation function not recognised"); |