diff options
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/FixedPoint.h | 4 | ||||
-rw-r--r-- | arm_compute/core/FixedPoint.inl | 8 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.h | 16 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.inl | 28 |
4 files changed, 28 insertions, 28 deletions
diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h index da304c6329..5eb4c55c41 100644 --- a/arm_compute/core/FixedPoint.h +++ b/arm_compute/core/FixedPoint.h @@ -296,7 +296,7 @@ float scvt_f32_qs8(qint8_t a, int fixed_point_position); * * @return The result of the conversion float -> 8 bit fixed point */ -qint8_t scvt_qs8_f32(float a, int fixed_point_position); +qint8_t sqcvt_qs8_f32(float a, int fixed_point_position); /** Convert a 16 bit fixed point to float * @@ -314,7 +314,7 @@ float scvt_f32_qs16(qint16_t a, int fixed_point_position); * * @return The result of the conversion float -> 16 bit fixed point */ -qint16_t scvt_qs16_f32(float a, int fixed_point_position); +qint16_t sqcvt_qs16_f32(float a, int fixed_point_position); /** Scalar saturating move and narrow. * diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl index fab91d6699..fdbc3f0c06 100644 --- a/arm_compute/core/FixedPoint.inl +++ b/arm_compute/core/FixedPoint.inl @@ -366,10 +366,10 @@ inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) return static_cast<float>(a) / (1 << fixed_point_position); } -inline qint8_t scvt_qs8_f32(float a, int fixed_point_position) +inline qint8_t sqcvt_qs8_f32(float a, int fixed_point_position) { // round_nearest_integer(a * 2^(fixed_point_position)) - return static_cast<qint8_t>(a * (1 << fixed_point_position) + 0.5f); + return saturate_convert<float, qint8_t>(a * (1 << fixed_point_position) + ((a >= 0) ? 0.5 : -0.5)); } inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) @@ -377,10 +377,10 @@ inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) return static_cast<float>(a) / (1 << fixed_point_position); } -inline qint16_t scvt_qs16_f32(float a, int fixed_point_position) +inline qint16_t sqcvt_qs16_f32(float a, int fixed_point_position) { // round_nearest_integer(a * 2^(fixed_point_position)) - return static_cast<qint16_t>(a * (1 << fixed_point_position) + 0.5f); + return saturate_convert<float, qint16_t>(a * (1 << fixed_point_position) + ((a >= 0) ? 0.5 : -0.5)); } inline qint8_t sqmovn_qs16(qint16_t a) diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h index 660464eb62..e3eb5d4638 100644 --- a/arm_compute/core/NEON/NEFixedPoint.h +++ b/arm_compute/core/NEON/NEFixedPoint.h @@ -788,36 +788,36 @@ qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point * @param[in] a Float input vector * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number * - * @return The result of the conversion float -> 8 bit fixed point + * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow */ -qint8x8_t vcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position); +qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position); /** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements * * @param[in] a Float input vector * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number * - * @return The result of the conversion float -> 16 bit fixed point + * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow */ -qint16x4_t vcvt_qs16_f32(const float32x4_t a, int fixed_point_position); +qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position); /** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements * * @param[in] a Float input vector * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number * - * @return The result of the conversion float -> 8 bit fixed point + * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow */ -qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); +qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); /** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements * * @param[in] a Float input vector * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number * - * @return The result of the conversion float -> 16 bit fixed point + * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow */ -qint16x8_t vcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position); +qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position); /** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements * diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index 4f7f44ab3b..92af82cf71 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -236,7 +236,7 @@ inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position) vdupq_n_f32(a), } }; - return vcvtq_qs8_f32(res, fixed_point_position); + return vqcvtq_qs8_f32(res, fixed_point_position); } inline qint16x8_t vdupq_n_qs16(qint16_t a) @@ -809,15 +809,15 @@ inline qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixe return vqaddq_s32(a, tmp); } -inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) +inline qint8x8_t vqcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position)); float32x4x2_t res_f32 = { { - vdupq_n_f32(0.5f), - vdupq_n_f32(0.5f) + vbslq_f32(vcgeq_f32(a.val[0], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)), + vbslq_f32(vcgeq_f32(a.val[1], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)) } }; @@ -837,11 +837,11 @@ inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) return vqmovn_s16(res_s16); } -inline qint16x4_t vcvt_qs16_f32(const float32x4_t a, int fixed_point_position) +inline qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position)); - float32x4_t res_f32 = vdupq_n_f32(0.5f); + float32x4_t res_f32 = vbslq_f32(vcgeq_f32(a, vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)); res_f32 = vmlaq_f32(res_f32, a, pow2); @@ -850,17 +850,17 @@ inline qint16x4_t vcvt_qs16_f32(const float32x4_t a, int fixed_point_position) return vqmovn_s32(res_s32); } -inline qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position) +inline qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position)); float32x4x4_t res_f32 = { { - vdupq_n_f32(0.5f), - vdupq_n_f32(0.5f), - vdupq_n_f32(0.5f), - vdupq_n_f32(0.5f) + vbslq_f32(vcgeq_f32(a.val[0], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)), + vbslq_f32(vcgeq_f32(a.val[1], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)), + vbslq_f32(vcgeq_f32(a.val[2], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)), + vbslq_f32(vcgeq_f32(a.val[3], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)) } }; @@ -890,15 +890,15 @@ inline qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position return vcombine_s8(vqmovn_s16(res_s16.val[0]), vqmovn_s16(res_s16.val[1])); } -inline qint16x8_t vcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position) +inline qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position)); float32x4x2_t res_f32 = { { - vdupq_n_f32(0.5f), - vdupq_n_f32(0.5f) + vbslq_f32(vcgeq_f32(a.val[0], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)), + vbslq_f32(vcgeq_f32(a.val[1], vdupq_n_f32(0)), vdupq_n_f32(0.5f), vdupq_n_f32(-0.5f)) } }; |