diff options
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 39 |
1 files changed, 3 insertions, 36 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index f0ac695b20..d3cfc7a8fa 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -86,37 +86,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen namespace arm_compute { class Coordinates; - -/* Function used by the left-over for loop to perform the quantization */ -template <bool is_bounded_relu> -inline uint8_t finalize_quantization(int32x4_t in_s32, int result_fixedpoint_multiplier, int32_t result_shift, int32x4_t result_offset_after_shift_s32, uint8_t min_u8, uint8_t max_u8) -{ - const static int32x4_t zero_s32 = vdupq_n_s32(0); - const static int32x4_t sat_value_s32 = vdupq_n_s32(255); - - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_s32 = vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier); - - // Round to the nearest division by a power-of-two using result_shift_s32 - in_s32 = rounding_divide_by_pow2(in_s32, result_shift); - - // Add the offset terms - in_s32 = vaddq_s32(in_s32, result_offset_after_shift_s32); - - // Saturate negative values - in_s32 = vmaxq_s32(in_s32, zero_s32); - in_s32 = vminq_s32(in_s32, sat_value_s32); - - auto out_u8 = static_cast<uint8_t>(vgetq_lane_s32(in_s32, 0)); - - if(is_bounded_relu) - { - out_u8 = std::max(out_u8, min_u8); - out_u8 = std::min(out_u8, max_u8); - } - - return out_u8; -} } // namespace arm_compute template <bool is_bounded_relu> @@ -188,10 +157,8 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window // Add bias in_value += bias_value; - // Finalize and store the result - *(out.ptr() + x) = finalize_quantization<is_bounded_relu>(vdupq_n_s32(in_value), _result_fixedpoint_multiplier, _result_shift, result_offset_after_shift_s32, static_cast<uint8_t>(_min), - static_cast<uint8_t>(_max)); + *(out.ptr() + x) = finalize_quantization<is_bounded_relu>(in_value, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift, static_cast<uint8_t>(_min), static_cast<uint8_t>(_max)); } }, in, out, bias); @@ -220,10 +187,10 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window // Compute left-over elements for(; x < window_end_x; ++x) { - const int32x4_t in_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x); + const int32_t in_value = *(reinterpret_cast<const int32_t *>(in.ptr()) + x); // Finalize and store the result - *(out.ptr() + x) = finalize_quantization<is_bounded_relu>(in_s32, _result_fixedpoint_multiplier, _result_shift, result_offset_after_shift_s32, static_cast<uint8_t>(_min), static_cast<uint8_t>(_max)); + *(out.ptr() + x) = finalize_quantization<is_bounded_relu>(in_value, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift, static_cast<uint8_t>(_min), static_cast<uint8_t>(_max)); } }, in, out); |