diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-01-12 16:29:45 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:43:42 +0000 |
commit | f72f9367d1eddee91f15a64952b99ee6b80b821d (patch) | |
tree | 0d3296219ca7919c263b3701ab22b5468df86354 /src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | |
parent | a026e981c607272181292b044c91f73a27d2bcd9 (diff) | |
download | ComputeLibrary-f72f9367d1eddee91f15a64952b99ee6b80b821d.tar.gz |
COMPMID-791: Adds support of QASYMM8 in NEDepthwiseConvolution3x3
Change-Id: I1a9ed6c3420ddf8978aeaad48d9915333b006b49
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116374
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 59 |
1 files changed, 4 insertions, 55 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index 8b3f2383ab..5e14e1a95d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -96,57 +96,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } +} // namespace -template <bool is_bounded_relu> -inline uint8x16_t finalize_quantization(int32x4x4_t &in_s32, int result_fixedpoint_multiplier, int32_t result_shift, int32x4_t result_offset_after_shift_s32, uint8x16_t min_u8, - uint8x16_t max_u8) +namespace arm_compute { - const static int32x4_t zero_s32 = vdupq_n_s32(0); - - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier); - in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier); - - // Round to the nearest division by a power-of-two using result_shift_s32 - in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift); - in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift); - in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift); - in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift); - - // Add the offset terms - in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32); - in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32); - in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32); - in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32); - - // Saturate negative values - in_s32.val[0] = vmaxq_s32(in_s32.val[0], zero_s32); - in_s32.val[1] = vmaxq_s32(in_s32.val[1], zero_s32); - in_s32.val[2] = vmaxq_s32(in_s32.val[2], zero_s32); - in_s32.val[3] = vmaxq_s32(in_s32.val[3], zero_s32); - - // Convert S32 to S16 - const int16x8x2_t in_s16 = - { - { - vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])), - vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3])) - } - }; - - // Convert S16 to U8 - uint8x16_t out_u8 = vcombine_u8(vqmovun_s16(in_s16.val[0]), vqmovun_s16(in_s16.val[1])); - - if(is_bounded_relu) - { - out_u8 = vmaxq_u8(out_u8, min_u8); - out_u8 = vminq_u8(out_u8, max_u8); - } - - return out_u8; -} +class Coordinates; /* Function used by the left-over for loop to perform the quantization */ template <bool is_bounded_relu> @@ -178,11 +132,6 @@ inline uint8_t finalize_quantization(int32x4_t in_s32, int result_fixedpoint_mul return out_u8; } -} // namespace - -namespace arm_compute -{ -class Coordinates; } // namespace arm_compute template <bool is_bounded_relu> |