/*
 * Copyright (c) 2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NESYMM_H__
#define __ARM_COMPUTE_NESYMM_H__

#include "NEAsymm.h"

#include <algorithm>
#include <arm_neon.h>

namespace arm_compute
{
/** Performs final quantization step on 8 signed 16-bit elements
 *
 * @tparam is_bounded_relu Specified if a fused bounded relu should be applied
 *
 * @param[in] in_s32                       Input to be quantized.
* @param[in] result_fixedpoint_multiplier Result multiplier parameter * @param[in] result_shift Result shift parameter * @param[in] min_s16 Relu lower bound * @param[in] max_s16 Relu upper bound * * @return Quantized values */ template int16x8_t finalize_quantization_int16(int32x4x2_t &in_s32, int result_fixedpoint_multiplier, int32_t result_shift, int16x8_t min_s16, int16x8_t max_s16) { // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); // Round to the nearest division by a power-of-two using result_shift_s32 in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift); in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift); // Convert S32 to S16 int16x8_t out_s16 = vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])); if(is_bounded_relu) { out_s16 = vmaxq_s16(out_s16, min_s16); out_s16 = vminq_s16(out_s16, max_s16); } return out_s16; } /** Performs final quantization step on single signed 16-bit element * * @tparam is_bounded_relu Specified if a fused bounded relu should be applied * * @param[in] in_value Input to be quantized. 
* @param[in] result_fixedpoint_multiplier Result multiplier parameter * @param[in] result_shift Result shift parameter * @param[in] min_s16 Relu lower bound * @param[in] max_s16 Relu upper bound * * @return Quantized values */ template inline int16_t finalize_quantization_int16(int32_t in_value, int result_fixedpoint_multiplier, int32_t result_shift, int16_t min_s16, int16_t max_s16) { int32x4_t in_s32 = vdupq_n_s32(in_value); // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0); // Shift value by result_shift_s32 in_value = rounding_divide_by_pow2(in_value, result_shift); // Bound the result int16_t out_s16 = static_cast(std::max(-32768, std::min(32767, in_value))); if(is_bounded_relu) { out_s16 = static_cast(std::max(min_s16, std::min(max_s16, out_s16))); } return out_s16; } } // namespace arm_compute #endif // __ARM_COMPUTE_NESYMM_H__