diff options
Diffstat (limited to 'src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 382 |
1 files changed, 1 insertions, 381 deletions
diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index 2d7c29d9a0..4a318f02c1 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -51,386 +51,6 @@ constexpr float COEFF1 = 0.0663f; constexpr float COEFF2 = 0.2447f; } // namespace -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -namespace fp16 -{ -inline float16x8_t inv(float16x8_t x) -{ - const float16x8_t estimate = vrecpeq_f16(x); - return vmulq_f16(estimate, vrecpsq_f16(x, estimate)); -} - -inline float16x8_t atan2_fast(float16x8_t gx, float16x8_t gy, float16x8_t scale) -{ - static const float16x8_t one = vdupq_n_f16(1.0f); - static const float16x8_t ninety = vdupq_n_f16(90.f * SCALE_FACTOR); - static const float16x8_t epsilon = vdupq_n_f16(1e-9f); - static const float16x8_t piover4 = vdupq_n_f16(PI_4); - static const float16x8_t coeff1 = vdupq_n_f16(COEFF1); - static const float16x8_t coeff2 = vdupq_n_f16(COEFF2); - - const float16x8_t abs_gx = vabsq_f16(gx); - const float16x8_t abs_gy = vabsq_f16(gy); - const float16x8_t tmin = vminq_f16(abs_gx, abs_gy); - const float16x8_t tmax = vmaxq_f16(abs_gx, abs_gy); - - // z = min(x, y) / max(x, y) - const float16x8_t z = vmulq_f16(tmin, inv(vaddq_f16(tmax, epsilon))); - const float16x8_t absz = vabsq_f16(z); - - // = x * [pi/4 + (1 - |x|) * (0.2447 + 0.0663 * |x|)] - float16x8_t arctan = vmulq_f16(z, vfmaq_f16(piover4, - vsubq_f16(one, absz), - vfmaq_f16(coeff2, coeff1, absz))); - - // Radians to degrees conversion with applied a scale factor in order to have the result [0, 255] - arctan = vmulq_f16(arctan, scale); - - /* If z > 1, result = 90 - result */ - return vbslq_f16(vcgeq_f16(abs_gx, abs_gy), arctan, vsubq_f16(ninety, arctan)); -} - -inline float16x8_t atan2_0_360(float16x8_t gx, float16x8_t gy) -{ - static const float16x8_t scale = vdupq_n_f16(SCALE_360); - static const float16x8_t threesixty = vdupq_n_f16(360.0f * SCALE_FACTOR); - static const float16x8_t zero = vdupq_n_f16(0.0f); - static const float16x8_t oneeighty = vdupq_n_f16(180.0f * SCALE_FACTOR); - - float16x8_t arctan = atan2_fast(gx, gy, scale); - - // Choose correct quadrant - arctan = vbslq_f16(vcltq_f16(gx, zero), vsubq_f16(oneeighty, arctan), arctan); - arctan = vbslq_f16(vcltq_f16(gy, zero), vsubq_f16(threesixty, arctan), arctan); - - return arctan; -} - -inline float16x8_t atan2_0_180(float16x8_t gx, float16x8_t gy) -{ - static const float16x8_t scale = vdupq_n_f16(SCALE_180); - static const float16x8_t threesixty = vdupq_n_f16(360.0f * SCALE_FACTOR); - static const float16x8_t oneeighty = vdupq_n_f16(180.0f * SCALE_FACTOR); - static const float16x8_t zero = vdupq_n_f16(0.0f); - - float16x8_t arctan = atan2_fast(gx, gy, scale); - - // Choose correct quadrant - arctan = vbslq_f16(vcltq_f16(gx, zero), vsubq_f16(oneeighty, arctan), arctan); - arctan = vbslq_f16(vcltq_f16(gy, zero), vsubq_f16(threesixty, arctan), arctan); - arctan = vbslq_f16(vcgtq_f16(arctan, oneeighty), vsubq_f16(arctan, oneeighty), arctan); - - return arctan; -} - -inline float32x4_t invsqrtv(float32x4_t x) -{ - float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); - - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), - sqrt_reciprocal); - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), - sqrt_reciprocal); - - return sqrt_reciprocal; -} - -inline float32x4_t sqrtv(float32x4_t x) -{ - float32x4_t res = vdupq_n_f32(0.5f); - return vmlaq_f32(res, x, invsqrtv(x)); -} - -inline int16x8_t magnitude_l1(int16x8_t input1, int16x8_t input2) -{ - return vqaddq_s16(vqabsq_s16(input1), vqabsq_s16(input2)); -} - -inline int16x8_t magnitude_l2(int16x8_t input1, int16x8_t input2) -{ - const int32x4x2_t square_x = - { - vmull_s16(vget_low_s16(input1), vget_low_s16(input1)), - vmull_s16(vget_high_s16(input1), vget_high_s16(input1)) - }; - - const int32x4x2_t square_y = - { - vmull_s16(vget_low_s16(input2), vget_low_s16(input2)), - vmull_s16(vget_high_s16(input2), vget_high_s16(input2)) - }; - - const uint32x4x2_t sum = - { - vaddq_u32(vreinterpretq_u32_s32(square_x.val[0]), - vreinterpretq_u32_s32(square_y.val[0])), - vaddq_u32(vreinterpretq_u32_s32(square_x.val[1]), - vreinterpretq_u32_s32(square_y.val[1])) - }; - - const float32x4x2_t res = - { - sqrtv(vcvtq_f32_u32(sum.val[0])), - sqrtv(vcvtq_f32_u32(sum.val[1])) - }; - - return vcombine_s16(vqmovn_s32(vcvtq_s32_f32(res.val[0])), - vqmovn_s32(vcvtq_s32_f32(res.val[1]))); -} - -inline uint8x8_t phase_signed(int16x8_t input1, int16x8_t input2) -{ - static const float16x8_t zeropointfive = vdupq_n_f16(0.5f); - - const float16x8_t inputx_f16 = vcvtq_f16_s16(input1); - const float16x8_t inputy_f16 = vcvtq_f16_s16(input2); - - // Compute fast atan2 - const float16x8_t angle = atan2_0_360(inputx_f16, inputy_f16); - - return vqmovun_s16(vcvtq_s16_f16(vaddq_f16(angle, zeropointfive))); -} - -inline uint8x8_t phase_unsigned(int16x8_t input1, int16x8_t input2) -{ - static const float16x8_t zeropointfive = vdupq_n_f16(0.5f); - - const float16x8_t inputx_f16 = vcvtq_f16_s16(input1); - const float16x8_t inputy_f16 = vcvtq_f16_s16(input2); - - // Compute fast atan2 - const float16x8_t angle = atan2_0_180(inputx_f16, inputy_f16); - - return vqmovun_s16(vcvtq_s16_f16(vaddq_f16(angle, zeropointfive))); -} - -template <MagnitudeType mag_type> -inline int16x8x2_t compute_magnitude(const int16x8x2_t &in0, const int16x8x2_t &gx); - -template <> -inline int16x8x2_t compute_magnitude<MagnitudeType::L2NORM>(const int16x8x2_t &in0, const int16x8x2_t &gx) -{ - const int16x8x2_t mag = - { - magnitude_l2(in0.val[0], gx.val[0]), - magnitude_l2(in0.val[1], gx.val[1]) - }; - - return mag; -} - -template <> -inline int16x8x2_t compute_magnitude<MagnitudeType::L1NORM>(const int16x8x2_t &in0, const int16x8x2_t &gx) -{ - const int16x8x2_t mag = - { - magnitude_l1(in0.val[0], gx.val[0]), - magnitude_l1(in0.val[1], gx.val[1]) - }; - - return mag; -} - -template <PhaseType phase_type> -inline uint8x16_t compute_phase(const int16x8x2_t &in0, const int16x8x2_t &gx); - -template <> -inline uint8x16_t compute_phase<PhaseType::SIGNED>(const int16x8x2_t &in0, const int16x8x2_t &gx) -{ - return vcombine_u8(phase_signed(in0.val[0], gx.val[0]), - phase_signed(in0.val[1], gx.val[1])); -} - -template <> -inline uint8x16_t compute_phase<PhaseType::UNSIGNED>(const int16x8x2_t &in0, const int16x8x2_t &gx) -{ - return vcombine_u8(phase_unsigned(in0.val[0], gx.val[0]), - phase_unsigned(in0.val[1], gx.val[1])); -} -} // namespace fp16 - -template <MagnitudeType mag_type, PhaseType phase_type> -NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::NEMagnitudePhaseFP16Kernel() - : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr) -{ -} - -template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase) -{ - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(gx, Format::S16); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(gy, Format::S16); - ARM_COMPUTE_ERROR_ON((nullptr == magnitude) && (nullptr == phase)); - - const bool run_mag = magnitude != nullptr; - const bool run_phase = phase != nullptr; - - if(run_mag) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(magnitude, Format::S16); - } - - if(run_phase) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(phase, Format::U8); - } - - _gx = gx; - _gy = gy; - _magnitude = magnitude; - _phase = phase; - - if(run_mag && run_phase) - { - /* Run magnitude and phase */ - _func = &NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::magnitude_phase; - } - else if(run_mag) - { - /* Run magnitude */ - _func = &NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::magnitude; - } - else if(run_phase) - { - /* Run phase */ - _func = &NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::phase; - } - else - { - ARM_COMPUTE_ERROR("At least one output must be NOT NULL"); - } - - const unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*gx->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal magnitude_access(magnitude == nullptr ? nullptr : magnitude->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal phase_access(phase == nullptr ? nullptr : phase->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(gx->info(), 0, num_elems_processed_per_iteration), - AccessWindowHorizontal(gy->info(), 0, num_elems_processed_per_iteration), - magnitude_access, - phase_access); - - ValidRegion valid_region = intersect_valid_regions(gx->info()->valid_region(), - gy->info()->valid_region()); - - magnitude_access.set_valid_region(win, valid_region); - phase_access.set_valid_region(win, valid_region); - - INEKernel::configure(win); -} - -template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::magnitude(const Window &window) -{ - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator magnitude(_magnitude, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t input1 = - { - vld1q_s16(reinterpret_cast<int16_t *>(gx.ptr())), - vld1q_s16(reinterpret_cast<int16_t *>(gx.ptr()) + 8) - }; - - const int16x8x2_t input2 = - { - vld1q_s16(reinterpret_cast<int16_t *>(gy.ptr())), - vld1q_s16(reinterpret_cast<int16_t *>(gy.ptr()) + 8) - }; - - // Compute and store magnitude - const int16x8x2_t mag = fp16::compute_magnitude<mag_type>(input1, input2); - - /* Store magnitude */ - vst1q_s16(reinterpret_cast<int16_t *>(magnitude.ptr()), mag.val[0]); - vst1q_s16(reinterpret_cast<int16_t *>(magnitude.ptr()) + 8, mag.val[1]); - }, - gx, gy, magnitude); -} - -template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::phase(const Window &window) -{ - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator phase(_phase, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t input1 = - { - vld1q_s16(reinterpret_cast<int16_t *>(gx.ptr())), - vld1q_s16(reinterpret_cast<int16_t *>(gx.ptr()) + 8) - }; - - const int16x8x2_t input2 = - { - vld1q_s16(reinterpret_cast<int16_t *>(gy.ptr())), - vld1q_s16(reinterpret_cast<int16_t *>(gy.ptr()) + 8) - }; - - // Compute and store phase - vst1q_u8(phase.ptr(), fp16::compute_phase<phase_type>(input1, input2)); - }, - gx, gy, phase); -} - -template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::magnitude_phase(const Window &window) -{ - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator magnitude(_magnitude, window); - Iterator phase(_phase, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t input1 = - { - vld1q_s16(reinterpret_cast<int16_t *>(gx.ptr())), - vld1q_s16(reinterpret_cast<int16_t *>(gx.ptr()) + 8) - }; - - const int16x8x2_t input2 = - { - vld1q_s16(reinterpret_cast<int16_t *>(gy.ptr())), - vld1q_s16(reinterpret_cast<int16_t *>(gy.ptr()) + 8) - }; - - // Compute and store magnitude - const int16x8x2_t mag = fp16::compute_magnitude<mag_type>(input1, input2); - - vst1q_s16(reinterpret_cast<int16_t *>(magnitude.ptr()), mag.val[0]); - vst1q_s16(reinterpret_cast<int16_t *>(magnitude.ptr()) + 8, mag.val[1]); - - // Compute and store phase - vst1q_u8(phase.ptr(), fp16::compute_phase<phase_type>(input1, input2)); - }, - gx, gy, magnitude, phase); -} - -template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, PhaseType::SIGNED>; -template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::SIGNED>; -template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, PhaseType::UNSIGNED>; -template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::UNSIGNED>; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - namespace { inline float32x4_t inv(float32x4_t x) |