author    Georgios Pinitas <georgios.pinitas@arm.com>   2018-08-30 16:02:11 +0100
committer Anthony Barbier <anthony.barbier@arm.com>     2018-11-02 16:54:54 +0000
commit    09d3451b4caf8d5e0e7cf2c6097e50a1f815d027 (patch)
tree      8e9b8e9882d0120c85ff08473dbf3721b7602677 /src/core/NEON/kernels/NECannyEdgeKernel.cpp
parent    b57be0da77370e5e71fe82dfa281f528279e8127 (diff)
COMPMID-1548: NEON FP16 mismatches on CannyEdge and HarrisCorners.
Removes FP16 from HarrisCorners and CannyEdge.

Change-Id: I5e4f9205fdbe4de85f04f55ecf1568c837e56cc0
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146247
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NECannyEdgeKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NECannyEdgeKernel.cpp  738
1 file changed, 0 insertions(+), 738 deletions(-)
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index dc37452415..fa51a7bb0b 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -51,744 +51,6 @@ constexpr int EDGE = 255;
constexpr int MAYBE = 127;
} // namespace
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-namespace fp16
-{
-inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
-{
- // Constants used for evaluating score1 and score3
- static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f);
- static const float32x4_t zero = vdupq_n_f32(0.0f);
- static const float32x4_t one = vdupq_n_f32(1.0f);
- static const float32x4_t two = vdupq_n_f32(2.0f);
- static const float32x4_t three = vdupq_n_f32(3.0f);
-
- // Score0: (1, 0)
- const float32x4x2_t score0 =
- {
- vabsq_f32(gx.val[0]),
- vabsq_f32(gx.val[1])
- };
-
- // Score2: ( 0, 1 )
- const float32x4x2_t score2 =
- {
- vabsq_f32(gy.val[0]),
- vabsq_f32(gy.val[1])
- };
-
- // Score1: ( sqrt(2) / 2, sqrt(2) / 2 ), Score3: ( -sqrt(2) / 2, sqrt(2) / 2 )
- float32x4x2_t score1 =
- {
- vmulq_f32(gy.val[0], const45),
- vmulq_f32(gy.val[1], const45)
- };
-
- float32x4x2_t score3 = score1;
-
- score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45);
- score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45);
- score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45);
- score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45);
-
- score1.val[0] = vabsq_f32(score1.val[0]);
- score1.val[1] = vabsq_f32(score1.val[1]);
- score3.val[0] = vabsq_f32(score3.val[0]);
- score3.val[1] = vabsq_f32(score3.val[1]);
-
- float32x4x2_t phase =
- {
- zero,
- zero
- };
-
- float32x4x2_t old_score = score0;
-
- // score1 > old_score?
- uint32x4x2_t mask =
- {
- vcgtq_f32(score1.val[0], old_score.val[0]),
- vcgtq_f32(score1.val[1], old_score.val[1])
- };
-
- phase.val[0] = vbslq_f32(mask.val[0], one, phase.val[0]);
- phase.val[1] = vbslq_f32(mask.val[1], one, phase.val[1]);
- old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]);
- old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]);
-
- // score2 > old_score?
- mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]);
- mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]);
-
- phase.val[0] = vbslq_f32(mask.val[0], two, phase.val[0]);
- phase.val[1] = vbslq_f32(mask.val[1], two, phase.val[1]);
- old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]);
- old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]);
-
- // score3 > old_score?
- mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]);
- mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]);
-
- phase.val[0] = vbslq_f32(mask.val[0], three, phase.val[0]);
- phase.val[1] = vbslq_f32(mask.val[1], three, phase.val[1]);
- old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]);
- old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]);
-
- // Convert from float32x4x2_t to uint8x8_t
- return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])),
- vmovn_u32(vcvtq_u32_f32(phase.val[1]))));
-}
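As a reference for the vectorized routine above, here is a scalar model of the same quantization (an illustration, not code from this patch): each gradient (gx, gy) is scored against the four candidate directions, and the index of the largest absolute projection becomes the quantized phase. For example, gx = gy = 10 gives phase 1 (45°).

#include <cmath>
#include <cstdint>

// Scalar model of the vectorized phase quantization (illustrative only).
inline uint8_t phase_quantization_scalar(float gx, float gy)
{
    const float k = 0.70710678118655f; // sqrt(2) / 2

    const float scores[4] =
    {
        std::fabs(gx),              // Score0: ( 1, 0 )
        std::fabs(k * gx + k * gy), // Score1: ( sqrt(2)/2,  sqrt(2)/2 )
        std::fabs(gy),              // Score2: ( 0, 1 )
        std::fabs(k * gy - k * gx)  // Score3: ( -sqrt(2)/2, sqrt(2)/2 )
    };

    // Strict '>' matches the vector code: ties resolve to the lower phase index.
    uint8_t phase = 0;
    for(uint8_t i = 1; i < 4; ++i)
    {
        if(scores[i] > scores[phase])
        {
            phase = i;
        }
    }
    return phase;
}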
-
-inline uint8x8_t phase_quantization(float16x8_t gx, float16x8_t gy)
-{
- // Constants used for evaluating score1 and score3
- static const float16x8_t const45 = vdupq_n_f16(0.70710678118655f);
- static const float16x8_t zero = vdupq_n_f16(0.0f);
- static const float16x8_t one = vdupq_n_f16(1.0f);
- static const float16x8_t two = vdupq_n_f16(2.0f);
- static const float16x8_t three = vdupq_n_f16(3.0f);
-
- // Score0: (1, 0)
- const float16x8_t score0 = vabsq_f16(gx);
-
- // Score2: ( 0, 1 )
- const float16x8_t score2 = vabsq_f16(gy);
-
- // Score1: ( sqrt(2) / 2, sqrt(2) / 2 ), Score3: ( -sqrt(2) / 2, sqrt(2) / 2 )
- float16x8_t score1 = vmulq_f16(gy, const45);
- float16x8_t score3 = score1;
-
- score1 = vfmaq_f16(score1, gx, const45);
- score3 = vfmsq_f16(score3, gx, const45);
-
- score1 = vabsq_f16(score1);
- score3 = vabsq_f16(score3);
-
- float16x8_t phase = zero;
- float16x8_t old_score = score0;
-
- // score1 > old_score?
- uint16x8_t mask = vcgtq_f16(score1, old_score);
-
- phase = vbslq_f16(mask, one, phase);
- old_score = vbslq_f16(mask, score1, old_score);
-
- // score2 > old_score?
- mask = vcgtq_f16(score2, old_score);
-
- phase = vbslq_f16(mask, two, phase);
- old_score = vbslq_f16(mask, score2, old_score);
-
- // score3 > old_score?
- mask = vcgtq_f16(score3, old_score);
-
- phase = vbslq_f16(mask, three, phase);
-
- // Convert from float16x8_t to uint8x8_t
- return vmovn_u16(vcvtq_u16_f16(phase));
-}
-
-/** Computes the gradient phase if gradient_size = 3 or 5. The output is quantized.
- * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
- *
- * @param[in] gx Gx component
- * @param[in] gy Gy component
- *
- * @return quantized phase for 8 pixels
- */
-inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy)
-{
- return phase_quantization(vcvtq_f16_s16(gx), vcvtq_f16_s16(gy));
-}
-
-/** Computes the gradient phase if gradient_size = 7. The output is quantized.
- * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
- *
- * @param[in] gx Gx component
- * @param[in] gy Gy component
- *
- * @return quantized phase for 8 pixels
- */
-inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
-{
- // Convert to float
- const float32x4x2_t gx_f32 =
- {
- vcvtq_f32_s32(gx.val[0]),
- vcvtq_f32_s32(gx.val[1])
- };
-
- const float32x4x2_t gy_f32 =
- {
- vcvtq_f32_s32(gy.val[0]),
- vcvtq_f32_s32(gy.val[1])
- };
-
- return phase_quantization(gx_f32, gy_f32);
-}
-
-/** Computes the magnitude using the L1-norm type if gradient_size = 3 or 5
- *
- * @param[in] gx Gx component
- * @param[in] gy Gy component
- *
- * @return magnitude for 8 pixels
- */
-inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy)
-{
- return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)),
- vreinterpretq_u16_s16(vabsq_s16(gy)));
-}
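The scalar equivalent, for reference (illustrative, not part of the patch): the L1 magnitude is simply |gx| + |gy| per pixel, e.g. gx = -3, gy = 4 gives 7.

#include <cstdint>
#include <cstdlib>

// Scalar model of mag_l1_S16_S16 (illustrative only): per-pixel L1 norm.
// Like the vector code, the sum wraps modulo 2^16 rather than saturating.
inline uint16_t mag_l1_scalar(int16_t gx, int16_t gy)
{
    return static_cast<uint16_t>(std::abs(gx) + std::abs(gy));
}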
-
-/** Computes the magnitude using the L1-norm type if gradient_size = 7
- *
- * @param[in] gx Gx component
- * @param[in] gy Gy component
- *
- * @return magnitude for 8 pixels
- */
-inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
-{
- const uint32x4x2_t gx_abs =
- {
- vreinterpretq_u32_s32(vabsq_s32(gx.val[0])),
- vreinterpretq_u32_s32(vabsq_s32(gx.val[1]))
- };
-
- const uint32x4x2_t gy_abs =
- {
- vreinterpretq_u32_s32(vabsq_s32(gy.val[0])),
- vreinterpretq_u32_s32(vabsq_s32(gy.val[1]))
- };
-
- const uint32x4x2_t out =
- {
- vaddq_u32(gx_abs.val[0], gy_abs.val[0]),
- vaddq_u32(gx_abs.val[1], gy_abs.val[1])
- };
-
- return out;
-}
-
-inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy)
-{
- // x^2 ...
- float32x4x2_t mag =
- {
- vmulq_f32(gx.val[0], gx.val[0]),
- vmulq_f32(gx.val[1], gx.val[1])
- };
-
- // ... + y^2
- mag.val[0] = vmlaq_f32(mag.val[0], gy.val[0], gy.val[0]);
- mag.val[1] = vmlaq_f32(mag.val[1], gy.val[1], gy.val[1]);
-
- // sqrt(...)
- mag.val[0] = vmulq_f32(vrsqrteq_f32(mag.val[0]), mag.val[0]);
- mag.val[1] = vmulq_f32(vrsqrteq_f32(mag.val[1]), mag.val[1]);
-
- return mag;
-}
-
-inline float16x8_t mag_l2(float16x8_t gx, float16x8_t gy)
-{
- // x^2 ...
- float16x8_t mag = vmulq_f16(gx, gx);
-
- // ... + y^2
- mag = vfmaq_f16(mag, gy, gy);
-
- // sqrt(...)
- mag = vmulq_f16(vrsqrteq_f16(mag), mag);
-
- return mag;
-}
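Both mag_l2 overloads compute sqrt(m) as m * rsqrte(m). The NEON reciprocal-square-root estimate is coarse (on the order of 8 bits of precision for F32, with less headroom again in F16), so magnitudes computed this way differ slightly between the two paths, consistent with the mismatches this patch removes. Below is a sketch of how the F32 estimate could be refined with one Newton-Raphson step via vrsqrtsq_f32 (my illustration, not what the kernel does), assuming m > 0.

#include <arm_neon.h>

// sqrt(m) via the rsqrt estimate, plus one refinement step (illustrative only).
// vrsqrtsq_f32(a, b) returns (3 - a * b) / 2, the Newton-Raphson correction term.
// Note: m == 0 would produce 0 * inf = NaN, a caveat the raw estimate shares.
inline float32x4_t sqrt_refined(float32x4_t m)
{
    float32x4_t r = vrsqrteq_f32(m);                    // coarse 1/sqrt(m)
    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(m, r), r)); // one Newton-Raphson step
    return vmulq_f32(m, r);                             // m * 1/sqrt(m) = sqrt(m)
}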
-
-/** Computes the magnitude using L2-norm if gradient_size = 3 or 5
- *
- * @param[in] gx Gx component
- * @param[in] gy Gy component
- *
- * @return magnitude for 8 pixels
- */
-inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy)
-{
- /* Compute magnitude using L2 normalization */
- const float16x8_t gx2 = vcvtq_f16_s16(gx);
- const float16x8_t gy2 = vcvtq_f16_s16(gy);
- const float16x8_t mag = mag_l2(gx2, gy2);
-
- /* Store magnitude - Convert to uint16x8 */
- return vcvtq_u16_f16(mag);
-}
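A further precision caveat on this path: vcvtq_f16_s16 maps S16 gradients onto float16, whose 11-bit significand represents integers exactly only up to 2048. Larger Sobel responses are rounded before the norm is even taken, another plausible contributor to the F16/F32 mismatches named in the commit subject. A minimal illustration (assuming an AArch64 toolchain where the __fp16 storage type is available):

#include <cstdint>
#include <cstdio>

int main()
{
    const int16_t g = 4097;                   // exactly representable in S16
    const __fp16  h = static_cast<__fp16>(g); // rounded: F16 spacing is 4 near 4096

    std::printf("%d -> %.1f\n", g, static_cast<double>(h)); // prints "4097 -> 4096.0"
    return 0;
}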
-
-/** Computes the magnitude using L2-norm if gradient_size = 7
- *
- * @param[in] gx Gx component
- * @param[in] gy Gy component
- *
- * @return magnitude for 8 pixels
- */
-inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
-{
- // Compute magnitude using L2 normalization
- float32x4x2_t gx2 =
- {
- vcvtq_f32_s32(gx.val[0]),
- vcvtq_f32_s32(gx.val[1])
- };
-
- float32x4x2_t gy2 =
- {
- vcvtq_f32_s32(gy.val[0]),
- vcvtq_f32_s32(gy.val[1])
- };
-
- const float32x4x2_t mag = mag_l2(gx2, gy2);
- const uint32x4x2_t mag32 =
- {
- vcvtq_u32_f32(mag.val[0]),
- vcvtq_u32_f32(mag.val[1])
- };
-
- return mag32;
-}
-
-/** Gradient function used when the gradient size = 3 or 5 and when the norm_type = L1-norm
- *
- * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported: S16
- * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported: S16
- * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported: U16
- * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported: U8
- */
-void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr)
-{
- const auto in1 = static_cast<const int16_t *__restrict>(in1_ptr);
- const auto in2 = static_cast<const int16_t *__restrict>(in2_ptr);
- const auto out1 = static_cast<uint16_t *__restrict>(out1_ptr);
- const auto out2 = static_cast<uint8_t *__restrict>(out2_ptr);
-
- const int16x8x4_t gx =
- {
- vld1q_s16(in1),
- vld1q_s16(in1 + 8),
- vld1q_s16(in1 + 16),
- vld1q_s16(in1 + 24)
- };
-
- const int16x8x4_t gy =
- {
- vld1q_s16(in2),
- vld1q_s16(in2 + 8),
- vld1q_s16(in2 + 16),
- vld1q_s16(in2 + 24)
- };
-
- // Compute and store phase
- vst1_u8(out2 + 0, phase_quantization_S16_S16(gx.val[0], gy.val[0]));
- vst1_u8(out2 + 8, phase_quantization_S16_S16(gx.val[1], gy.val[1]));
- vst1_u8(out2 + 16, phase_quantization_S16_S16(gx.val[2], gy.val[2]));
- vst1_u8(out2 + 24, phase_quantization_S16_S16(gx.val[3], gy.val[3]));
-
- // Compute and store magnitude using L1 normalization
- vst1q_u16(out1 + 0, mag_l1_S16_S16(gx.val[0], gy.val[0]));
- vst1q_u16(out1 + 8, mag_l1_S16_S16(gx.val[1], gy.val[1]));
- vst1q_u16(out1 + 16, mag_l1_S16_S16(gx.val[2], gy.val[2]));
- vst1q_u16(out1 + 24, mag_l1_S16_S16(gx.val[3], gy.val[3]));
-}
-
-/** Gradient function used when the gradient size = 3 or 5 and when the norm_type = L2-norm
- *
- * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported: S16
- * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported: S16
- * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported: U16
- * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported: U8
- */
-void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr)
-{
- const auto in1 = static_cast<const int16_t *__restrict>(in1_ptr);
- const auto in2 = static_cast<const int16_t *__restrict>(in2_ptr);
- const auto out1 = static_cast<uint16_t *__restrict>(out1_ptr);
- const auto out2 = static_cast<uint8_t *__restrict>(out2_ptr);
-
- const int16x8x4_t gx =
- {
- vld1q_s16(in1),
- vld1q_s16(in1 + 8),
- vld1q_s16(in1 + 16),
- vld1q_s16(in1 + 24)
- };
-
- const int16x8x4_t gy =
- {
- vld1q_s16(in2),
- vld1q_s16(in2 + 8),
- vld1q_s16(in2 + 16),
- vld1q_s16(in2 + 24)
- };
-
- // Compute and store phase
- vst1_u8(out2 + 0, phase_quantization_S16_S16(gx.val[0], gy.val[0]));
- vst1_u8(out2 + 8, phase_quantization_S16_S16(gx.val[1], gy.val[1]));
- vst1_u8(out2 + 16, phase_quantization_S16_S16(gx.val[2], gy.val[2]));
- vst1_u8(out2 + 24, phase_quantization_S16_S16(gx.val[3], gy.val[3]));
-
- // Compute and store magnitude using L2 normalization
- vst1q_u16(out1 + 0, mag_l2_S16_S16(gx.val[0], gy.val[0]));
- vst1q_u16(out1 + 8, mag_l2_S16_S16(gx.val[1], gy.val[1]));
- vst1q_u16(out1 + 16, mag_l2_S16_S16(gx.val[2], gy.val[2]));
- vst1q_u16(out1 + 24, mag_l2_S16_S16(gx.val[3], gy.val[3]));
-}
-
-/** Gradient function used when the gradient size = 7 and when the norm_type = L1-norm
- *
- * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported: S32
- * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported: S32
- * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported: U32
- * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported: U8
- */
-void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr)
-{
- auto in1 = static_cast<const int32_t *__restrict>(in1_ptr);
- auto in2 = static_cast<const int32_t *__restrict>(in2_ptr);
- auto out1 = static_cast<uint32_t *__restrict>(out1_ptr);
- auto out2 = static_cast<uint8_t *__restrict>(out2_ptr);
-
- // Process low and high part
- for(size_t i = 0; i < 2; ++i, in1 += 16, in2 += 16, out1 += 16, out2 += 16)
- {
- const int32x4x2_t gx0 =
- {
- vld1q_s32(in1 + 0),
- vld1q_s32(in1 + 4)
- };
-
- const int32x4x2_t gx1 =
- {
- vld1q_s32(in1 + 8),
- vld1q_s32(in1 + 12)
- };
-
- const int32x4x2_t gy0 =
- {
- vld1q_s32(in2 + 0),
- vld1q_s32(in2 + 4)
- };
-
- const int32x4x2_t gy1 =
- {
- vld1q_s32(in2 + 8),
- vld1q_s32(in2 + 12)
- };
-
- // Compute and store phase
- vst1_u8(out2 + 0, phase_quantization_S32_S32(gx0, gy0));
- vst1_u8(out2 + 8, phase_quantization_S32_S32(gx1, gy1));
-
- // Compute magnitude using L1 normalization
- const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0);
- const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1);
-
- // Store magnitude
- vst1q_u32(out1 + 0, mag0.val[0]);
- vst1q_u32(out1 + 4, mag0.val[1]);
- vst1q_u32(out1 + 8, mag1.val[0]);
- vst1q_u32(out1 + 12, mag1.val[1]);
- }
-}
-
-/** Gradient function used when the gradient size = 7 and when the norm_type = L2-norm
- *
- * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported: S32
- * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported: S32
- * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported: U32
- * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported: U8
- */
-void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr)
-{
- auto in1 = static_cast<const int32_t *__restrict>(in1_ptr);
- auto in2 = static_cast<const int32_t *__restrict>(in2_ptr);
- auto out1 = static_cast<uint32_t *__restrict>(out1_ptr);
- auto out2 = static_cast<uint8_t *__restrict>(out2_ptr);
-
- // Process low and high part
- for(size_t i = 0; i < 2; ++i, in1 += 16, in2 += 16, out1 += 16, out2 += 16)
- {
- const int32x4x2_t gx0 =
- {
- vld1q_s32(in1 + 0),
- vld1q_s32(in1 + 4)
- };
-
- const int32x4x2_t gx1 =
- {
- vld1q_s32(in1 + 8),
- vld1q_s32(in1 + 12)
- };
-
- const int32x4x2_t gy0 =
- {
- vld1q_s32(in2 + 0),
- vld1q_s32(in2 + 4)
- };
-
- const int32x4x2_t gy1 =
- {
- vld1q_s32(in2 + 8),
- vld1q_s32(in2 + 12)
- };
-
- // Compute and store phase
- vst1_u8(out2 + 0, phase_quantization_S32_S32(gx0, gy0));
- vst1_u8(out2 + 8, phase_quantization_S32_S32(gx1, gy1));
-
- // Compute magnitude using L2 normalization
- const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0);
- const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1);
-
- // Store magnitude
- vst1q_u32(out1 + 0, mag0.val[0]);
- vst1q_u32(out1 + 4, mag0.val[1]);
- vst1q_u32(out1 + 8, mag1.val[0]);
- vst1q_u32(out1 + 12, mag1.val[1]);
- }
-}
-
-inline uint16x4_t non_max_U32_helper(const uint32_t *in, const uint16x4_t pc, const uint32_t stride_mag, const int32_t lower_thr, const int32_t upper_thr)
-{
- // Phase for 4 pixels
- const uint32x4_t pc32 = vmovl_u16(pc);
-
- // Get magnitude for 4 pixels
- uint32x4_t mc = vld1q_u32(in);
-
- // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
- // 0 degree
- const uint32x4_t mk0_0 = vld1q_u32(in - 1);
- const uint32x4_t mk0_1 = vld1q_u32(in + 1);
- uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
- mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
- mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));
-
- // 45 degree
- const uint32x4_t mk45_0 = vld1q_u32(in - stride_mag - 1);
- const uint32x4_t mk45_1 = vld1q_u32(in + stride_mag + 1);
- uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1));
- mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
- mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));
-
- // 90 degree
- const uint32x4_t mk90_0 = vld1q_u32(in - stride_mag);
- const uint32x4_t mk90_1 = vld1q_u32(in + stride_mag);
- uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2));
- mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
- mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));
-
- // 135 degree
- const uint32x4_t mk135_0 = vld1q_u32(in - stride_mag + 1);
- const uint32x4_t mk135_1 = vld1q_u32(in + stride_mag - 1);
- uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3));
- mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
- mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));
-
- // Merge masks
- mask0 = vorrq_u32(mask0, mask1);
- mask2 = vorrq_u32(mask2, mask3);
- mask0 = vorrq_u32(mask0, mask2);
-
- mc = vbslq_u32(mask0, mc, vdupq_n_u32(0));
-
- // mc > upper_thr
- mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr));
-
- // mc <= lower_thr
- mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr));
-
- // mc <= upper_thr && mc > lower_thr
- mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr));
- mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr)));
-
- mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc);
- mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc);
- mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc);
-
- return vmovn_u32(mc);
-}
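For reference, a scalar model of what this helper computes per pixel (illustrative only, assuming the same padded row layout): keep a magnitude only if it strictly exceeds both neighbours along its quantized gradient direction, then classify it with the hysteresis thresholds.

#include <cstdint>

// Scalar model of non_max_U32_helper (illustrative only).
inline uint8_t non_max_scalar(const uint32_t *mag, int32_t stride, uint8_t phase,
                              uint32_t lower_thr, uint32_t upper_thr)
{
    // Offsets to the two neighbours for phases 0deg, 45deg, 90deg, 135deg.
    const int32_t offsets[4] = { 1, stride + 1, stride, stride - 1 };
    const int32_t  off       = offsets[phase & 3];
    const uint32_t mc        = mag[0];

    // Non-maximum suppression: both comparisons are strict, as in the vector code.
    if(mc <= mag[-off] || mc <= mag[off])
    {
        return 0; // suppressed
    }

    // Hysteresis classification.
    if(mc > upper_thr)  return 255; // EDGE
    if(mc <= lower_thr) return 0;   // NO_EDGE
    return 127;                     // MAYBE: resolved later by edge tracing
}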
-
-/** Traces edges recursively. Called by edge_trace_U8_U8, and by itself, whenever a MAYBE pixel borders an EDGE pixel
- *
- * @param[in] in Pointer to source image. Data type supported: U8
- * @param[out] out Pointer to destination image. Data type supported: U8
- * @param[in] in_stride Stride of the input image
- * @param[in] out_stride Stride of the output image
- */
-void edge_trace_recursive_U8_U8(uint8_t *__restrict in, uint8_t *__restrict out, const int32_t in_stride, const int32_t out_stride)
-{
- // Look for MAYBE pixels in 8 directions
- *out = EDGE;
-
- // (-1, 0)
- uint8_t pixel = *(in - 1);
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *(in - 1) = EDGE;
-
- edge_trace_recursive_U8_U8(in - 1, out - 1, in_stride, out_stride);
- }
-
- // (+1, 0)
- pixel = *(in + 1);
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *(in + 1) = EDGE;
-
- edge_trace_recursive_U8_U8(in + 1, out + 1, in_stride, out_stride);
- }
-
- in -= in_stride;
- out -= out_stride;
-
- // (-1, -1)
- pixel = *(in - 1);
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *(in - 1) = EDGE;
-
- edge_trace_recursive_U8_U8(in - 1, out - 1, in_stride, out_stride);
- }
-
- // (0, -1)
- pixel = *in;
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *in = EDGE;
-
- edge_trace_recursive_U8_U8(in, out, in_stride, out_stride);
- }
-
- // (+1, -1)
- pixel = *(in + 1);
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *(in + 1) = EDGE;
-
- edge_trace_recursive_U8_U8(in + 1, out + 1, in_stride, out_stride);
- }
-
- in += in_stride * 2;
- out += out_stride * 2;
-
- // (-1, +1)
- pixel = *(in - 1);
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *(in - 1) = EDGE;
-
- edge_trace_recursive_U8_U8(in - 1, out - 1, in_stride, out_stride);
- }
-
- // (0, +1)
- pixel = *in;
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *in = EDGE;
-
- edge_trace_recursive_U8_U8(in, out, in_stride, out_stride);
- }
-
- // (+1, +1)
- pixel = *(in + 1);
-
- if(pixel == MAYBE)
- {
- // Touched a MAYBE point. MAYBE becomes EDGE
- *(in + 1) = EDGE;
-
- edge_trace_recursive_U8_U8(in + 1, out + 1, in_stride, out_stride);
- }
-}
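The routine above is a depth-first flood fill over the 8-neighbourhood: every MAYBE pixel connected to a confirmed EDGE pixel is promoted to EDGE. On long, winding edges the recursion can grow deep; an equivalent formulation with an explicit stack (a sketch under the same labelling conventions, not the library's implementation) is shown below.

#include <cstdint>
#include <stack>
#include <utility>

// Iterative equivalent of edge_trace_recursive_U8_U8 (illustrative only).
// 'in' holds NO_EDGE(0) / MAYBE(127) / EDGE(255) labels; 'out' is the edge map.
void edge_trace_iterative(uint8_t *in, uint8_t *out, int32_t in_stride, int32_t out_stride)
{
    std::stack<std::pair<uint8_t *, uint8_t *>> pending;
    pending.emplace(in, out);

    while(!pending.empty())
    {
        uint8_t *p_in  = pending.top().first;
        uint8_t *p_out = pending.top().second;
        pending.pop();

        *p_out = 255; // EDGE

        // Visit the 8 neighbours; promote MAYBE to EDGE and keep tracing.
        for(int32_t dy = -1; dy <= 1; ++dy)
        {
            for(int32_t dx = -1; dx <= 1; ++dx)
            {
                uint8_t *n_in = p_in + dy * in_stride + dx;
                if((dx != 0 || dy != 0) && *n_in == 127)
                {
                    *n_in = 255; // mark before pushing, as the recursive version does
                    pending.emplace(n_in, p_out + dy * out_stride + dx);
                }
            }
        }
    }
}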
-} // namespace fp16
-
-void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(gx, gy, magnitude, phase);
-
- set_shape_if_empty(*magnitude->info(), gx->info()->tensor_shape());
- set_shape_if_empty(*phase->info(), gx->info()->tensor_shape());
-
- Format magnitude_format = gx->info()->data_type() == DataType::S16 ? Format::U16 : Format::U32;
- set_format_if_unknown(*magnitude->info(), magnitude_format);
- set_format_if_unknown(*phase->info(), Format::U8);
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(gx, gy, magnitude, phase);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy);
- ARM_COMPUTE_ERROR_ON_MSG(element_size_from_data_type(gx->info()->data_type()) != element_size_from_data_type(magnitude->info()->data_type()), "Magnitude must have the same element size as Gx and Gy");
-
- _gx = gx;
- _gy = gy;
- _magnitude = magnitude;
- _phase = phase;
-
- if(_gx->info()->data_type() == DataType::S16)
- {
- if(norm_type == 1)
- {
- _func = &fp16::mag_phase_l1norm_S16_S16_U16_U8;
- }
- else
- {
- _func = &fp16::mag_phase_l2norm_S16_S16_U16_U8;
- }
- }
- else
- {
- if(norm_type == 1)
- {
- _func = &fp16::mag_phase_l1norm_S32_S32_U32_U8;
- }
- else
- {
- _func = &fp16::mag_phase_l2norm_S32_S32_U32_U8;
- }
- }
-
- constexpr unsigned int num_elems_processed_per_iteration = 32;
-
- // Configure kernel window
- Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access);
-
- mag_access.set_valid_region(win, _gx->info()->valid_region());
- phase_access.set_valid_region(win, _gx->info()->valid_region());
-
- INEKernel::configure(win);
-}
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
namespace
{
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)