From 09d3451b4caf8d5e0e7cf2c6097e50a1f815d027 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 30 Aug 2018 16:02:11 +0100 Subject: COMPMID-1548: NEON FP16 mismatches on CannyEdge and HarrisCorners. Removes FP16 from HarrisCorners and CannyEdge. Change-Id: I5e4f9205fdbe4de85f04f55ecf1568c837e56cc0 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146247 Tested-by: Jenkins Reviewed-by: Michele DiGiorgio --- arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 18 - .../core/NEON/kernels/NEHarrisCornersKernel.h | 29 - arm_compute/runtime/NEON/functions/NECannyEdge.h | 5 +- .../runtime/NEON/functions/NEHarrisCorners.h | 3 +- docs/00_introduction.dox | 2 +- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 738 --------------------- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 324 --------- src/runtime/NEON/functions/NECannyEdge.cpp | 18 +- src/runtime/NEON/functions/NEHarrisCorners.cpp | 75 +-- tests/benchmark/CL/CannyEdge.cpp | 3 +- tests/benchmark/CL/HarrisCorners.cpp | 10 +- tests/benchmark/NEON/CannyEdge.cpp | 3 +- tests/benchmark/NEON/HarrisCorners.cpp | 34 +- tests/benchmark/fixtures/CannyEdgeFixture.h | 20 +- tests/benchmark/fixtures/HarrisCornersFixture.h | 5 +- tests/validation/CL/CannyEdge.cpp | 9 +- tests/validation/CL/HarrisCorners.cpp | 10 +- tests/validation/NEON/CannyEdge.cpp | 14 +- tests/validation/NEON/HarrisCorners.cpp | 14 +- tests/validation/fixtures/CannyEdgeFixture.h | 23 +- tests/validation/fixtures/HarrisCornersFixture.h | 25 +- tests/validation/reference/CannyEdgeDetector.cpp | 6 +- tests/validation/reference/CannyEdgeDetector.h | 3 +- tests/validation/reference/HarrisCornerDetector.h | 7 +- 24 files changed, 72 insertions(+), 1326 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h index 58ef1757fe..7924d32807 100644 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -85,24 +85,6 @@ protected: ITensor *_phase; /**< Destination tensor - Quantized phase */ }; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Gradient computation for FP16 datatype - */ -class NEGradientFP16Kernel : public NEGradientKernel -{ -public: - const char *name() const override - { - return "NEGradientFP16Kernel"; - } - // Inherited methods overriden: - void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform Gradient computation for FP16 datatype */ -using NEGradientFP16Kernel = NEGradientKernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - /** NEON kernel to perform Non-Maxima suppression for Canny Edge. * * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h index aabf8b312b..42a899d55e 100644 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -102,34 +102,5 @@ private: /** Harris Score function to use for the particular image types passed to configure() */ HarrisScoreFunction *_func; }; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Interface for the accumulate Weighted kernel using F16 */ -template -class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel -{ -public: - const char *name() const override - { - return "NEHarrisScoreFP16Kernel"; - } - /** Default constructor */ - NEHarrisScoreFP16Kernel(); - // Inherited methods overridden: - void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; - BorderSize border_size() const override; - void run(const Window &window, const ThreadInfo &info) override; - -private: - using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float norm_factor, float sensitivity, float strength_thresh); - /** Harris Score function to use for the particular image types passed to configure() */ - HarrisScoreFunction *_func; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** Interface for the accumulate Weighted kernel using FP16 */ -template -using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute #endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h index 17fefcc0ab..0b6e555667 100644 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -74,11 +74,8 @@ public: * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. - * */ - void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0, - bool use_fp16 = false); + void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h index 6ea14a38e5..b35a9add04 100644 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h @@ -79,11 +79,10 @@ public: * @param[out] corners Array of keypoints to store the results. * @param[in] border_mode Border mode to use * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. */ void configure(IImage *input, float threshold, float min_dist, float sensitivity, int32_t gradient_size, int32_t block_size, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); + BorderMode border_mode, uint8_t constant_border_value = 0); // Inherited methods overridden: void run() override; diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index e3bbea24db..9c6df59335 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -509,7 +509,7 @@ v17.04 Public bug fixes release - @ref NEFillArrayKernel - @ref NEGaussianPyramidHorKernel - @ref NEGaussianPyramidVertKernel - - @ref NEHarrisScoreFP16Kernel + - NEHarrisScoreFP16Kernel - @ref NEHarrisScoreKernel - @ref NEHOGDetectorKernel - @ref NELogits1DMaxKernel diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index dc37452415..fa51a7bb0b 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -51,744 +51,6 @@ constexpr int EDGE = 255; constexpr int MAYBE = 127; } // namespace -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -namespace fp16 -{ -inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) -{ - // Constant use for evaluating score1 and score3 - static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f); - static const float32x4_t zero = vdupq_n_f32(0.0f); - static const float32x4_t one = vdupq_n_f32(1.0f); - static const float32x4_t two = vdupq_n_f32(2.0f); - static const float32x4_t three = vdupq_n_f32(3.0f); - - // Score0: (1, 0) - const float32x4x2_t score0 = - { - vabsq_f32(gx.val[0]), - vabsq_f32(gx.val[1]) - }; - - // Score2: ( 0, 1 ) - const float32x4x2_t score2 = - { - vabsq_f32(gy.val[0]), - vabsq_f32(gy.val[1]) - }; - - // Score1 and Score3: ( sqrt(2) / 2, sqrt(2) / 2 ) - ( -sqrt(2) / 2, sqrt(2) / 2 ) - float32x4x2_t score1 = - { - vmulq_f32(gy.val[0], const45), - vmulq_f32(gy.val[1], const45) - }; - - float32x4x2_t score3 = score1; - - score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45); - score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45); - score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45); - score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45); - - score1.val[0] = vabsq_f32(score1.val[0]); - score1.val[1] = vabsq_f32(score1.val[1]); - score3.val[0] = vabsq_f32(score3.val[0]); - score3.val[1] = vabsq_f32(score3.val[1]); - - float32x4x2_t phase = - { - zero, - zero - }; - - float32x4x2_t old_score = score0; - - // score1 > old_score? - uint32x4x2_t mask = - { - vcgtq_f32(score1.val[0], old_score.val[0]), - vcgtq_f32(score1.val[1], old_score.val[1]) - }; - - phase.val[0] = vbslq_f32(mask.val[0], one, phase.val[0]); - phase.val[1] = vbslq_f32(mask.val[1], one, phase.val[1]); - old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]); - old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]); - - // score2 > old_score? - mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]); - mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]); - - phase.val[0] = vbslq_f32(mask.val[0], two, phase.val[0]); - phase.val[1] = vbslq_f32(mask.val[1], two, phase.val[1]); - old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]); - old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]); - - // score3 > old_score? - mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]); - mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]); - - phase.val[0] = vbslq_f32(mask.val[0], three, phase.val[0]); - phase.val[1] = vbslq_f32(mask.val[1], three, phase.val[1]); - old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]); - old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]); - - // Convert from float32x4_t to uint8x8_t - return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])), - vmovn_u32(vcvtq_u32_f32(phase.val[1])))); -} - -inline uint8x8_t phase_quantization(float16x8_t gx, float16x8_t gy) -{ - // Constant use for evaluating score1 and score3 - static const float16x8_t const45 = vdupq_n_f16(0.70710678118655f); - static const float16x8_t zero = vdupq_n_f16(0.0f); - static const float16x8_t one = vdupq_n_f16(1.0f); - static const float16x8_t two = vdupq_n_f16(2.0f); - static const float16x8_t three = vdupq_n_f16(3.0f); - - // Score0: (1, 0) - const float16x8_t score0 = vabsq_f16(gx); - - // Score2: ( 0, 1 ) - const float16x8_t score2 = vabsq_f16(gy); - - // Score1 and Score3: ( sqrt(2) / 2, sqrt(2) / 2 ) - ( -sqrt(2) / 2, sqrt(2) / 2 ) - float16x8_t score1 = vmulq_f16(gy, const45); - float16x8_t score3 = score1; - - score1 = vfmaq_f16(score1, gx, const45); - score3 = vfmsq_f16(score3, gx, const45); - - score1 = vabsq_f16(score1); - score3 = vabsq_f16(score3); - - float16x8_t phase = zero; - float16x8_t old_score = score0; - - // score1 > old_score? - uint16x8_t mask = vcgtq_f16(score1, old_score); - - phase = vbslq_f16(mask, one, phase); - old_score = vbslq_f16(mask, score1, old_score); - - // score2 > old_score? - mask = vcgtq_f16(score2, old_score); - - phase = vbslq_f16(mask, two, phase); - old_score = vbslq_f16(mask, score2, old_score); - - // score3 > old_score? - mask = vcgtq_f16(score3, old_score); - - phase = vbslq_f16(mask, three, phase); - - // Convert from float16x8_t to uint8x8_t - return vmovn_u16(vcvtq_u16_f16(phase)); -} - -/** Computes the gradient phase if gradient_size = 3 or 5. The output is quantized. - * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return quantized phase for 8 pixels - */ -inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy) -{ - return phase_quantization(vcvtq_f16_s16(gx), vcvtq_f16_s16(gy)); -} - -/** Computes the gradient phase if gradient_size = 7. The output is quantized. - * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return quantized phase for 8 pixels - */ -inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy) -{ - // Convert to float - const float32x4x2_t gx_f32 = - { - vcvtq_f32_s32(gx.val[0]), - vcvtq_f32_s32(gx.val[1]) - }; - - const float32x4x2_t gy_f32 = - { - vcvtq_f32_s32(gy.val[0]), - vcvtq_f32_s32(gy.val[1]) - }; - - return phase_quantization(gx_f32, gy_f32); -} - -/** Computes the magnitude using the L1-norm type if gradient_size = 3 or 5 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy) -{ - return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)), - vreinterpretq_u16_s16(vabsq_s16(gy))); -} - -/** Computes the magnitude using the L1-norm type if gradient_size = 7 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy) -{ - const uint32x4x2_t gx_abs = - { - vreinterpretq_u32_s32(vabsq_s32(gx.val[0])), - vreinterpretq_u32_s32(vabsq_s32(gx.val[1])) - }; - - const uint32x4x2_t gy_abs = - { - vreinterpretq_u32_s32(vabsq_s32(gy.val[0])), - vreinterpretq_u32_s32(vabsq_s32(gy.val[1])) - }; - - const uint32x4x2_t out = - { - vaddq_u32(gx_abs.val[0], gy_abs.val[0]), - vaddq_u32(gx_abs.val[1], gy_abs.val[1]) - }; - - return out; -} - -inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy) -{ - // x^2 ... - float32x4x2_t mag = - { - vmulq_f32(gx.val[0], gx.val[0]), - vmulq_f32(gx.val[1], gx.val[1]) - }; - - // ... + y^2 - mag.val[0] = vmlaq_f32(mag.val[0], gy.val[0], gy.val[0]); - mag.val[1] = vmlaq_f32(mag.val[1], gy.val[1], gy.val[1]); - - // sqrt(...) - mag.val[0] = vmulq_f32(vrsqrteq_f32(mag.val[0]), mag.val[0]); - mag.val[1] = vmulq_f32(vrsqrteq_f32(mag.val[1]), mag.val[1]); - - return mag; -} - -inline float16x8_t mag_l2(float16x8_t gx, float16x8_t gy) -{ - // x^2 ... - float16x8_t mag = vmulq_f16(gx, gx); - - // ... + y^2 - mag = vfmaq_f16(mag, gy, gy); - - // sqrt(...) - mag = vmulq_f16(vrsqrteq_f16(mag), mag); - - return mag; -} - -/** Computes the magnitude using L2-norm if gradient_size = 3 or 5 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy) -{ - /* Compute magnitude using L2 normalization */ - const float16x8_t gx2 = vcvtq_f16_s16(gx); - const float16x8_t gy2 = vcvtq_f16_s16(gy); - const float16x8_t mag = mag_l2(gx2, gy2); - - /* Store magnitude - Convert to uint16x8 */ - return vcvtq_u16_f16(mag); -} - -/** Computes the magnitude using L2-norm if gradient_size = 7 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy) -{ - // Compute magnitude using L2 normalization - float32x4x2_t gx2 = - { - vcvtq_f32_s32(gx.val[0]), - vcvtq_f32_s32(gx.val[1]) - }; - - float32x4x2_t gy2 = - { - vcvtq_f32_s32(gy.val[0]), - vcvtq_f32_s32(gy.val[1]) - }; - - const float32x4x2_t mag = mag_l2(gx2, gy2); - const uint32x4x2_t mag32 = - { - vcvtq_u32_f32(mag.val[0]), - vcvtq_u32_f32(mag.val[1]) - }; - - return mag32; -} - -/** Gradient function used when the gradient size = 3 or 5 and when the norm_type = L1-norm - * - * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported S16 - * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported S16 - * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported U16 - * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr) -{ - const auto in1 = static_cast(in1_ptr); - const auto in2 = static_cast(in2_ptr); - const auto out1 = static_cast(out1_ptr); - const auto out2 = static_cast(out2_ptr); - - const int16x8x4_t gx = - { - vld1q_s16(in1), - vld1q_s16(in1 + 8), - vld1q_s16(in1 + 16), - vld1q_s16(in1 + 24) - }; - - const int16x8x4_t gy = - { - vld1q_s16(in2), - vld1q_s16(in2 + 8), - vld1q_s16(in2 + 16), - vld1q_s16(in2 + 24) - }; - - // Compute and store phase - vst1_u8(out2 + 0, phase_quantization_S16_S16(gx.val[0], gy.val[0])); - vst1_u8(out2 + 8, phase_quantization_S16_S16(gx.val[1], gy.val[1])); - vst1_u8(out2 + 16, phase_quantization_S16_S16(gx.val[2], gy.val[2])); - vst1_u8(out2 + 24, phase_quantization_S16_S16(gx.val[3], gy.val[3])); - - // Compute ans store magnitude using L1 normalization - vst1q_u16(out1 + 0, mag_l1_S16_S16(gx.val[0], gy.val[0])); - vst1q_u16(out1 + 8, mag_l1_S16_S16(gx.val[1], gy.val[1])); - vst1q_u16(out1 + 16, mag_l1_S16_S16(gx.val[2], gy.val[2])); - vst1q_u16(out1 + 24, mag_l1_S16_S16(gx.val[3], gy.val[3])); -} - -/** Gradient function used when the gradient size = 3 or 5 and when the norm_type = L2-norm - * - * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported S16 - * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported S16 - * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported U16 - * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr) -{ - const auto in1 = static_cast(in1_ptr); - const auto in2 = static_cast(in2_ptr); - const auto out1 = static_cast(out1_ptr); - const auto out2 = static_cast(out2_ptr); - - const int16x8x4_t gx = - { - vld1q_s16(in1), - vld1q_s16(in1 + 8), - vld1q_s16(in1 + 16), - vld1q_s16(in1 + 24) - }; - - const int16x8x4_t gy = - { - vld1q_s16(in2), - vld1q_s16(in2 + 8), - vld1q_s16(in2 + 16), - vld1q_s16(in2 + 24) - }; - - // Compute and store phase - vst1_u8(out2 + 0, phase_quantization_S16_S16(gx.val[0], gy.val[0])); - vst1_u8(out2 + 8, phase_quantization_S16_S16(gx.val[1], gy.val[1])); - vst1_u8(out2 + 16, phase_quantization_S16_S16(gx.val[2], gy.val[2])); - vst1_u8(out2 + 24, phase_quantization_S16_S16(gx.val[3], gy.val[3])); - - // Compute and store magnitude using L2 normalization - vst1q_u16(out1 + 0, mag_l2_S16_S16(gx.val[0], gy.val[0])); - vst1q_u16(out1 + 8, mag_l2_S16_S16(gx.val[1], gy.val[1])); - vst1q_u16(out1 + 16, mag_l2_S16_S16(gx.val[2], gy.val[2])); - vst1q_u16(out1 + 24, mag_l2_S16_S16(gx.val[3], gy.val[3])); -} - -/** Gradient function used when the gradient size = 7 and when the norm_type = L1-norm - * - * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported S32 - * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported S32 - * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported U32 - * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr) -{ - auto in1 = static_cast(in1_ptr); - auto in2 = static_cast(in2_ptr); - auto out1 = static_cast(out1_ptr); - auto out2 = static_cast(out2_ptr); - - // Process low and high part - for(size_t i = 0; i < 2; ++i, in1 += 16, in2 += 16, out1 += 16, out2 += 16) - { - const int32x4x2_t gx0 = - { - vld1q_s32(in1 + 0), - vld1q_s32(in1 + 4) - }; - - const int32x4x2_t gx1 = - { - vld1q_s32(in1 + 8), - vld1q_s32(in1 + 12) - }; - - const int32x4x2_t gy0 = - { - vld1q_s32(in2 + 0), - vld1q_s32(in2 + 4) - }; - - const int32x4x2_t gy1 = - { - vld1q_s32(in2 + 8), - vld1q_s32(in2 + 12) - }; - - // Compute and store phase - vst1_u8(out2 + 0, phase_quantization_S32_S32(gx0, gy0)); - vst1_u8(out2 + 8, phase_quantization_S32_S32(gx1, gy1)); - - // Compute magnitude using L1 normalization - const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0); - const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1); - - // Store magnitude - vst1q_u32(out1 + 0, mag0.val[0]); - vst1q_u32(out1 + 4, mag0.val[1]); - vst1q_u32(out1 + 8, mag1.val[0]); - vst1q_u32(out1 + 12, mag1.val[1]); - } -} - -/** Gradient function used when the gradient size = 7 and when the norm_type = L2-norm - * - * @param[in] in1_ptr Pointer to source image. Gx image. Data type supported S32 - * @param[in] in2_ptr Pointer to source image. Gy image. Data type supported S32 - * @param[out] out1_ptr Pointer to destination image. Magnitude. Data type supported U32 - * @param[out] out2_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out1_ptr, void *__restrict out2_ptr) -{ - auto in1 = static_cast(in1_ptr); - auto in2 = static_cast(in2_ptr); - auto out1 = static_cast(out1_ptr); - auto out2 = static_cast(out2_ptr); - - // Process low and high part - for(size_t i = 0; i < 2; ++i, in1 += 16, in2 += 16, out1 += 16, out2 += 16) - { - const int32x4x2_t gx0 = - { - vld1q_s32(in1 + 0), - vld1q_s32(in1 + 4) - }; - - const int32x4x2_t gx1 = - { - vld1q_s32(in1 + 8), - vld1q_s32(in1 + 12) - }; - - const int32x4x2_t gy0 = - { - vld1q_s32(in2 + 0), - vld1q_s32(in2 + 4) - }; - - const int32x4x2_t gy1 = - { - vld1q_s32(in2 + 8), - vld1q_s32(in2 + 12) - }; - - // Compute and store phase - vst1_u8(out2 + 0, phase_quantization_S32_S32(gx0, gy0)); - vst1_u8(out2 + 8, phase_quantization_S32_S32(gx1, gy1)); - - // Compute magnitude using L2 normalization - const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0); - const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1); - - // Store magnitude - vst1q_u32(out1 + 0, mag0.val[0]); - vst1q_u32(out1 + 4, mag0.val[1]); - vst1q_u32(out1 + 8, mag1.val[0]); - vst1q_u32(out1 + 12, mag1.val[1]); - } -} - -inline uint16x4_t non_max_U32_helper(const uint32_t *in, const uint16x4_t pc, const uint32_t stride_mag, const int32_t lower_thr, const int32_t upper_thr) -{ - // Phase for 4 pixel - const uint32x4_t pc32 = vmovl_u16(pc); - - // Get magnitude for 4 pixel - uint32x4_t mc = vld1q_u32(in); - - // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - // 0 degree - const uint32x4_t mk0_0 = vld1q_u32(in - 1); - const uint32x4_t mk0_1 = vld1q_u32(in + 1); - uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0)); - mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0)); - mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1)); - - // 45 degree - const uint32x4_t mk45_0 = vld1q_u32(in - stride_mag - 1); - const uint32x4_t mk45_1 = vld1q_u32(in + stride_mag + 1); - uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1)); - mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0)); - mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1)); - - // 90 degree - const uint32x4_t mk90_0 = vld1q_u32(in - stride_mag); - const uint32x4_t mk90_1 = vld1q_u32(in + stride_mag); - uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2)); - mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0)); - mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1)); - - // 135 degree - const uint32x4_t mk135_0 = vld1q_u32(in - stride_mag + 1); - const uint32x4_t mk135_1 = vld1q_u32(in + stride_mag - 1); - uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3)); - mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0)); - mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1)); - - // Merge masks - mask0 = vorrq_u32(mask0, mask1); - mask2 = vorrq_u32(mask2, mask3); - mask0 = vorrq_u32(mask0, mask2); - - mc = vbslq_u32(mask0, mc, vdupq_n_u32(0)); - - // mc > upper_thr - mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr)); - - // mc <= lower_thr - mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr)); - - // mc <= upper_thr && mc > lower_thr - mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr)); - mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr))); - - mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc); - mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc); - mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc); - - return vmovn_u32(mc); -} - -/** Computes edge tracing when is called by edge_trace_U8_U8 recursively - * - * @param[in] in Pointer to source image. Data type supported U8 - * @param[out] out Pointer to destination image. Data type supported U8 - * @param[in] in_stride Stride of the input image - * @param[in] out_stride Stride of the output image - */ -void edge_trace_recursive_U8_U8(uint8_t *__restrict in, uint8_t *__restrict out, const int32_t in_stride, const int32_t out_stride) -{ - // Look for MAYBE pixels in 8 directions - *out = EDGE; - - // (-1, 0) - uint8_t pixel = *(in - 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(in - 1) = EDGE; - - edge_trace_recursive_U8_U8(in - 1, out - 1, in_stride, out_stride); - } - - // (+1, 0) - pixel = *(in + 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(in + 1) = EDGE; - - edge_trace_recursive_U8_U8(in + 1, out + 1, in_stride, out_stride); - } - - in -= in_stride; - out -= out_stride; - - // (-1, -1) - pixel = *(in - 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(in - 1) = EDGE; - - edge_trace_recursive_U8_U8(in - 1, out - 1, in_stride, out_stride); - } - - // (0, -1) - pixel = *in; - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *in = EDGE; - - edge_trace_recursive_U8_U8(in, out, in_stride, out_stride); - } - - // (+1, -1) - pixel = *(in + 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(in + 1) = EDGE; - - edge_trace_recursive_U8_U8(in + 1, out + 1, in_stride, out_stride); - } - - in += in_stride * 2; - out += out_stride * 2; - - // (-1, +1) - pixel = *(in - 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(in - 1) = EDGE; - - edge_trace_recursive_U8_U8(in - 1, out - 1, in_stride, out_stride); - } - - // (0, +1) - pixel = *in; - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *in = EDGE; - - edge_trace_recursive_U8_U8(in, out, in_stride, out_stride); - } - - // (+1, +1) - pixel = *(in + 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(in + 1) = EDGE; - - edge_trace_recursive_U8_U8(in + 1, out + 1, in_stride, out_stride); - } -} -} // namespace fp16 - -void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(gx, gy, magnitude, phase); - - set_shape_if_empty(*magnitude->info(), gx->info()->tensor_shape()); - set_shape_if_empty(*phase->info(), gx->info()->tensor_shape()); - - Format magnitude_format = gx->info()->data_type() == DataType::S16 ? Format::U16 : Format::U32; - set_format_if_unknown(*magnitude->info(), magnitude_format); - set_format_if_unknown(*phase->info(), Format::U8); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(gx, gy, magnitude, phase); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy); - ARM_COMPUTE_ERROR_ON_MSG(element_size_from_data_type(gx->info()->data_type()) != element_size_from_data_type(magnitude->info()->data_type()), "Magnitude must have the same element size as Gx and Gy"); - - _gx = gx; - _gy = gy; - _magnitude = magnitude; - _phase = phase; - - if(_gx->info()->data_type() == DataType::S16) - { - if(norm_type == 1) - { - _func = &fp16::mag_phase_l1norm_S16_S16_U16_U8; - } - else - { - _func = &fp16::mag_phase_l2norm_S16_S16_U16_U8; - } - } - else - { - if(norm_type == 1) - { - _func = &fp16::mag_phase_l1norm_S32_S32_U32_U8; - } - else - { - _func = &fp16::mag_phase_l2norm_S32_S32_U32_U8; - } - } - - constexpr unsigned int num_elems_processed_per_iteration = 32; - - // Configure kernel window - Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access); - - mag_access.set_valid_region(win, _gx->info()->valid_region()); - phase_access.set_valid_region(win, _gx->info()->valid_region()); - - INEKernel::configure(win); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - namespace { inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index 5e1c216b65..61221c1070 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -39,330 +39,6 @@ using namespace arm_compute; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -namespace fp16 -{ -inline float16x8_t harris_score(float16x8_t gx2, float16x8_t gy2, float16x8_t gxgy, float sensitivity, float strength_thresh) -{ - static const float16x8_t zero = vdupq_n_f16(0.f); - - // Trace^2 - float16x8_t trace2 = vaddq_f16(gx2, gy2); - trace2 = vmulq_f16(trace2, trace2); - - // Det(A) - float16x8_t det = vmulq_f16(gx2, gy2); - det = vfmsq_f16(det, gxgy, gxgy); - - // Det(A) - sensitivity * trace^2 - const float16x8_t mc = vfmsq_f16(det, vdupq_n_f16(sensitivity), trace2); - - // mc > strength_thresh - const uint16x8_t mask = vcgtq_f16(mc, vdupq_n_f16(strength_thresh)); - - return vbslq_f16(mask, mc, zero); -} - -template -inline void harris_score1xN_FLOAT_FLOAT_FLOAT(float16x8_t low_gx, float16x8_t low_gy, float16x8_t high_gx, float16x8_t high_gy, float16x8_t &gx2, float16x8_t &gy2, float16x8_t &gxgy, - float norm_factor) -{ - const float16x8_t norm_factor_fp16 = vdupq_n_f16(norm_factor); - - // Normalize - low_gx = vmulq_f16(low_gx, norm_factor_fp16); - low_gy = vmulq_f16(low_gy, norm_factor_fp16); - high_gx = vmulq_f16(high_gx, norm_factor_fp16); - high_gy = vmulq_f16(high_gy, norm_factor_fp16); - - float16x8_t gx = vextq_f16(low_gx, high_gx, 0); - float16x8_t gy = vextq_f16(low_gy, high_gy, 0); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - - gx = vextq_f16(low_gx, high_gx, 1); - gy = vextq_f16(low_gy, high_gy, 1); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - - gx = vextq_f16(low_gx, high_gx, 2); - gy = vextq_f16(low_gy, high_gy, 2); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - - if(block_size > 3) - { - gx = vextq_f16(low_gx, high_gx, 3); - gy = vextq_f16(low_gy, high_gy, 3); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - - gx = vextq_f16(low_gx, high_gx, 4); - gy = vextq_f16(low_gy, high_gy, 4); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - } - - if(block_size == 7) - { - gx = vextq_f16(low_gx, high_gx, 5); - gy = vextq_f16(low_gy, high_gy, 5); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - - gx = vextq_f16(low_gx, high_gx, 6); - gy = vextq_f16(low_gy, high_gy, 6); - - gx2 = vfmaq_f16(gx2, gx, gx); - gy2 = vfmaq_f16(gy2, gy, gy); - gxgy = vfmaq_f16(gxgy, gx, gy); - } -} - -template -inline void harris_score_S16_S16_FLOAT(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out_ptr, int32_t in_stride, float norm_factor, float sensitivity, - float strength_thresh) -{ - auto gx_ptr_0 = static_cast(in1_ptr) - (block_size / 2) * (in_stride + 1); - auto gy_ptr_0 = static_cast(in2_ptr) - (block_size / 2) * (in_stride + 1); - const int16_t *gx_ptr_1 = gx_ptr_0 + 8; - const int16_t *gy_ptr_1 = gy_ptr_0 + 8; - const auto output = static_cast(out_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float16x8_t gx2 = vdupq_n_f16(0.0f); - float16x8_t gy2 = vdupq_n_f16(0.0f); - float16x8_t gxgy = vdupq_n_f16(0.0f); - - for(size_t i = 0; i < block_size; ++i) - { - const float16x8_t low_gx = vcvtq_f16_s16(vld1q_s16(gx_ptr_0)); - const float16x8_t high_gx = vcvtq_f16_s16(vld1q_s16(gx_ptr_1)); - const float16x8_t low_gy = vcvtq_f16_s16(vld1q_s16(gy_ptr_0)); - const float16x8_t high_gy = vcvtq_f16_s16(vld1q_s16(gy_ptr_1)); - harris_score1xN_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2, gy2, gxgy, norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += in_stride; - gy_ptr_0 += in_stride; - gx_ptr_1 += in_stride; - gy_ptr_1 += in_stride; - } - - // Calculate harris score - const float16x8_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh); - - // Store score - vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(mc))); - vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(mc))); -} - -template -inline void harris_score_S32_S32_FLOAT(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out_ptr, int32_t in_stride, float norm_factor, float sensitivity, - float strength_thresh) -{ - static const float16x8_t zero = vdupq_n_f16(0.0f); - - auto gx_ptr_0 = static_cast(in1_ptr) - (block_size / 2) * (in_stride + 1); - auto gy_ptr_0 = static_cast(in2_ptr) - (block_size / 2) * (in_stride + 1); - const int32_t *gx_ptr_1 = gx_ptr_0 + 4; - const int32_t *gy_ptr_1 = gy_ptr_0 + 4; - const int32_t *gx_ptr_2 = gx_ptr_0 + 8; - const int32_t *gy_ptr_2 = gy_ptr_0 + 8; - const auto output = static_cast(out_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float16x8_t gx2 = zero; - float16x8_t gy2 = zero; - float16x8_t gxgy = zero; - - for(size_t i = 0; i < block_size; ++i) - { - const float16x8_t low_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_0))), - vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_1)))); - const float16x8_t high_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_2))), - vget_low_f16(zero)); - const float16x8_t low_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_0))), - vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_1)))); - const float16x8_t high_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_2))), - vget_low_f16(zero)); - harris_score1xN_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2, gy2, gxgy, norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += in_stride; - gy_ptr_0 += in_stride; - gx_ptr_1 += in_stride; - gy_ptr_1 += in_stride; - gx_ptr_2 += in_stride; - gy_ptr_2 += in_stride; - } - - // Calculate harris score - const float16x8_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh); - - // Store score - vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(mc))); - vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(mc))); -} - -template <> -inline void harris_score_S32_S32_FLOAT<7>(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out_ptr, int32_t in_stride, float norm_factor, float sensitivity, - float strength_thresh) -{ - static const float16x8_t zero = vdupq_n_f16(0.0f); - - auto gx_ptr_0 = static_cast(in1_ptr) - 3 * (in_stride + 1); - auto gy_ptr_0 = static_cast(in2_ptr) - 3 * (in_stride + 1); - const int32_t *gx_ptr_1 = gx_ptr_0 + 4; - const int32_t *gy_ptr_1 = gy_ptr_0 + 4; - const int32_t *gx_ptr_2 = gx_ptr_0 + 8; - const int32_t *gy_ptr_2 = gy_ptr_0 + 8; - const int32_t *gx_ptr_3 = gx_ptr_0 + 12; - const int32_t *gy_ptr_3 = gy_ptr_0 + 12; - const auto output = static_cast(out_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float16x8_t gx2 = zero; - float16x8_t gy2 = zero; - float16x8_t gxgy = zero; - - for(size_t i = 0; i < 7; ++i) - { - const float16x8_t low_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_0))), - vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_1)))); - const float16x8_t high_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_2))), - vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_3)))); - const float16x8_t low_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_0))), - vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_1)))); - const float16x8_t high_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_2))), - vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_3)))); - harris_score1xN_FLOAT_FLOAT_FLOAT<7>(low_gx, low_gy, high_gx, high_gy, gx2, gy2, gxgy, norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += in_stride; - gy_ptr_0 += in_stride; - gx_ptr_1 += in_stride; - gy_ptr_1 += in_stride; - gx_ptr_2 += in_stride; - gy_ptr_2 += in_stride; - } - - // Calculate harris score - const float16x8_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh); - - // Store score - vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(mc))); - vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(mc))); -} - -} // namespace fp16 - -template -BorderSize NEHarrisScoreFP16Kernel::border_size() const -{ - return _border_size; -} - -template -NEHarrisScoreFP16Kernel::NEHarrisScoreFP16Kernel() - : INEHarrisScoreKernel(), _func(nullptr) -{ -} - -template -void NEHarrisScoreFP16Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - Iterator input1(_input1, window); - Iterator input2(_input2, window); - Iterator output(_output, window); - - const size_t input_stride = _input1->info()->strides_in_bytes()[1] / element_size_from_data_type(_input1->info()->data_type()); - - execute_window_loop(window, [&](const Coordinates & id) - { - (*_func)(input1.ptr(), input2.ptr(), output.ptr(), input_stride, _norm_factor, _sensitivity, _strength_thresh); - }, - input1, input2, output); -} - -template -void NEHarrisScoreFP16Kernel::configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON(0.0f == norm_factor); - - _input1 = input1; - _input2 = input2; - _output = output; - _sensitivity = sensitivity; - _strength_thresh = strength_thresh; - _norm_factor = norm_factor; - _border_size = BorderSize(block_size / 2); - - if(input1->info()->data_type() == DataType::S16) - { - _func = &fp16::harris_score_S16_S16_FLOAT; - } - else - { - _func = &fp16::harris_score_S32_S32_FLOAT; - } - - ARM_COMPUTE_ERROR_ON(nullptr == _func); - - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = block_size; - - // Configure kernel window - Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input1->info(), -_border_size.left, -_border_size.top, num_elems_read_per_iteration, num_rows_read_per_iteration), - AccessWindowRectangle(input2->info(), -_border_size.left, -_border_size.top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), - input2->info()->valid_region()); - - output_access.set_valid_region(win, valid_region, border_undefined, border_size()); - - INEKernel::configure(win); -} - -template class arm_compute::NEHarrisScoreFP16Kernel<3>; -template class arm_compute::NEHarrisScoreFP16Kernel<5>; -template class arm_compute::NEHarrisScoreFP16Kernel<7>; - -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - template class arm_compute::NEHarrisScoreKernel<3>; template class arm_compute::NEHarrisScoreKernel<5>; template class arm_compute::NEHarrisScoreKernel<7>; diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp index d72c98bb50..0e5d50fd21 100644 --- a/src/runtime/NEON/functions/NECannyEdge.cpp +++ b/src/runtime/NEON/functions/NECannyEdge.cpp @@ -58,8 +58,7 @@ NECannyEdge::NECannyEdge(std::shared_ptr memory_manager) // NOLI { } -void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value, - bool use_fp16) +void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -127,18 +126,9 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, _memory_group.manage(&_phase); // Configure gradient - if(use_fp16) - { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type); - _gradient = std::move(k); - } - else - { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type); - _gradient = std::move(k); - } + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type); + _gradient = std::move(k); // Allocate intermediate tensors _gx.allocator()->allocate(); diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp index 25e28d2213..db5e9265bb 100644 --- a/src/runtime/NEON/functions/NEHarrisCorners.cpp +++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,7 +61,7 @@ NEHarrisCorners::NEHarrisCorners(std::shared_ptr memory_manager) void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist, float sensitivity, int32_t gradient_size, int32_t block_size, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) + BorderMode border_mode, uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); @@ -126,62 +126,31 @@ void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist, // Manage intermediate buffers _memory_group.manage(&_score); - if(use_fp16) + // Set/init Harris Score kernel accordingly with block_size + switch(block_size) { - switch(block_size) + case 3: { - case 3: - { - auto k = arm_compute::support::cpp14::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - break; - case 5: - { - auto k = arm_compute::support::cpp14::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - break; - case 7: - { - auto k = arm_compute::support::cpp14::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - default: - break; + auto k = arm_compute::support::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); } - } - else - { - // Set/init Harris Score kernel accordingly with block_size - switch(block_size) + break; + case 5: { - case 3: - { - auto k = arm_compute::support::cpp14::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - break; - case 5: - { - auto k = arm_compute::support::cpp14::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - break; - case 7: - { - auto k = arm_compute::support::cpp14::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - default: - break; + auto k = arm_compute::support::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); } + break; + case 7: + { + auto k = arm_compute::support::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + default: + break; } // Configure border filling before harris score diff --git a/tests/benchmark/CL/CannyEdge.cpp b/tests/benchmark/CL/CannyEdge.cpp index 7e327f3df3..8cc918f4c8 100644 --- a/tests/benchmark/CL/CannyEdge.cpp +++ b/tests/benchmark/CL/CannyEdge.cpp @@ -42,10 +42,9 @@ namespace { // *INDENT-OFF* // clang-format off -const auto use_fp16 = framework::dataset::make("UseFP16", { false }); const auto canny_edge_dataset = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), - combine(datasets::BorderModes(), use_fp16))); + datasets::BorderModes())); } // namespace using CLCannyEdgeFixture = CannyEdgeFixture; diff --git a/tests/benchmark/CL/HarrisCorners.cpp b/tests/benchmark/CL/HarrisCorners.cpp index 990ac24894..dbaf95c98d 100644 --- a/tests/benchmark/CL/HarrisCorners.cpp +++ b/tests/benchmark/CL/HarrisCorners.cpp @@ -54,25 +54,23 @@ using CLHarrisCornersFixture = HarrisCornersFixture; diff --git a/tests/benchmark/NEON/HarrisCorners.cpp b/tests/benchmark/NEON/HarrisCorners.cpp index 33315ffcd0..75a6794cc5 100644 --- a/tests/benchmark/NEON/HarrisCorners.cpp +++ b/tests/benchmark/NEON/HarrisCorners.cpp @@ -53,49 +53,23 @@ using NEHarrisCornersFixture = HarrisCornersFixture - void setup(std::string image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, bool use_fp16, Format format) + void setup(std::string image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, Format format) { // Load the image (cached by the library if loaded before) const RawTensor &raw = library->get(image, format); @@ -52,7 +52,7 @@ public: src = create_tensor(raw.shape(), format); dst = create_tensor(raw.shape(), format); - configure_target(canny_edge_func, src, dst, gradient_size, static_cast(norm_type) + 1, border_mode, use_fp16); + canny_edge_func.configure(&src, &dst, upper_thresh, lower_thresh, gradient_size, static_cast(norm_type) + 1, border_mode, constant_border_value); // Allocate tensors src.allocator()->allocate(); @@ -72,22 +72,6 @@ public: sync_tensor_if_necessary(dst); } -protected: - template ::value, int>::type = 0> - void configure_target(F &func, TensorType &src, TensorType &dst, int gradient_size, int norm_type, BorderMode border_mode, bool use_fp16) - { - func.configure(&src, &dst, upper_thresh, lower_thresh, gradient_size, norm_type, border_mode, constant_border_value, use_fp16); - } - - template ::value, int>::type = 0> - void configure_target(F &func, TensorType &src, TensorType &dst, int gradient_size, int norm_type, BorderMode border_mode, bool use_fp16) - { - ARM_COMPUTE_UNUSED(use_fp16); - ARM_COMPUTE_ERROR_ON(use_fp16); - - func.configure(&src, &dst, upper_thresh, lower_thresh, gradient_size, norm_type, border_mode, constant_border_value); - } - private: static const int32_t lower_thresh = 0; static const int32_t upper_thresh = 255; diff --git a/tests/benchmark/fixtures/HarrisCornersFixture.h b/tests/benchmark/fixtures/HarrisCornersFixture.h index c7ce683ab5..29d1741156 100644 --- a/tests/benchmark/fixtures/HarrisCornersFixture.h +++ b/tests/benchmark/fixtures/HarrisCornersFixture.h @@ -42,8 +42,7 @@ class HarrisCornersFixture : public framework::Fixture public: template void setup(std::string image, Format format, float threshold, float min_dist, float sensitivity, - int32_t gradient_size, int32_t block_size, - BorderMode border_mode, bool use_fp16) + int32_t gradient_size, int32_t block_size, BorderMode border_mode) { // Load the image (cached by the library if loaded before) const RawTensor &raw = library->get(image, format); @@ -52,7 +51,7 @@ public: src = create_tensor(raw.shape(), format); // Create and configure function - harris_corners_func.configure(&src, threshold, min_dist, sensitivity, gradient_size, block_size, &out, border_mode, 0, use_fp16); + harris_corners_func.configure(&src, threshold, min_dist, sensitivity, gradient_size, block_size, &out, border_mode, 0); // Allocate tensor src.allocator()->allocate(); diff --git a/tests/validation/CL/CannyEdge.cpp b/tests/validation/CL/CannyEdge.cpp index 7aa178adba..d130aa4bd1 100644 --- a/tests/validation/CL/CannyEdge.cpp +++ b/tests/validation/CL/CannyEdge.cpp @@ -48,21 +48,16 @@ namespace /* Allowed ratio of mismatches between target and reference (1.0 = 100%) */ const float allowed_mismatch_ratio = 0.1f; -const auto use_fp16 = framework::dataset::make("UseFP16", { false }); - const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), - combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), combine(datasets::BorderModes(), use_fp16))); + combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes())); } // namespace TEST_SUITE(CL) TEST_SUITE(CannyEdge) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), data), framework::dataset::make("Format", Format::U8)), - shape, gradient_size, normalization, border_mode, use_fp16, format) + shape, gradient_size, normalization, border_mode, format) { - ARM_COMPUTE_UNUSED(use_fp16); - ARM_COMPUTE_ERROR_ON(use_fp16); - CannyEdgeParameters params = canny_edge_parameters(); // Convert normalisation type to integer const auto norm_type = static_cast(normalization) + 1; diff --git a/tests/validation/CL/HarrisCorners.cpp b/tests/validation/CL/HarrisCorners.cpp index 890367c166..ccc9293fef 100644 --- a/tests/validation/CL/HarrisCorners.cpp +++ b/tests/validation/CL/HarrisCorners.cpp @@ -46,21 +46,15 @@ namespace validation { namespace { -const auto use_fp16 = framework::dataset::make("UseFP16", -{ false }); - -const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), combine(datasets::BorderModes(), use_fp16))); +const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), datasets::BorderModes())); } // namespace TEST_SUITE(CL) TEST_SUITE(HarrisCorners) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), data), framework::dataset::make("Format", Format::U8)), shape, - gradient_size, block_size, border_mode, use_fp16, format) + gradient_size, block_size, border_mode, format) { - ARM_COMPUTE_UNUSED(use_fp16); - ARM_COMPUTE_ERROR_ON(use_fp16); - std::mt19937 gen(library->seed()); std::uniform_real_distribution real_dist(0.f, 0.01f); diff --git a/tests/validation/NEON/CannyEdge.cpp b/tests/validation/NEON/CannyEdge.cpp index 5697b622f2..7c4cd8090f 100644 --- a/tests/validation/NEON/CannyEdge.cpp +++ b/tests/validation/NEON/CannyEdge.cpp @@ -48,23 +48,15 @@ namespace /* Allowed ratio of mismatches between target and reference (1.0 = 100%) */ const float allowed_mismatch_ratio = 0.1f; -const auto use_fp16 = framework::dataset::make("UseFP16", -{ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - true, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - false -}); - const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), - combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), combine(datasets::BorderModes(), use_fp16))); + combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes())); } // namespace TEST_SUITE(NEON) TEST_SUITE(CannyEdge) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), data), framework::dataset::make("Format", Format::U8)), - shape, gradient_size, normalization, border_mode, use_fp16, format) + shape, gradient_size, normalization, border_mode, format) { CannyEdgeParameters params = canny_edge_parameters(); // Convert normalisation type to integer @@ -81,7 +73,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(conca // Create Canny edge configure function NECannyEdge canny_edge; - canny_edge.configure(&src, &dst, params.upper_thresh, params.lower_thresh, gradient_size, norm_type, border_mode, params.constant_border_value, use_fp16); + canny_edge.configure(&src, &dst, params.upper_thresh, params.lower_thresh, gradient_size, norm_type, border_mode, params.constant_border_value); // Validate valid region validate(src.info()->valid_region(), shape_to_valid_region(shape, (BorderMode::UNDEFINED == border_mode))); diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp index 3474a96f8f..e0bbd92a2e 100644 --- a/tests/validation/NEON/HarrisCorners.cpp +++ b/tests/validation/NEON/HarrisCorners.cpp @@ -50,22 +50,14 @@ const float allowed_missing_percentage = 10.f; /* Allowed percentage of keypoints mismatching between target and reference */ const float allowed_mismatch_percentage = 10.f; -const auto use_fp16 = framework::dataset::make("UseFP16", -{ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - true, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - false -}); - -const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), combine(datasets::BorderModes(), use_fp16))); +const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), datasets::BorderModes())); } // namespace TEST_SUITE(NEON) TEST_SUITE(HarrisCorners) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), data), framework::dataset::make("Format", Format::U8)), shape, - gradient_size, block_size, border_mode, use_fp16, format) + gradient_size, block_size, border_mode, format) { std::mt19937 gen(library->seed()); std::uniform_real_distribution real_dist(0.f, 0.01f); @@ -90,7 +82,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(conca // Create harris corners configure function NEHarrisCorners harris_corners; - harris_corners.configure(&src, threshold, min_dist, sensitivity, gradient_size, block_size, &corners, border_mode, constant_border_value, use_fp16); + harris_corners.configure(&src, threshold, min_dist, sensitivity, gradient_size, block_size, &corners, border_mode, constant_border_value); // Validate padding PaddingCalculator calculator(shape.x(), 8); diff --git a/tests/validation/fixtures/CannyEdgeFixture.h b/tests/validation/fixtures/CannyEdgeFixture.h index 0f37c46641..d52b17e550 100644 --- a/tests/validation/fixtures/CannyEdgeFixture.h +++ b/tests/validation/fixtures/CannyEdgeFixture.h @@ -47,12 +47,11 @@ class CannyEdgeValidationFixture : public framework::Fixture { public: template - void setup(std::string image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, bool use_fp16, Format format) + void setup(std::string image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, Format format) { CannyEdgeParameters params = canny_edge_parameters(); - _target = compute_target(image, gradient_size, norm_type, border_mode, use_fp16, format, params); - //TODO(COMPMID-543): Add use_fp16 to reference + _target = compute_target(image, gradient_size, norm_type, border_mode, format, params); _reference = compute_reference(image, gradient_size, norm_type, border_mode, format, params); } @@ -63,21 +62,7 @@ protected: library->fill(tensor, raw); } - template ::value, int>::type = 0> - void configure_target(F &func, TensorType &src, TensorType &dst, int gradient_size, int norm_type, BorderMode border_mode, bool use_fp16, const CannyEdgeParameters ¶ms) - { - func.configure(&src, &dst, params.upper_thresh, params.lower_thresh, gradient_size, norm_type, border_mode, params.constant_border_value, use_fp16); - } - - template ::value, int>::type = 0> - void configure_target(F &func, TensorType &src, TensorType &dst, int gradient_size, int norm_type, BorderMode border_mode, bool use_fp16, const CannyEdgeParameters ¶ms) - { - ARM_COMPUTE_UNUSED(use_fp16); - ARM_COMPUTE_ERROR_ON(use_fp16); - func.configure(&src, &dst, params.upper_thresh, params.lower_thresh, gradient_size, norm_type, border_mode, params.constant_border_value); - } - - TensorType compute_target(const std::string &image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, bool use_fp16, Format format, const CannyEdgeParameters ¶ms) + TensorType compute_target(const std::string &image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, Format format, const CannyEdgeParameters ¶ms) { // Load the image (cached by the library if loaded before) const RawTensor &raw = library->get(image, format); @@ -90,7 +75,7 @@ protected: // Create Canny edge configure function FunctionType canny_edge; - configure_target(canny_edge, src, dst, gradient_size, static_cast(norm_type) + 1, border_mode, use_fp16, params); + canny_edge.configure(&src, &dst, params.upper_thresh, params.lower_thresh, gradient_size, static_cast(norm_type) + 1, border_mode, params.constant_border_value); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); diff --git a/tests/validation/fixtures/HarrisCornersFixture.h b/tests/validation/fixtures/HarrisCornersFixture.h index 1c30157344..f1d1f2d135 100644 --- a/tests/validation/fixtures/HarrisCornersFixture.h +++ b/tests/validation/fixtures/HarrisCornersFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,12 +47,11 @@ class HarrisCornersValidationFixture : public framework::Fixture { public: template - void setup(std::string image, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, Format format) + void setup(std::string image, int gradient_size, int block_size, BorderMode border_mode, Format format) { HarrisCornersParameters params = harris_corners_parameters(); - _target = compute_target(image, gradient_size, block_size, border_mode, use_fp16, format, params); - //TODO(COMPMID-543): Add use_fp16 to reference + _target = compute_target(image, gradient_size, block_size, border_mode, format, params); _reference = compute_reference(image, gradient_size, block_size, border_mode, format, params); } @@ -63,21 +62,7 @@ protected: library->fill(tensor, raw); } - template ::value, int>::type = 0> - void configure_target(F &func, TensorType &src, ArrayType &corners, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, const HarrisCornersParameters ¶ms) - { - func.configure(&src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, &corners, border_mode, params.constant_border_value, use_fp16); - } - - template ::value, int>::type = 0> - void configure_target(F &func, TensorType &src, ArrayType &corners, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, const HarrisCornersParameters ¶ms) - { - ARM_COMPUTE_UNUSED(use_fp16); - ARM_COMPUTE_ERROR_ON(use_fp16); - func.configure(&src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, &corners, border_mode, params.constant_border_value); - } - - ArrayType compute_target(std::string image, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, Format format, const HarrisCornersParameters ¶ms) + ArrayType compute_target(std::string image, int gradient_size, int block_size, BorderMode border_mode, Format format, const HarrisCornersParameters ¶ms) { // Load the image (cached by the library if loaded before) const RawTensor &raw = library->get(image, format); @@ -90,7 +75,7 @@ protected: // Create harris corners configure function FunctionType harris_corners; - configure_target(harris_corners, src, corners, gradient_size, block_size, border_mode, use_fp16, params); + harris_corners.configure(&src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, &corners, border_mode, params.constant_border_value); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); diff --git a/tests/validation/reference/CannyEdgeDetector.cpp b/tests/validation/reference/CannyEdgeDetector.cpp index cfe8ae8100..d50452bfe8 100644 --- a/tests/validation/reference/CannyEdgeDetector.cpp +++ b/tests/validation/reference/CannyEdgeDetector.cpp @@ -231,7 +231,8 @@ SimpleTensor canny_edge_detector_impl(const SimpleTensor &src, int32_t upp } // namespace template -SimpleTensor canny_edge_detector(const SimpleTensor &src, int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, +SimpleTensor canny_edge_detector(const SimpleTensor &src, + int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, T constant_border_value) { if(gradient_size < 7) @@ -244,7 +245,8 @@ SimpleTensor canny_edge_detector(const SimpleTensor &src, int32_t upper_th } } -template SimpleTensor canny_edge_detector(const SimpleTensor &src, int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, +template SimpleTensor canny_edge_detector(const SimpleTensor &src, + int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, uint8_t constant_border_value); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/CannyEdgeDetector.h b/tests/validation/reference/CannyEdgeDetector.h index a46c145153..ee6199d96e 100644 --- a/tests/validation/reference/CannyEdgeDetector.h +++ b/tests/validation/reference/CannyEdgeDetector.h @@ -36,7 +36,8 @@ namespace validation namespace reference { template -SimpleTensor canny_edge_detector(const SimpleTensor &src, int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, +SimpleTensor canny_edge_detector(const SimpleTensor &src, + int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, T constant_border_value = 0); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/HarrisCornerDetector.h b/tests/validation/reference/HarrisCornerDetector.h index 042e8570c2..f208eaa743 100644 --- a/tests/validation/reference/HarrisCornerDetector.h +++ b/tests/validation/reference/HarrisCornerDetector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,8 +38,9 @@ namespace validation namespace reference { template -std::vector harris_corner_detector(const SimpleTensor &src, float threshold, float min_dist, float sensitivity, int gradient_size, int block_size, BorderMode border_mode, - T constant_border_value = 0); +std::vector harris_corner_detector(const SimpleTensor &src, + float threshold, float min_dist, float sensitivity, int gradient_size, int block_size, + BorderMode border_mode, T constant_border_value = 0); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1