COMPMID-1534 - Fixing FP16 tests on NEON

- Fixed GEMMConvolutionLayer test. The issue was related to the tolerance - Fixed DirectConvolutioNLayer test. The issue was in the convolver_3x3 Change-Id: I9d5b906d7e5e32a0a34300d529d6edb804ac1c4e Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145377 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-08-23 10:25:06 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: 41acb76af9c8512ac39121103b21ce2aafbcbfe8 (patch)
tree: 788106d83c95c88954698a3f7d25d02db1cfe024 /arm_compute/core/NEON
parent: 02baf01d75dc639440cf6a3196162f02413661dc (diff)
download: ComputeLibrary-41acb76af9c8512ac39121103b21ce2aafbcbfe8.tar.gz
1 files changed, 8 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
index d56fd44700..b245505ac6 100644
--- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
+++ b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
@@ -460,8 +460,12 @@ inline float16x8x2_t convolve_3x3<2>(const float16_t *in_top, const float16_t *i
 {
     float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
     out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 2), out.val[0], 1);
-    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 2);
-    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 3);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 4), out.val[0], 2);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 3);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 4);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 5);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 4), out.val[0], 6);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 6), out.val[0], 7);
     return out;
 }
 
@@ -470,6 +474,8 @@ inline float16x8x2_t convolve_3x3<3>(const float16_t *in_top, const float16_t *i
 {
     float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
     out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 2);
+    out.val[0]        = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 1), out.val[0], 3);
     return out;
 }
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2018-08-23 10:25:06 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:54:54 +0000
commit	41acb76af9c8512ac39121103b21ce2aafbcbfe8 (patch)
tree	788106d83c95c88954698a3f7d25d02db1cfe024 /arm_compute/core/NEON
parent	02baf01d75dc639440cf6a3196162f02413661dc (diff)
download	ComputeLibrary-41acb76af9c8512ac39121103b21ce2aafbcbfe8.tar.gz