aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2018-08-23 10:25:06 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit41acb76af9c8512ac39121103b21ce2aafbcbfe8 (patch)
tree788106d83c95c88954698a3f7d25d02db1cfe024 /arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
parent02baf01d75dc639440cf6a3196162f02413661dc (diff)
downloadComputeLibrary-41acb76af9c8512ac39121103b21ce2aafbcbfe8.tar.gz
COMPMID-1534 - Fixing FP16 tests on NEON
- Fixed GEMMConvolutionLayer test. The issue was related to the tolerance - Fixed DirectConvolutioNLayer test. The issue was in the convolver_3x3 Change-Id: I9d5b906d7e5e32a0a34300d529d6edb804ac1c4e Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145377 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h')
-rw-r--r--arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h10
1 files changed, 8 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
index d56fd44700..b245505ac6 100644
--- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
+++ b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h
@@ -460,8 +460,12 @@ inline float16x8x2_t convolve_3x3<2>(const float16_t *in_top, const float16_t *i
{
float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 2), out.val[0], 1);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 2);
- out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 3);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 4), out.val[0], 2);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 3);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 4);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 5);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 4), out.val[0], 6);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 6), out.val[0], 7);
return out;
}
@@ -470,6 +474,8 @@ inline float16x8x2_t convolve_3x3<3>(const float16_t *in_top, const float16_t *i
{
float16x8x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2);
out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 2);
+ out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 1), out.val[0], 3);
return out;
}