From cc859158faff318a81dd9aafcc5b609d7dd5d4a9 Mon Sep 17 00:00:00 2001 From: Freddie Liardet Date: Thu, 4 Nov 2021 12:13:03 +0000 Subject: Fix cpu conv3d quantized mismatch Resolves: COMPMID-4927 Signed-off-by: Freddie Liardet Change-Id: I265bf2b08598c6dec6303e3427bb9ba3f818cf03 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6578 Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/cpu/kernels/conv3d/neon/quantized.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cpu/kernels/conv3d/neon/quantized.h b/src/cpu/kernels/conv3d/neon/quantized.h index 2958cd61d4..cdbc45e207 100644 --- a/src/cpu/kernels/conv3d/neon/quantized.h +++ b/src/cpu/kernels/conv3d/neon/quantized.h @@ -186,18 +186,18 @@ void directconv3d_quantized_neon_ndhwc(const ITensor *src0, const ITensor *src1, q32x4_t wei_q32_3 = wrapper::vdup_n(static_cast(weights_offset), tag_type()); const auto src_q16_0 = wrapper::vmovl(wrapper::vgetlow(src_vec)); - const auto src_q16_1 = wrapper::vmovl(wrapper::vgetlow(src_vec)); + const auto src_q16_1 = wrapper::vmovl(wrapper::vgethigh(src_vec)); const auto wei_q16_0 = wrapper::vmovl(wrapper::vgetlow(w_vec)); - const auto wei_q16_1 = wrapper::vmovl(wrapper::vgetlow(w_vec)); + const auto wei_q16_1 = wrapper::vmovl(wrapper::vgethigh(w_vec)); src_q32_0 = wrapper::vadd(src_q32_0, wrapper::vmovl(wrapper::vgetlow(src_q16_0))); - src_q32_1 = wrapper::vadd(src_q32_1, wrapper::vmovl(wrapper::vgetlow(src_q16_0))); - src_q32_2 = wrapper::vadd(src_q32_2, wrapper::vmovl(wrapper::vgethigh(src_q16_1))); + src_q32_1 = wrapper::vadd(src_q32_1, wrapper::vmovl(wrapper::vgethigh(src_q16_0))); + src_q32_2 = wrapper::vadd(src_q32_2, wrapper::vmovl(wrapper::vgetlow(src_q16_1))); src_q32_3 = wrapper::vadd(src_q32_3, wrapper::vmovl(wrapper::vgethigh(src_q16_1))); wei_q32_0 = wrapper::vadd(wei_q32_0, wrapper::vmovl(wrapper::vgetlow(wei_q16_0))); - wei_q32_1 = wrapper::vadd(wei_q32_1, wrapper::vmovl(wrapper::vgetlow(wei_q16_0))); - wei_q32_2 = wrapper::vadd(wei_q32_2, wrapper::vmovl(wrapper::vgethigh(wei_q16_1))); + wei_q32_1 = wrapper::vadd(wei_q32_1, wrapper::vmovl(wrapper::vgethigh(wei_q16_0))); + wei_q32_2 = wrapper::vadd(wei_q32_2, wrapper::vmovl(wrapper::vgetlow(wei_q16_1))); wei_q32_3 = wrapper::vadd(wei_q32_3, wrapper::vmovl(wrapper::vgethigh(wei_q16_1))); acc_q32_0 = wrapper::vmla(acc_q32_0, wei_q32_0, src_q32_0); -- cgit v1.2.1