aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Freddie Liardet <frederick.liardet@arm.com> 2021-11-04 12:13:03 +0000
committer Freddie Liardet <frederick.liardet@arm.com> 2021-11-04 14:07:35 +0000
commit cc859158faff318a81dd9aafcc5b609d7dd5d4a9 (patch)
tree 0bce675e255bd2cca2f3581d076620a8ea80f345
parent d928735fee6baefdb74325c05d8152dd13044f32 (diff)
download ComputeLibrary-cc859158faff318a81dd9aafcc5b609d7dd5d4a9.tar.gz
Fix cpu conv3d quantized mismatch
Resolves: COMPMID-4927
Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
Change-Id: I265bf2b08598c6dec6303e3427bb9ba3f818cf03
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6578
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/cpu/kernels/conv3d/neon/quantized.h 12
1 file changed, 6 insertions, 6 deletions
diff --git a/src/cpu/kernels/conv3d/neon/quantized.h b/src/cpu/kernels/conv3d/neon/quantized.h
index 2958cd61d4..cdbc45e207 100644
--- a/src/cpu/kernels/conv3d/neon/quantized.h
+++ b/src/cpu/kernels/conv3d/neon/quantized.h
@@ -186,18 +186,18 @@ void directconv3d_quantized_neon_ndhwc(const ITensor *src0, const ITensor *src1,
q32x4_t wei_q32_3 = wrapper::vdup_n(static_cast<q32_t>(weights_offset), tag_type());
const auto src_q16_0 = wrapper::vmovl(wrapper::vgetlow(src_vec));
- const auto src_q16_1 = wrapper::vmovl(wrapper::vgetlow(src_vec));
+ const auto src_q16_1 = wrapper::vmovl(wrapper::vgethigh(src_vec));
const auto wei_q16_0 = wrapper::vmovl(wrapper::vgetlow(w_vec));
- const auto wei_q16_1 = wrapper::vmovl(wrapper::vgetlow(w_vec));
+ const auto wei_q16_1 = wrapper::vmovl(wrapper::vgethigh(w_vec));
src_q32_0 = wrapper::vadd(src_q32_0, wrapper::vmovl(wrapper::vgetlow(src_q16_0)));
- src_q32_1 = wrapper::vadd(src_q32_1, wrapper::vmovl(wrapper::vgetlow(src_q16_0)));
- src_q32_2 = wrapper::vadd(src_q32_2, wrapper::vmovl(wrapper::vgethigh(src_q16_1)));
+ src_q32_1 = wrapper::vadd(src_q32_1, wrapper::vmovl(wrapper::vgethigh(src_q16_0)));
+ src_q32_2 = wrapper::vadd(src_q32_2, wrapper::vmovl(wrapper::vgetlow(src_q16_1)));
src_q32_3 = wrapper::vadd(src_q32_3, wrapper::vmovl(wrapper::vgethigh(src_q16_1)));
wei_q32_0 = wrapper::vadd(wei_q32_0, wrapper::vmovl(wrapper::vgetlow(wei_q16_0)));
- wei_q32_1 = wrapper::vadd(wei_q32_1, wrapper::vmovl(wrapper::vgetlow(wei_q16_0)));
- wei_q32_2 = wrapper::vadd(wei_q32_2, wrapper::vmovl(wrapper::vgethigh(wei_q16_1)));
+ wei_q32_1 = wrapper::vadd(wei_q32_1, wrapper::vmovl(wrapper::vgethigh(wei_q16_0)));
+ wei_q32_2 = wrapper::vadd(wei_q32_2, wrapper::vmovl(wrapper::vgetlow(wei_q16_1)));
wei_q32_3 = wrapper::vadd(wei_q32_3, wrapper::vmovl(wrapper::vgethigh(wei_q16_1)));
acc_q32_0 = wrapper::vmla(acc_q32_0, wei_q32_0, src_q32_0);