From 1c29ffc7fe02de68cf2e82709a3bc3e210cb0ba4 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 1 Aug 2019 15:03:00 +0100 Subject: COMPMID-2336: Fix build issues. Change-Id: I0932dc9ca4649f0825950ed9d6d249212bc6971e Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1671 Tested-by: Arm Jenkins --- src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp') diff --git a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp index f638f0bb38..e8f44b6bfd 100644 --- a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp +++ b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp @@ -373,12 +373,23 @@ static inline void tilefn( final_accs[i] = vminq_s32(final_accs[i], vdupq_n_s32(clamp_max)); } +#ifndef __aarch64__ + const int16x8x2_t zelems = vuzpq_s16(vreinterpretq_s16_s32(final_accs[0]), + vreinterpretq_s16_s32(final_accs[1])); + const int8x16_t elems = vreinterpretq_s8_s16(zelems.val[0]); + + const int8x16x2_t zoutput = vuzpq_s8(elems, elems); + const uint8x8_t output = + vget_low_u8(vreinterpretq_u8_s8(zoutput.val[0])); + vst1_u8(get_output_ptr(oi, oj, channel), output); +#else const int8x16_t elems = vreinterpretq_s8_s16( vuzp1q_s16(vreinterpretq_s16_s32(final_accs[0]), vreinterpretq_s16_s32(final_accs[1]))); const uint8x8_t output = vget_low_u8(vreinterpretq_u8_s8(vuzp1q_s8(elems, elems))); vst1_u8(get_output_ptr(oi, oj, channel), output); +#endif // __aarch64__ } } } -- cgit v1.2.1