From 1c29ffc7fe02de68cf2e82709a3bc3e210cb0ba4 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 1 Aug 2019 15:03:00 +0100 Subject: COMPMID-2336: Fix build issues. Change-Id: I0932dc9ca4649f0825950ed9d6d249212bc6971e Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1671 Tested-by: Arm Jenkins --- .../CPP/kernels/CPPNonMaximumSuppressionKernel.cpp | 2 +- .../NEDepthwiseConvolutionLayerNativeKernel.cpp | 4 ++- .../kernels/convolution/depthwise/impl_qa8_qa8.hpp | 11 +++++++ support/ToolchainSupport.h | 34 ++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp index fb38bdcf94..f7edf8edd0 100644 --- a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp +++ b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp @@ -116,7 +116,7 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo _scores_above_thd_vector.emplace_back(score_i); // Initialize respective index and visited _sorted_indices.emplace_back(num_above_thd); - _visited.emplace_back(false); + _visited.push_back(false); ++num_above_thd; } } diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index aafdb2e8a4..c9d4e9be50 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -28,6 +28,8 @@ #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "support/ToolchainSupport.h" + namespace arm_compute { namespace @@ -160,7 +162,7 @@ void depthwise_loop_generic(const ITensor *input, const ITensor *weights, const for(size_t m = 0; m < depth_multiplier; ++m) { const auto weights_val = *(reinterpret_cast(weights_ptr + m * sizeof(T) + w * weights_stride_y)); - 
acc.at(m) = std::fma(weights_val, input_val, acc.at(m)); + acc.at(m) = support::cpp11::fma(weights_val, input_val, acc.at(m)); } offs += dilation.x() * input_stride_y; diff --git a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp index f638f0bb38..e8f44b6bfd 100644 --- a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp +++ b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp @@ -373,12 +373,23 @@ static inline void tilefn( final_accs[i] = vminq_s32(final_accs[i], vdupq_n_s32(clamp_max)); } +#ifndef __aarch64__ + const int16x8x2_t zelems = vuzpq_s16(vreinterpretq_s16_s32(final_accs[0]), + vreinterpretq_s16_s32(final_accs[1])); + const int8x16_t elems = vreinterpretq_s8_s16(zelems.val[0]); + + const int8x16x2_t zoutput = vuzpq_s8(elems, elems); + const uint8x8_t output = + vget_low_u8(vreinterpretq_u8_s8(zoutput.val[0])); + vst1_u8(get_output_ptr(oi, oj, channel), output); +#else const int8x16_t elems = vreinterpretq_s8_s16( vuzp1q_s16(vreinterpretq_s16_s32(final_accs[0]), vreinterpretq_s16_s32(final_accs[1]))); const uint8x8_t output = vget_low_u8(vreinterpretq_u8_s8(vuzp1q_s8(elems, elems))); vst1_u8(get_output_ptr(oi, oj, channel), output); +#endif // __aarch64__ } } } diff --git a/support/ToolchainSupport.h b/support/ToolchainSupport.h index 020a4a112b..03bbff9aba 100644 --- a/support/ToolchainSupport.h +++ b/support/ToolchainSupport.h @@ -195,6 +195,23 @@ inline T copysign(T x, T y) return ::copysign(x, y); } +/** Computes (x*y) + z as if to infinite precision and rounded only once to fit the result type. + * + * @note This function implements the same behaviour as std::fma except that it doesn't + * support Integral type. The latter is not in the namespace std in some Android toolchains. 
+ * + * @param[in] x floating-point value + * @param[in] y floating-point value + * @param[in] z floating-point value + * + * @return Result floating point value equal to (x*y) + z. + */ +template ::value>::type> +inline T fma(T x, T y, T z) +{ + return ::fma(x, y, z); +} + /** Loads the data from the given location, converts them to character string equivalents * and writes the result to a character string buffer. * @@ -304,6 +321,23 @@ inline T copysign(T x, T y) return std::copysign(x, y); } +/** Computes (x*y) + z as if to infinite precision and rounded only once to fit the result type. + * + * @note This function implements the same behaviour as std::fma except that it doesn't + * support Integral type. The latter is not in the namespace std in some Android toolchains. + * + * @param[in] x floating-point value + * @param[in] y floating-point value + * @param[in] z floating-point value + * + * @return Result floating point value equal to (x*y) + z. + */ +template ::value>::type> +inline T fma(T x, T y, T z) +{ + return std::fma(x, y, z); +} + /** Loads the data from the given location, converts them to character string equivalents * and writes the result to a character string buffer. * -- cgit v1.2.1