aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-08-01 15:03:00 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-08-01 18:15:42 +0000
commit: 1c29ffc7fe02de68cf2e82709a3bc3e210cb0ba4 (patch)
tree: eeeb02221fe972141f54c01d33e6ed57eb0f59df
parent: 169cda3793ce4900b2bf103739f04bb83b1b6aae (diff)
download: ComputeLibrary-1c29ffc7fe02de68cf2e82709a3bc3e210cb0ba4.tar.gz
COMPMID-2336: Fix build issues.
Change-Id: I0932dc9ca4649f0825950ed9d6d249212bc6971e Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/1671 Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp 2
-rw-r--r-- src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp 11
-rw-r--r-- support/ToolchainSupport.h 34
4 files changed, 49 insertions, 2 deletions
diff --git a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
index fb38bdcf94..f7edf8edd0 100644
--- a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
+++ b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
@@ -116,7 +116,7 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
_scores_above_thd_vector.emplace_back(score_i);
// Initialize respective index and visited
_sorted_indices.emplace_back(num_above_thd);
- _visited.emplace_back(false);
+ _visited.push_back(false);
++num_above_thd;
}
}
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
index aafdb2e8a4..c9d4e9be50 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
@@ -28,6 +28,8 @@
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "support/ToolchainSupport.h"
+
namespace arm_compute
{
namespace
@@ -160,7 +162,7 @@ void depthwise_loop_generic(const ITensor *input, const ITensor *weights, const
for(size_t m = 0; m < depth_multiplier; ++m)
{
const auto weights_val = *(reinterpret_cast<T *>(weights_ptr + m * sizeof(T) + w * weights_stride_y));
- acc.at(m) = std::fma(weights_val, input_val, acc.at(m));
+ acc.at(m) = support::cpp11::fma(weights_val, input_val, acc.at(m));
}
offs += dilation.x() * input_stride_y;
diff --git a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp
index f638f0bb38..e8f44b6bfd 100644
--- a/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp
+++ b/src/core/NEON/kernels/convolution/depthwise/impl_qa8_qa8.hpp
@@ -373,12 +373,23 @@ static inline void tilefn(
final_accs[i] = vminq_s32(final_accs[i], vdupq_n_s32(clamp_max));
}
+#ifndef __aarch64__
+ const int16x8x2_t zelems = vuzpq_s16(vreinterpretq_s16_s32(final_accs[0]),
+ vreinterpretq_s16_s32(final_accs[1]));
+ const int8x16_t elems = vreinterpretq_s8_s16(zelems.val[0]);
+
+ const int8x16x2_t zoutput = vuzpq_s8(elems, elems);
+ const uint8x8_t output =
+ vget_low_u8(vreinterpretq_u8_s8(zoutput.val[0]));
+ vst1_u8(get_output_ptr(oi, oj, channel), output);
+#else
const int8x16_t elems = vreinterpretq_s8_s16(
vuzp1q_s16(vreinterpretq_s16_s32(final_accs[0]),
vreinterpretq_s16_s32(final_accs[1])));
const uint8x8_t output =
vget_low_u8(vreinterpretq_u8_s8(vuzp1q_s8(elems, elems)));
vst1_u8(get_output_ptr(oi, oj, channel), output);
+#endif // __aarch64__
}
}
}
diff --git a/support/ToolchainSupport.h b/support/ToolchainSupport.h
index 020a4a112b..03bbff9aba 100644
--- a/support/ToolchainSupport.h
+++ b/support/ToolchainSupport.h
@@ -195,6 +195,23 @@ inline T copysign(T x, T y)
return ::copysign(x, y);
}
+/** Computes (x*y) + z as if to infinite precision and rounded only once to fit the result type.
+ *
+ * @note This function implements the same behaviour as std::fma except that it doesn't
+ *       support integral types. std::fma is not in the namespace std in some Android toolchains.
+ *
+ * @param[in] x floating-point value
+ * @param[in] y floating-point value
+ * @param[in] z floating-point value
+ *
+ * @return Result floating point value equal to (x*y) + z.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T fma(T x, T y, T z)
+{
+ return ::fma(x, y, z);
+}
+
/** Loads the data from the given location, converts them to character string equivalents
* and writes the result to a character string buffer.
*
@@ -304,6 +321,23 @@ inline T copysign(T x, T y)
return std::copysign(x, y);
}
+/** Computes (x*y) + z as if to infinite precision and rounded only once to fit the result type.
+ *
+ * @note This function implements the same behaviour as std::fma except that it doesn't
+ *       support integral types. std::fma is not in the namespace std in some Android toolchains.
+ *
+ * @param[in] x floating-point value
+ * @param[in] y floating-point value
+ * @param[in] z floating-point value
+ *
+ * @return Result floating point value equal to (x*y) + z.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T fma(T x, T y, T z)
+{
+ return std::fma(x, y, z);
+}
+
/** Loads the data from the given location, converts them to character string equivalents
* and writes the result to a character string buffer.
*