aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ioan-Cristian Szabo <ioan-cristian.szabo@arm.com> 2017-10-26 15:42:24 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit 33fd07bd27be3cba183b7cacef63ea220c770c23 (patch)
tree 0ccd4269992a90542697c85a0bd1c690872327b5
parent eae4ce085ed44c67de6d87eeba7726570ac23787 (diff)
download ComputeLibrary-33fd07bd27be3cba183b7cacef63ea220c770c23.tar.gz
COMPMID-634: Enable clang with libc++ to compile for Android (32 and 64 bits)
Change-Id: I693f64e70cd478e93675a8b04360128ded3b60d4
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93015
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r-- SConstruct 7
-rw-r--r-- arm_compute/core/NEON/NEMath.h 4
-rw-r--r-- arm_compute/core/NEON/NEMath.inl 4
-rw-r--r-- arm_compute/core/NEON/kernels/NEAccumulateKernel.h 6
-rw-r--r-- arm_compute/core/NEON/kernels/NEActivationLayerKernel.h 8
-rw-r--r-- arm_compute/core/NEON/kernels/NEBox3x3Kernel.h 6
-rw-r--r-- arm_compute/core/NEON/kernels/NECannyEdgeKernel.h 6
-rw-r--r-- arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h 6
-rw-r--r-- arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h 6
-rw-r--r-- arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h 6
-rw-r--r-- arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h 4
-rw-r--r-- docs/Doxyfile 2
-rwxr-xr-x scripts/check_bad_style.sh 6
-rwxr-xr-x scripts/clang_tidy_rules.py 6
-rw-r--r-- src/core/NEON/kernels/NEAccumulateKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NEActivationLayerKernel.cpp 12
-rw-r--r-- src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp 10
-rw-r--r-- src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp 10
-rw-r--r-- src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp 8
-rw-r--r-- src/core/NEON/kernels/NEBox3x3Kernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NECannyEdgeKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp 8
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp 24
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp 8
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp 28
-rw-r--r-- src/core/NEON/kernels/NEHarrisCornersKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NEIm2ColKernel.cpp 8
-rw-r--r-- src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp 6
-rw-r--r-- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NENormalizationLayerKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp 10
-rw-r--r-- src/core/NEON/kernels/NEPoolingLayerKernel.cpp 16
-rw-r--r-- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp 24
-rw-r--r-- src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp 4
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowp.cpp 4
-rw-r--r-- support/ToolchainSupport.h 14
-rw-r--r-- tests/AssetsLibrary.cpp 8
-rw-r--r-- tests/benchmark/NEON/ActivationLayer.cpp 6
-rw-r--r-- tests/benchmark/NEON/BatchNormalizationLayer.cpp 6
-rw-r--r-- tests/benchmark/NEON/ConvolutionLayer.cpp 6
-rw-r--r-- tests/benchmark/NEON/FullyConnectedLayer.cpp 6
-rw-r--r-- tests/benchmark/NEON/GEMM.cpp 4
-rw-r--r-- tests/benchmark/NEON/NormalizationLayer.cpp 6
-rw-r--r-- tests/benchmark/NEON/PoolingLayer.cpp 6
-rw-r--r-- tests/benchmark/NEON/SYSTEM/AlexNet.cpp 6
-rw-r--r-- tests/validation/NEON/ActivationLayer.cpp 8
-rw-r--r-- tests/validation/NEON/ArithmeticAddition.cpp 8
-rw-r--r-- tests/validation/NEON/ArithmeticSubtraction.cpp 8
-rw-r--r-- tests/validation/NEON/BatchNormalizationLayer.cpp 8
-rw-r--r-- tests/validation/NEON/ConvolutionLayer.cpp 12
-rw-r--r-- tests/validation/NEON/DepthConcatenateLayer.cpp 4
-rw-r--r-- tests/validation/NEON/DirectConvolutionLayer.cpp 8
-rw-r--r-- tests/validation/NEON/Flatten.cpp 4
-rw-r--r-- tests/validation/NEON/FullyConnectedLayer.cpp 12
-rw-r--r-- tests/validation/NEON/GEMM.cpp 8
-rw-r--r-- tests/validation/NEON/HarrisCorners.cpp 4
-rw-r--r-- tests/validation/NEON/NormalizationLayer.cpp 8
-rw-r--r-- tests/validation/NEON/PixelWiseMultiplication.cpp 4
-rw-r--r-- tests/validation/NEON/PoolingLayer.cpp 8
-rw-r--r-- tests/validation/NEON/SoftmaxLayer.cpp 12
-rw-r--r-- tests/validation/Validation.h 5
63 files changed, 251 insertions, 227 deletions
diff --git a/SConstruct b/SConstruct
index 627f7f2076..c8fad8da0c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -126,8 +126,13 @@ elif env['arch'] == 'arm64-v8a':
elif env['os'] == 'android':
prefix = "aarch64-linux-android-"
elif env['arch'] == 'arm64-v8.2-a':
+ if os.environ.get('CXX', 'g++') == 'clang++':
+ print("Clang cannot compile armv8.2-a code")
+ Exit(1)
+
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd'])
- env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2'])
+
if env['os'] == 'linux':
prefix = "aarch64-linux-gnu-"
elif env['os'] == 'bare_metal':
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 4c4085e544..1fc0d5c0b8 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -116,7 +116,7 @@ float32x4_t vtanhq_f32(float32x4_t val);
*/
float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** Calculate hyperbolic tangent.
*
* tanh(x) = (e^2x - 1)/(e^2x + 1)
@@ -179,7 +179,7 @@ float16x8_t vexpq_f16(float16x8_t x);
* @return The calculated power.
*/
float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace arm_compute
#include "arm_compute/core/NEON/NEMath.inl"
#endif /* __ARM_COMPUTE_NEMATH_H__ */
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index ebfc52d9a0..250114f4df 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -168,7 +168,7 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
{
return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/* Exponent polynomial coefficients */
const std::array<float16x8_t, 8> exp_tab_f16 =
{
@@ -301,5 +301,5 @@ inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
{
return vexpq_f16(vmulq_f16(n, vlogq_f16(val)));
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace arm_compute
diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
index ad8b02fbc4..d6ad0be03e 100644
--- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
@@ -80,7 +80,7 @@ protected:
float _alpha;
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** Interface for the accumulate weighted kernel using F16 */
class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel
{
@@ -88,9 +88,9 @@ public:
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
};
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
/** Interface for the accumulate squared kernel
*
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index 08fb3f915d..e70dd454df 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -27,9 +27,9 @@
#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/NEON/INEKernel.h"
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
#include <arm_fp16.h>
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
namespace arm_compute
{
@@ -76,14 +76,14 @@ private:
*/
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type activation(const Window &window);
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** Function to apply an activation function on a tensor.
*
* @param[in] window Region on which to execute the kernel
*/
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type activation(const Window &window);
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
/** Function to apply an activation function on a tensor.
*
* @param[in] window Region on which to execute the kernel
diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
index 1366adad3b..a53e4d77f7 100644
--- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
@@ -46,7 +46,7 @@ public:
BorderSize border_size() const override;
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** NEON kernel to perform a Box 3x3 filter using F16 simd
*/
class NEBox3x3FP16Kernel : public NEBox3x3Kernel
@@ -55,8 +55,8 @@ public:
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
};
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
using NEBox3x3FP16Kernel = NEBox3x3Kernel;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEBOX3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
index 37d86685d3..4f1a1f32dc 100644
--- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
+++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
@@ -81,7 +81,7 @@ protected:
ITensor *_phase; /**< Destination tensor - Quantized phase */
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** NEON kernel to perform Gradient computation
*/
class NEGradientFP16Kernel : public NEGradientKernel
@@ -90,9 +90,9 @@ public:
// Inherited methods overriden:
void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override;
};
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
using NEGradientFP16Kernel = NEGradientKernel;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
/** NEON kernel to perform Non-Maxima suppression for Canny Edge.
*
diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
index 2aef420e42..cfa5220c4d 100644
--- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
+++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
@@ -99,7 +99,7 @@ private:
HarrisScoreFunction *_func;
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** Interface for the accumulate Weighted kernel using F16 */
template <int32_t block_size>
class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel
@@ -118,9 +118,9 @@ private:
/** Harris Score function to use for the particular image types passed to configure() */
HarrisScoreFunction *_func;
};
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
template <int32_t block_size>
using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel<block_size>;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
index b853d22456..fba8d8dd39 100644
--- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
+++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
@@ -94,7 +94,7 @@ private:
ITensor *_phase; /**< Output - Phase */
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** Template interface for the kernel to compute magnitude and phase */
template <MagnitudeType mag_type, PhaseType phase_type>
class NEMagnitudePhaseFP16Kernel : public INEKernel
@@ -156,9 +156,9 @@ private:
ITensor *_magnitude; /**< Output - Magnitude */
ITensor *_phase; /**< Output - Phase */
};
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
template <MagnitudeType mag_type, PhaseType phase_type>
using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel<mag_type, phase_type>;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
index 3bce1a99f3..f47b487c91 100644
--- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
@@ -78,7 +78,7 @@ protected:
ITensor *_output; /**< Destination tensor */
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32
*/
class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
@@ -92,8 +92,8 @@ public:
*/
void configure(const ITensor *input, ITensor *output, bool border_undefined);
};
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace arm_compute
#endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h
index f218e1f006..4eab7f91fc 100644
--- a/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h
+++ b/arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h
@@ -26,6 +26,9 @@
#include "arm_compute/core/NEON/kernels/NEGEMMLowpAssemblyBaseKernel.h"
+// Enable only if compiled for AArch64-V8.2-A targets
+#ifdef ARM_COMPUTE_AARCH64_V8_2
+
namespace arm_compute
{
class ITensor;
@@ -42,4 +45,5 @@ protected:
void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output) override;
};
} // namespace arm_compute
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
#endif /*__ARM_COMPUTE_NEGEMMLOWPAARCH64V8P4KERNEL_H__*/
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 44f9e92b58..a3e4f5c908 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -2076,7 +2076,7 @@ PREDEFINED = DOXYGEN_SKIP_THIS \
LOCATE_MAX \
HAS_BIAS \
POOL_AVG \
- ARM_COMPUTE_ENABLE_FP16
+ ARM_COMPUTE_AARCH64_V8_2
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
diff --git a/scripts/check_bad_style.sh b/scripts/check_bad_style.sh
index ab2b1a016d..827163f02c 100755
--- a/scripts/check_bad_style.sh
+++ b/scripts/check_bad_style.sh
@@ -57,15 +57,15 @@ grep -Hnir --exclude-dir=assembly "#else$\|#endif$" $DIRECTORIES | tee bad_style
if [[ $(cat bad_style.log | wc -l) > 0 ]]
then
echo ""
- echo "ERROR: #else and #endif should be followed by a comment of the guard they refer to (e.g /* ARM_COMPUTE_ENABLE_FP16 */ )"
+ echo "ERROR: #else and #endif should be followed by a comment of the guard they refer to (e.g /* ARM_COMPUTE_AARCH64_V8_2 */ )"
exit -1
fi
-grep -Hnir --exclude-dir=assembly "ARM_COMPUTE_ENABLE_FP16" ./tests/validation/CL | tee bad_style.log
+grep -Hnir --exclude-dir=assembly "ARM_COMPUTE_AARCH64_V8_2" ./tests/validation/CL | tee bad_style.log
if [[ $(cat bad_style.log | wc -l) > 0 ]]
then
echo ""
- echo "ERROR: Found ARM_COMPUTE_ENABLE_FP16 in CL tests though F16 is always supported for OpenCL"
+ echo "ERROR: Found ARM_COMPUTE_AARCH64_V8_2 in CL tests though armv8.2 features (FP16) are always supported for OpenCL"
exit -1
fi
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index e5e357e59d..b811847ba8 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -11,12 +11,10 @@ def get_list_includes():
def get_list_flags( filename, arch):
assert arch in ["armv7", "aarch64"]
flags = ["-std=c++11"]
- if "tests/validation_old" in filename:
- flags.append("-DBOOST")
flags.append("-DARM_COMPUTE_CPP_SCHEDULER=1")
flags.append("-DARM_COMPUTE_CL")
if arch == "aarch64":
- flags.append("-DARM_COMPUTE_ENABLE_FP16")
+ flags.append("-DARM_COMPUTE_AARCH64_V8_2")
return flags
def filter_files( list_files ):
@@ -28,8 +26,6 @@ def filter_files( list_files ):
continue
if "openvx-arm_compute" in f:
continue
- if "tests/validation_old" in f:
- continue
# Skip OMPScheduler as it causes problems in clang
if "OMPScheduler.cpp" in f:
continue
diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp
index deafabe1d4..856e3acb35 100644
--- a/src/core/NEON/kernels/NEAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp
@@ -41,7 +41,7 @@ class Coordinates;
/* Max S16 value used for saturation purposes. */
const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast<uint16_t>(INT16_MAX));
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
namespace fp16
{
inline float16x8x2_t convert_u8x16_to_f16x8x2(uint8x16_t input)
@@ -132,7 +132,7 @@ void NEAccumulateWeightedFP16Kernel::run(const Window &window, const ThreadInfo
},
input, accum);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
namespace
{
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 67fc45bc70..8dfce0f7b5 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -81,7 +81,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
// Activation functions : FP16
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
{
@@ -96,7 +96,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
};
-#endif /* ARM_COMPUTE_ENABLE_FP16*/
+#endif /* ARM_COMPUTE_AARCH64_V8_2*/
// Activation functions : QS8
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
@@ -140,11 +140,11 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
case DataType::F32:
_func = act_map_f32[activation_info.activation()];
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
_func = act_map_f16[activation_info.activation()];
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
}
@@ -174,7 +174,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
ICPPKernel::configure(win);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
@@ -305,7 +305,7 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
},
input, output);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index f263fd0df2..02fabcaff8 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -145,7 +145,7 @@ inline int16x8x2_t vqadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b)
return res;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
const float16x8x2_t res =
@@ -158,11 +158,11 @@ inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
return res;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
Iterator input1(in1, window);
Iterator input2(in2, window);
Iterator output(out, window);
@@ -175,13 +175,13 @@ void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const
vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vadd2q_f16(a, b));
},
input1, input2, output);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(in1);
ARM_COMPUTE_UNUSED(in2);
ARM_COMPUTE_UNUSED(out);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 85f72c1421..8aadab9fc3 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -157,7 +157,7 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o
input1, input2, output);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
const float16x8x2_t res =
@@ -170,11 +170,11 @@ inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
return res;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
Iterator input1(in1, window);
Iterator input2(in2, window);
Iterator output(out, window);
@@ -187,13 +187,13 @@ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const
vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b));
},
input1, input2, output);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(in1);
ARM_COMPUTE_UNUSED(in2);
ARM_COMPUTE_UNUSED(out);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index f6f6f9cb61..c48653ad17 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -169,7 +169,7 @@ void batch_normalization_fp32(ITensor *in, ITensor *out, const ITensor *mean, co
input, output);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window)
{
Iterator input(in, window);
@@ -212,7 +212,7 @@ void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, co
},
input, output);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon)
{
@@ -258,11 +258,11 @@ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output,
num_elems_processed_per_iteration = 4;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
_func = &batch_normalization_fp16;
num_elems_processed_per_iteration = 8;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Element size not supported");
break;
diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
index d7178e4690..be8beaeacb 100644
--- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
@@ -33,7 +33,7 @@
using namespace arm_compute;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
@@ -104,7 +104,7 @@ void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info)
},
input, output);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
BorderSize NEBox3x3Kernel::border_size() const
{
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index bcbe790fd0..944f29d506 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -51,7 +51,7 @@ constexpr int EDGE = 255;
constexpr int MAYBE = 127;
} // namespace
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
namespace fp16
{
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
@@ -787,7 +787,7 @@ void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITens
INEKernel::configure(win);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
namespace
{
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
index 6631359341..06620d45aa 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
@@ -124,7 +124,7 @@ inline qint32x4_t internal_vqaddq(const qint32x4_t &x, const qint32x4_t &y)
return vqaddq_qs32(x, y);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
inline float16x8_t internal_vld1q(const float16_t *in)
{
return vld1q_f16(in);
@@ -141,7 +141,7 @@ inline float16x8_t internal_vqaddq(const float16x8_t &x, const float16x8_t &y)
{
return vaddq_f16(x, y);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
template <typename T1, typename T2, bool in_place>
void accumulate_bias(ITensor *input, const ITensor *bias, const Window window, ITensor *output)
@@ -246,13 +246,13 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con
_func = (output == nullptr) ? &accumulate_bias<qint32_t, qint16_t, true> : &accumulate_bias<qint32_t, qint16_t, false>;
break;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
{
_func = (output == nullptr) ? &accumulate_bias<float16_t, float16_t, true> : &accumulate_bias<float16_t, float16_t, false>;
break;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::F32:
{
_func = (output == nullptr) ? &accumulate_bias<float, float, true> : &accumulate_bias<float, float, false>;
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 2766d698d9..8642a19f39 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -68,7 +68,7 @@ inline qint16x8_t internal_vdupq_n(qint16_t v)
return vdupq_n_qs16(v);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
template <unsigned int stridex>
float16x8_t internal_vld1q(const float16_t *in);
@@ -113,7 +113,7 @@ inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, co
ARM_COMPUTE_UNUSED(fixed_point_position);
return vaddq_f16(x, vmulq_f16(y, z));
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
template <unsigned int stridex>
float32x4_t internal_vld1q(const float *in);
@@ -427,7 +427,7 @@ public:
}
};
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
inline float16x8x3_t load_matrix_row(const float16_t *ptr)
{
/* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes:
@@ -567,7 +567,7 @@ void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values)
vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0])));
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
inline float32x4x3_t load_matrix_row(const float *ptr)
{
@@ -1433,9 +1433,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
{
switch(input->info()->data_type())
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::QS8:
case DataType::QS16:
_num_elems_written_per_iteration = 8;
@@ -1468,9 +1468,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
_num_elems_read_per_iteration = 12;
_num_elems_written_per_iteration = 16 >> conv_stride_x;
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::QS8:
case DataType::QS16:
_num_weight_elems_read_per_row = 8 + _kernel_size - 1;
@@ -1532,11 +1532,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
case DataType::F32:
convolve_1x1<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
convolve_1x1<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
@@ -1553,11 +1553,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
case DataType::F32:
convolve_3x3<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
convolve_3x3<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index fb07cb0333..e61f95221f 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -108,7 +108,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI
in0_out, in1);
break;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
{
execute_window_loop(window, [&](const Coordinates & id)
@@ -128,7 +128,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI
in0_out, in1);
break;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::QS8:
{
execute_window_loop(window, [&](const Coordinates & id)
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index 9dbce1de2f..c94d3b1416 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -66,7 +66,7 @@ void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &wi
in, out);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta)
{
const float16x8_t beta_f16 = vdupq_n_f16(beta);
@@ -89,7 +89,7 @@ void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &wi
},
in, out);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta)
{
@@ -167,10 +167,10 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
_func = &matrix_addition_qs16;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
_func = &matrix_addition_f16;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index 69090825fa..4fcf6e2f37 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -53,7 +53,7 @@ namespace
template <bool multiply_alpha>
void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto width_matrix_b = static_cast<int>(output->info()->dimension(0));
const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
@@ -186,7 +186,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
},
ina, inb, out);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(input0);
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_UNUSED(output);
@@ -194,7 +194,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_ERROR("Not implemented");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
template <bool multiply_alpha>
@@ -915,7 +915,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
template <bool multiply_alpha>
void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type());
const size_t out_stride = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type());
const int num_elems_matrix_b_x = input1->info()->dimension(0);
@@ -1051,14 +1051,14 @@ void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
vst1q_f16(mtx_out + 3 * out_stride, c.val[3]);
},
ina, inb, out);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(input0);
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_ERROR("Not implemented");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
template <bool multiply_alpha>
@@ -1454,13 +1454,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor
num_elems_processed_per_iteration_x = 16;
break;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
{
num_elems_processed_per_iteration_x = 32;
break;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
@@ -1503,13 +1503,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor
num_elems_processed_per_iteration_x = 8;
break;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
{
num_elems_processed_per_iteration_x = 8;
break;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
@@ -1563,14 +1563,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf
vector_matrix_multiply_qs16<false>(_input0, _input1, _output, window, info, _alpha);
break;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
{
multiply_alpha ? vector_matrix_multiply_f16<true>(_input0, _input1, _output, window, info, _alpha) :
vector_matrix_multiply_f16<false>(_input0, _input1, _output, window, info, _alpha);
break;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
@@ -1600,14 +1600,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf
matrix_matrix_multiply_qs16<false>(_input0, _input1, _output, window, _alpha);
break;
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
{
multiply_alpha ? matrix_matrix_multiply_f16<true>(_input0, _input1, _output, window, _alpha) :
matrix_matrix_multiply_f16<false>(_input0, _input1, _output, window, _alpha);
break;
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
index 233b2baabe..d8440e333e 100644
--- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
@@ -39,7 +39,7 @@
using namespace arm_compute;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
template class arm_compute::NEHarrisScoreFP16Kernel<3>;
template class arm_compute::NEHarrisScoreFP16Kernel<5>;
@@ -361,7 +361,7 @@ void NEHarrisScoreFP16Kernel<block_size>::configure(const IImage *input1, const
INEKernel::configure(win);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
template class arm_compute::NEHarrisScoreKernel<3>;
template class arm_compute::NEHarrisScoreKernel<5>;
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 1c018b269b..099f2f1be3 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -311,11 +311,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
case DataType::F32:
_func = &NEIm2ColKernel::run_reduced<float>;
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
_func = &NEIm2ColKernel::run_reduced<float16_t>;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::QS8:
_func = &NEIm2ColKernel::run_reduced<qint8_t>;
break;
@@ -334,11 +334,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
case DataType::F32:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::QS8:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
break;
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index 1b2942cd93..99b4250bb9 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -51,7 +51,7 @@ namespace
{
void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto width_matrix_b = static_cast<int>(output->info()->dimension(0));
const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
@@ -160,14 +160,14 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
vst1q_f16(vec_out + 24, acc3);
},
ina, out);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(input0);
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR("Not supported, recompile with -march=armv8.2-a+fp16+simd.");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info)
diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
index 433985f6fa..2424ec139e 100644
--- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
+++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
@@ -51,7 +51,7 @@ constexpr float COEFF1 = 0.0663f;
constexpr float COEFF2 = 0.2447f;
} // namespace
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
namespace fp16
{
inline float16x8_t inv(float16x8_t x)
@@ -429,7 +429,7 @@ template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, Ph
template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::SIGNED>;
template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, PhaseType::UNSIGNED>;
template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::UNSIGNED>;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
namespace
{
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
index b7dfb59252..c4517dafaa 100644
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
@@ -41,7 +41,7 @@ namespace arm_compute
class Coordinates;
} // namespace arm_compute
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
namespace fp16
{
inline void mask_top(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask)
@@ -224,7 +224,7 @@ void NENonMaximaSuppression3x3FP16Kernel::configure(const ITensor *input, ITenso
INEKernel::configure(win);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
namespace
{
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index d6d26e2d12..f6f3d5f238 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -232,7 +232,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window)
},
input, input_squared, output);
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
else if(dt == DataType::F16)
{
const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff());
@@ -268,7 +268,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window)
},
input, input_squared, output);
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
else
{
ARM_COMPUTE_ERROR("Not supported");
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index 19d45e2cb5..2c90d9aa22 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -38,9 +38,9 @@
#include <cstdint>
#include <cstdlib>
-#if ARM_COMPUTE_ENABLE_FP16
+#if ARM_COMPUTE_AARCH64_V8_2
#include <arm_fp16.h> // needed for float16_t
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
using namespace arm_compute;
@@ -335,7 +335,7 @@ void mul_F32_F32_F32_n(const void *__restrict input1_ptr, const void *__restrict
template <bool is_scale255, bool is_sat>
void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto input1 = static_cast<const float16_t *__restrict>(input1_ptr);
const auto input2 = static_cast<const float16_t *__restrict>(input2_ptr);
const auto output = static_cast<float16_t *__restrict>(output_ptr);
@@ -350,13 +350,13 @@ void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict
}
};
vst2q_f16(output, result);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(input1_ptr);
ARM_COMPUTE_UNUSED(input2_ptr);
ARM_COMPUTE_UNUSED(output_ptr);
ARM_COMPUTE_UNUSED(scale);
ARM_COMPUTE_ERROR("Not supported. Recompile the library with arch=arm64-v8.2-a.");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
template <bool is_scale255, bool is_sat>
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 8d4e46500f..0024e33723 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -173,7 +173,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
}
num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
case DataType::F16:
switch(pool_size)
{
@@ -192,7 +192,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
break;
}
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
case DataType::F32:
switch(pool_size)
{
@@ -536,7 +536,7 @@ void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window
template <PoolingType pooling_type>
void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
Iterator input(_input, window_input);
Iterator output(_output, window);
@@ -595,17 +595,17 @@ void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window
*(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
},
input, output);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(window_input);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
template <PoolingType pooling_type>
void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window)
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
Iterator input(_input, window_input);
Iterator output(_output, window);
constexpr int pool_size = 2;
@@ -654,11 +654,11 @@ void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window
vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
},
input, output);
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
ARM_COMPUTE_UNUSED(window_input);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
template <PoolingType pooling_type>
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 648dac46c0..73aba284ca 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -106,7 +106,7 @@ void logits_1d_max_qs16(const ITensor *in, ITensor *out, const Window &window)
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window)
{
Window in_slice = window.first_slice_window_1D();
@@ -138,7 +138,7 @@ void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window)
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void logits_1d_max_f32(const ITensor *in, ITensor *out, const Window &window)
{
@@ -213,10 +213,10 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
_func = &logits_1d_max_f32;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
_func = &logits_1d_max_f16;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
}
@@ -374,7 +374,7 @@ void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
{
Window window_max(window);
@@ -434,7 +434,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
{
@@ -532,10 +532,10 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor
_func = &logits_1d_shift_exp_sum_f32;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
_func = &logits_1d_shift_exp_sum_f16;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
break;
@@ -637,7 +637,7 @@ void logits_1d_norm_qs16(const ITensor *in, const ITensor *sum, ITensor *out, co
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
}
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window)
{
Window window_sum(window);
@@ -668,7 +668,7 @@ void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, con
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
}
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
void logits_1d_norm_f32(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window)
{
@@ -738,10 +738,10 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I
_func = &logits_1d_norm_f32;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
_func = &logits_1d_norm_f16;
break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
break;
diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
index 939f1b7c40..8728e77d9e 100644
--- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
@@ -39,6 +39,9 @@
#include <cstddef>
#include <cstdint>
+// Enable only if compiled for AArch64-V8.2-A targets
+#ifdef ARM_COMPUTE_AARCH64_V8_2
+
#define ASM_PREFETCH(address) "PRFM PLDL1KEEP, " address "\n"
#define ASM_PREFETCHL2(address) "PRFM PLDL2KEEP, " address "\n"
#define ASM_PREFETCHW(address) "PRFM PSTL1KEEP, " address "\n"
@@ -517,3 +520,4 @@ void NEGEMMLowpAArch64V8P4Kernel::run(const Window &window, const ThreadInfo &in
}
}
} // namespace arm_compute
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
diff --git a/src/runtime/NEON/functions/NEGEMMLowp.cpp b/src/runtime/NEON/functions/NEGEMMLowp.cpp
index 716d52ae63..12136cbcb5 100644
--- a/src/runtime/NEON/functions/NEGEMMLowp.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowp.cpp
@@ -58,7 +58,7 @@ void NEGEMMLowp::configure(const ITensor *a, const ITensor *b, ITensor *output)
const int cpu_has_dotprod = static_cast<int>(ci.CPU) & static_cast<int>(CPUTarget::DOT);
if(cpu_has_dotprod != 0)
{
-#if defined(__aarch64__)
+#ifdef ARM_COMPUTE_AARCH64_V8_2
// NEGEMMLowpAArch64V8P4Kernel only compiled in AArch64 targets
_mm_optimised_kernel = support::cpp14::make_unique<NEGEMMLowpAArch64V8P4Kernel>();
TensorShape shape_a_int = a->info()->tensor_shape();
@@ -83,7 +83,7 @@ void NEGEMMLowp::configure(const ITensor *a, const ITensor *b, ITensor *output)
_tmp_a.allocator()->allocate();
_tmp_b.allocator()->allocate();
-#endif /* defined(__aarch64__) */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
}
else
{
diff --git a/support/ToolchainSupport.h b/support/ToolchainSupport.h
index ab2a9fe80f..ad09c6535a 100644
--- a/support/ToolchainSupport.h
+++ b/support/ToolchainSupport.h
@@ -34,6 +34,8 @@
#include <string>
#include <type_traits>
+#include "support/Half.h"
+
namespace arm_compute
{
namespace support
@@ -319,6 +321,18 @@ inline void *align(std::size_t alignment, std::size_t size, void *&ptr, std::siz
return ptr = reinterpret_cast<void *>(aligned);
}
+
+// std::isfinite
+template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+inline bool isfinite(T value)
+{
+ return std::isfinite(value);
+}
+
+inline bool isfinite(half_float::half value)
+{
+ return half_float::isfinite(value);
+}
} // namespace cpp11
namespace cpp14
diff --git a/tests/AssetsLibrary.cpp b/tests/AssetsLibrary.cpp
index 87b0389039..f5eac8a59a 100644
--- a/tests/AssetsLibrary.cpp
+++ b/tests/AssetsLibrary.cpp
@@ -355,7 +355,7 @@ const RawTensor &AssetsLibrary::find_or_create_raw_tensor(const std::string &nam
{
std::lock_guard<std::mutex> guard(_format_lock);
- const RawTensor *ptr = _cache.find(std::make_tuple(name, format));
+ const RawTensor *ptr = _cache.find(std::forward_as_tuple(name, format));
if(ptr != nullptr)
{
@@ -372,14 +372,14 @@ const RawTensor &AssetsLibrary::find_or_create_raw_tensor(const std::string &nam
raw = std::move(dst);
}
- return _cache.add(std::make_tuple(name, format), std::move(raw));
+ return _cache.add(std::forward_as_tuple(name, format), std::move(raw));
}
const RawTensor &AssetsLibrary::find_or_create_raw_tensor(const std::string &name, Format format, Channel channel) const
{
std::lock_guard<std::mutex> guard(_channel_lock);
- const RawTensor *ptr = _cache.find(std::make_tuple(name, format, channel));
+ const RawTensor *ptr = _cache.find(std::forward_as_tuple(name, format, channel));
if(ptr != nullptr)
{
@@ -392,7 +392,7 @@ const RawTensor &AssetsLibrary::find_or_create_raw_tensor(const std::string &nam
(*get_extractor(format, channel))(src, dst);
- return _cache.add(std::make_tuple(name, format, channel), std::move(dst));
+ return _cache.add(std::forward_as_tuple(name, format, channel), std::move(dst));
}
TensorShape AssetsLibrary::get_image_shape(const std::string &name)
diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp
index a00e7ec2e6..fccb0f8ee2 100644
--- a/tests/benchmark/NEON/ActivationLayer.cpp
+++ b/tests/benchmark/NEON/ActivationLayer.cpp
@@ -45,11 +45,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NEActivationLayerFixture = ActivationLayerFixture<Tensor, NEActivationLayer, Accessor>;
diff --git a/tests/benchmark/NEON/BatchNormalizationLayer.cpp b/tests/benchmark/NEON/BatchNormalizationLayer.cpp
index 5bfd234704..84f3ccc3e4 100644
--- a/tests/benchmark/NEON/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/NEON/BatchNormalizationLayer.cpp
@@ -41,11 +41,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NEBatchNormalizationLayerFixture = BatchNormalizationLayerFixture<Tensor, NEBatchNormalizationLayer, Accessor>;
diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp
index effcf63b8b..f2d9305464 100644
--- a/tests/benchmark/NEON/ConvolutionLayer.cpp
+++ b/tests/benchmark/NEON/ConvolutionLayer.cpp
@@ -45,11 +45,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NEConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEConvolutionLayer, Accessor>;
diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp
index c12d7cedb9..befa530c8f 100644
--- a/tests/benchmark/NEON/FullyConnectedLayer.cpp
+++ b/tests/benchmark/NEON/FullyConnectedLayer.cpp
@@ -43,11 +43,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NEFullyConnectedLayerFixture = FullyConnectedLayerFixture<Tensor, NEFullyConnectedLayer, Accessor>;
diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp
index a713501d9a..fb43b9cf0d 100644
--- a/tests/benchmark/NEON/GEMM.cpp
+++ b/tests/benchmark/NEON/GEMM.cpp
@@ -43,9 +43,9 @@ namespace
{
const auto data_types = framework::dataset::make("DataType",
{
-#if ARM_COMPUTE_ENABLE_FP16
+#if ARM_COMPUTE_AARCH64_V8_2
DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
DataType::F32,
DataType::QS8
});
diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp
index 383cec98d0..22957e4a2e 100644
--- a/tests/benchmark/NEON/NormalizationLayer.cpp
+++ b/tests/benchmark/NEON/NormalizationLayer.cpp
@@ -40,11 +40,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F32 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NENormalizationLayerFixture = NormalizationLayerFixture<Tensor, NENormalizationLayer, Accessor>;
diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp
index 8d4e0a086d..9999fec5ce 100644
--- a/tests/benchmark/NEON/PoolingLayer.cpp
+++ b/tests/benchmark/NEON/PoolingLayer.cpp
@@ -45,11 +45,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NEPoolingLayerFixture = PoolingLayerFixture<Tensor, NEPoolingLayer, Accessor>;
diff --git a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
index 19598e3ca5..1b90fbddfb 100644
--- a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
+++ b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
@@ -45,11 +45,11 @@ namespace test
{
namespace
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
-#else /* ARM_COMPUTE_ENABLE_FP16 */
+#else /* ARM_COMPUTE_AARCH64_V8_2 */
const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
} // namespace
using NEAlexNetFixture = AlexNetFixture<ITensor,
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index f58f3a8338..0e32d60485 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -78,9 +78,9 @@ AbsoluteTolerance<float> tolerance(DataType data_type, ActivationLayerInfo::Acti
/** CNN data types */
const auto CNNDataTypes = framework::dataset::make("DataType",
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
DataType::F32,
DataType::QS8,
DataType::QS16,
@@ -141,7 +141,7 @@ template <typename T>
using NEActivationLayerFixture = ActivationValidationFixture<Tensor, Accessor, NEActivationLayer, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset),
framework::dataset::make("DataType",
@@ -158,7 +158,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerFixture<half>, framework::Data
validate(Accessor(_target), _reference, tolerance(_data_type, _function));
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType",
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index edc59a909f..6bffd02ea2 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -52,10 +52,10 @@ const auto ArithmeticAdditionQS8Dataset = combine(combine(framework::dataset::ma
framework::dataset::make("DataType", DataType::QS8));
const auto ArithmeticAdditionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)),
framework::dataset::make("DataType", DataType::QS16));
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataType", DataType::F16));
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
} // namespace
@@ -180,7 +180,7 @@ TEST_SUITE_END()
TEST_SUITE_END()
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(F16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
@@ -189,7 +189,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::D
validate(Accessor(_target), _reference);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(F32)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index 3d184f13f8..dcaf9d987b 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -53,10 +53,10 @@ const auto ArithmeticSubtractionQS8Dataset = combine(combine(framework::dataset:
framework::dataset::make("DataType", DataType::QS8));
const auto ArithmeticSubtractionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)),
framework::dataset::make("DataType", DataType::QS16));
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataType", DataType::F16));
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataType", DataType::F32));
} // namespace
@@ -182,7 +182,7 @@ TEST_SUITE_END()
TEST_SUITE_END()
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
@@ -191,7 +191,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework
validate(Accessor(_target), _reference);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index 9ca26ebdaa..163f71f375 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -44,9 +44,9 @@ namespace validation
namespace
{
constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
constexpr AbsoluteTolerance<float> tolerance_qs8(3.0f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
constexpr AbsoluteTolerance<float> tolerance_qs16(6.0f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */
} // namespace
@@ -89,7 +89,7 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<float>, framewor
}
TEST_SUITE_END()
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(Float16)
FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
framework::dataset::make("DataType", DataType::F16)))
@@ -98,7 +98,7 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework
validate(Accessor(_target), _reference, tolerance_f16, 0);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(Quantized)
template <typename T>
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index f74925c82b..3003409062 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -44,17 +44,17 @@ namespace validation
namespace
{
const AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
const AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
const AbsoluteTolerance<float> tolerance_q(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
/** CNN data types */
const auto CNNDataTypes = framework::dataset::make("DataType",
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
DataType::F32,
DataType::QS8,
DataType::QS16,
@@ -104,7 +104,7 @@ template <typename T>
using NEConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true, false })),
@@ -121,7 +121,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixture<half>, framework::Dat
validate(Accessor(_target), _reference, tolerance_f16);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
index c500041a7c..6b0f311ec5 100644
--- a/tests/validation/NEON/DepthConcatenateLayer.cpp
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -48,7 +48,7 @@ template <typename T>
using NEDepthConcatenateLayerFixture = DepthConcatenateValidationFixture<Tensor, ITensor, Accessor, NEDepthConcatenate, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
DataType::F16)))
@@ -63,7 +63,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<half>, framework
validate(Accessor(_target), _reference);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index 9349ff8db8..23c02cb0d5 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -43,9 +43,9 @@ namespace validation
namespace
{
constexpr AbsoluteTolerance<float> tolerance_qs(1.f); /**< Tolerance for fixed point tests */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
constexpr AbsoluteTolerance<float> tolerance_fp16(0.01f); /**< Tolerance for half precision floating point tests */
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
/** Direct convolution data set. */
@@ -97,7 +97,7 @@ template <typename T>
using NEDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F16)))
{
@@ -105,7 +105,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<half>, framework::Da
validate(Accessor(_target), _reference, tolerance_fp16);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/NEON/Flatten.cpp b/tests/validation/NEON/Flatten.cpp
index f8d8301b14..72da22e034 100644
--- a/tests/validation/NEON/Flatten.cpp
+++ b/tests/validation/NEON/Flatten.cpp
@@ -62,7 +62,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<float>, framework::Datase
}
TEST_SUITE_END()
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<half>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
framework::dataset::make("DataType", DataType::F16)))
@@ -77,7 +77,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<half>, framework::Dataset
validate(Accessor(_target), _reference);
}
TEST_SUITE_END()
-#endif // ARM_COMPUTE_ENABLE_FP16
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE_END()
TEST_SUITE(Quantized)
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index b5ae48a1a0..ec3422cee7 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -44,18 +44,18 @@ namespace
{
/** Tolerance for float operations */
constexpr RelativeTolerance<float> tolerance_f32(0.01f);
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
constexpr RelativeTolerance<float> tolerance_f16(0.01f);
-#endif /* ARM_COMPUTE_ENABLE_FP16*/
+#endif /* ARM_COMPUTE_AARCH64_V8_2*/
/** Tolerance for fixed point operations */
constexpr AbsoluteTolerance<float> tolerance_fixed_point(1.f);
/** CNN data types */
const auto CNNDataTypes = framework::dataset::make("DataType",
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
DataType::F32,
DataType::QS8,
DataType::QS16,
@@ -119,7 +119,7 @@ template <typename T>
using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
FullyConnectedParameters),
@@ -136,7 +136,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::
validate(Accessor(_target), _reference, tolerance_f16);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index f2cfd0c957..d5e3aee363 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -49,9 +49,9 @@ constexpr AbsoluteTolerance<float> tolerance_q(1.0f); /**< Tolerance value for
/** CNN data types */
const auto CNNDataTypes = framework::dataset::make("DataType",
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
DataType::F32,
DataType::QS8,
DataType::QS16,
@@ -89,7 +89,7 @@ template <typename T>
using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F16)))
{
@@ -103,7 +103,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NI
validate(Accessor(_target), _reference, tolerance_f);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp
index 6d66549a8c..1ad32bed4d 100644
--- a/tests/validation/NEON/HarrisCorners.cpp
+++ b/tests/validation/NEON/HarrisCorners.cpp
@@ -46,9 +46,9 @@ namespace
{
const auto use_fp16 = framework::dataset::make("UseFP16",
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
true,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
false
});
diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp
index 74a8ead144..2bc26810d5 100644
--- a/tests/validation/NEON/NormalizationLayer.cpp
+++ b/tests/validation/NEON/NormalizationLayer.cpp
@@ -44,9 +44,9 @@ namespace validation
namespace
{
/** Tolerance for float operations */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
constexpr AbsoluteTolerance<float> tolerance_f16(0.001f);
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
/** Tolerance for fixed point operations */
constexpr AbsoluteTolerance<int8_t> tolerance_qs8(2);
@@ -66,7 +66,7 @@ template <typename T>
using NENormalizationLayerFixture = NormalizationValidationFixture<Tensor, Accessor, NENormalizationLayer, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
{
@@ -79,7 +79,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NENormalizationLayerFixture<half>, framework::D
validate(Accessor(_target), _reference, tolerance_f16);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index e1e62e46ef..d7131068a9 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -186,7 +186,7 @@ TEST_SUITE_END() // ScaleOther
TEST_SUITE_END() // S16toS16
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(F16toF16)
TEST_SUITE(Scale255)
@@ -194,7 +194,7 @@ PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF16Fixture<half_flo
TEST_SUITE_END() // Scale255
TEST_SUITE_END() // F16toF16
-#endif // ARM_COMPUTE_ENABLE_FP16
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(F32toF32)
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index ff68ecf481..a721fb9d15 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -52,9 +52,9 @@ const auto PoolingLayerDatasetQS = combine(combine(framework::dataset::make("Poo
framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) }));
constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
constexpr AbsoluteTolerance<float> tolerance_qs8(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
constexpr AbsoluteTolerance<float> tolerance_qs16(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
} // namespace
@@ -83,7 +83,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<float>, framework::Datase
}
TEST_SUITE_END()
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP,
framework::dataset::make("DataType", DataType::F16))))
@@ -98,7 +98,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<half>, framework::Dataset
validate(Accessor(_target), _reference, tolerance_f16);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE_END()
template <typename T>
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 5ede321db1..fde5867a3d 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -44,18 +44,18 @@ namespace
{
/** Tolerance for float operations */
constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f);
-#endif /* ARM_COMPUTE_ENABLE_FP16*/
+#endif /* ARM_COMPUTE_AARCH64_V8_2*/
/** Tolerance for fixed point operations */
constexpr AbsoluteTolerance<int16_t> tolerance_fixed_point(2);
/** CNN data types */
const auto CNNDataTypes = framework::dataset::make("DataType",
{
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
DataType::F32,
DataType::QS8,
DataType::QS16,
@@ -97,7 +97,7 @@ template <typename T>
using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>;
TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
{
@@ -110,7 +110,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::Dataset
validate(Accessor(_target), _reference, tolerance_f16);
}
TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index 8ed98fbc82..7a01085514 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/Types.h"
+#include "support/ToolchainSupport.h"
#include "tests/IAccessor.h"
#include "tests/SimpleTensor.h"
#include "tests/Types.h"
@@ -256,7 +257,7 @@ struct compare<AbsoluteTolerance<U>> : public compare_base<AbsoluteTolerance<U>>
operator bool() const
{
- if(!std::isfinite(this->_target) || !std::isfinite(this->_reference))
+ if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference))
{
return false;
}
@@ -286,7 +287,7 @@ struct compare<RelativeTolerance<U>> : public compare_base<RelativeTolerance<U>>
operator bool() const
{
- if(!std::isfinite(this->_target) || !std::isfinite(this->_reference))
+ if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference))
{
return false;
}