From 5edbd1c5dce43b66f30c903797a91e39369c5b62 Mon Sep 17 00:00:00 2001 From: Ioan-Cristian Szabo Date: Mon, 13 Nov 2017 13:34:08 +0000 Subject: COMPMID-556: Add support to build arm64-v8.2-a for Android platform (clang compiler) Change-Id: Ibb779dd3a8d10786da6d8f70590e654e14654d7b Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95530 Tested-by: Kaizen Reviewed-by: Pablo Tello --- SConstruct | 8 +++---- arm_compute/core/NEON/NEMath.h | 4 ++-- arm_compute/core/NEON/NEMath.inl | 4 ++-- arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 6 ++--- .../core/NEON/kernels/NEActivationLayerKernel.h | 8 +++---- arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 6 ++--- arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 6 ++--- .../core/NEON/kernels/NEHarrisCornersKernel.h | 6 ++--- .../core/NEON/kernels/NEMagnitudePhaseKernel.h | 6 ++--- .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 6 ++--- .../convolution/NEDirectConvolutionDetail.h | 4 ++-- scripts/fix_code_formatting.sh | 8 +++---- src/core/NEON/kernels/NEAccumulateKernel.cpp | 4 ++-- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 12 +++++----- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 10 ++++---- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 10 ++++---- .../kernels/NEBatchNormalizationLayerKernel.cpp | 8 +++---- src/core/NEON/kernels/NEBox3x3Kernel.cpp | 4 ++-- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 4 ++-- ...EDirectConvolutionLayerBiasAccumulateKernel.cpp | 8 +++---- .../kernels/NEDirectConvolutionLayerKernel.cpp | 24 +++++++++---------- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 4 ++-- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 8 +++---- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 28 +++++++++++----------- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 4 ++-- src/core/NEON/kernels/NEIm2ColKernel.cpp | 8 +++---- .../NELocallyConnectedMatrixMultiplyKernel.cpp | 6 ++--- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 4 ++-- .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 4 ++-- .../NEON/kernels/NENormalizationLayerKernel.cpp | 4 ++-- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 10 ++++---- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 16 ++++++------- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 24 +++++++++---------- tests/benchmark/NEON/ActivationLayer.cpp | 6 ++--- tests/benchmark/NEON/BatchNormalizationLayer.cpp | 6 ++--- tests/benchmark/NEON/ConvolutionLayer.cpp | 6 ++--- tests/benchmark/NEON/FullyConnectedLayer.cpp | 6 ++--- tests/benchmark/NEON/GEMM.cpp | 4 ++-- tests/benchmark/NEON/NormalizationLayer.cpp | 6 ++--- tests/benchmark/NEON/PoolingLayer.cpp | 6 ++--- tests/benchmark/NEON/SYSTEM/AlexNet.cpp | 6 ++--- tests/validation/NEON/ActivationLayer.cpp | 8 +++---- tests/validation/NEON/ArithmeticAddition.cpp | 8 +++---- tests/validation/NEON/ArithmeticSubtraction.cpp | 8 +++---- tests/validation/NEON/BatchNormalizationLayer.cpp | 8 +++---- tests/validation/NEON/ConvolutionLayer.cpp | 12 +++++----- tests/validation/NEON/DepthConcatenateLayer.cpp | 4 ++-- tests/validation/NEON/DirectConvolutionLayer.cpp | 8 +++---- tests/validation/NEON/Flatten.cpp | 4 ++-- tests/validation/NEON/FullyConnectedLayer.cpp | 12 +++++----- tests/validation/NEON/GEMM.cpp | 8 +++---- tests/validation/NEON/HarrisCorners.cpp | 4 ++-- tests/validation/NEON/Magnitude.cpp | 8 +++---- tests/validation/NEON/NormalizationLayer.cpp | 8 +++---- tests/validation/NEON/PixelWiseMultiplication.cpp | 4 ++-- tests/validation/NEON/PoolingLayer.cpp | 8 +++---- tests/validation/NEON/SoftmaxLayer.cpp | 12 +++++----- 57 files changed, 222 insertions(+), 224 deletions(-) diff --git a/SConstruct b/SConstruct index 264ed9cbe0..0385e2e392 100644 --- a/SConstruct +++ b/SConstruct @@ -127,13 +127,11 @@ elif env['arch'] == 'arm64-v8a': elif env['os'] == 'android': prefix = "aarch64-linux-android-" elif env['arch'] == 'arm64-v8.2-a': + env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2']) + if os.environ.get('CXX', 'g++') == 'clang++': - print("Clang cannot compile armv8.2-a code") - Exit(1) + env.Append(CXXFLAGS = ['-fno-integrated-as']) - env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd']) - env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2']) - if env['os'] == 'linux': prefix = "aarch64-linux-gnu-" elif env['os'] == 'bare_metal': diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h index 1fc0d5c0b8..5c60d73de4 100644 --- a/arm_compute/core/NEON/NEMath.h +++ b/arm_compute/core/NEON/NEMath.h @@ -116,7 +116,7 @@ float32x4_t vtanhq_f32(float32x4_t val); */ float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Calculate hyperbolic tangent. * * tanh(x) = (e^2x - 1)/(e^2x + 1) @@ -179,7 +179,7 @@ float16x8_t vexpq_f16(float16x8_t x); * @return The calculated power. */ float16x8_t vpowq_f16(float16x8_t val, float16x8_t n); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute #include "arm_compute/core/NEON/NEMath.inl" #endif /* __ARM_COMPUTE_NEMATH_H__ */ diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index 250114f4df..50f217c1f1 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -168,7 +168,7 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) { return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /* Exponent polynomial coefficients */ const std::array exp_tab_f16 = { @@ -301,5 +301,5 @@ inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n) { return vexpq_f16(vmulq_f16(n, vlogq_f16(val))); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h index d6ad0be03e..fa8a3be92f 100644 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -80,7 +80,7 @@ protected: float _alpha; }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Interface for the accumulate weighted kernel using F16 */ class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel { @@ -88,9 +88,9 @@ public: // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; }; -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** Interface for the accumulate squared kernel * diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index e70dd454df..a3fd3feb58 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -27,9 +27,9 @@ #include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/NEON/INEKernel.h" -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #include -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ namespace arm_compute { @@ -76,14 +76,14 @@ private: */ template typename std::enable_if::value, void>::type activation(const Window &window); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template typename std::enable_if::value, void>::type activation(const Window &window); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h index a53e4d77f7..29248f6538 100644 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -46,7 +46,7 @@ public: BorderSize border_size() const override; }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** NEON kernel to perform a Box 3x3 filter using F16 simd */ class NEBox3x3FP16Kernel : public NEBox3x3Kernel @@ -55,8 +55,8 @@ public: // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; }; -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ using NEBox3x3FP16Kernel = NEBox3x3Kernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute #endif /*__ARM_COMPUTE_NEBOX3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h index 4f1a1f32dc..a57c3894b1 100644 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -81,7 +81,7 @@ protected: ITensor *_phase; /**< Destination tensor - Quantized phase */ }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** NEON kernel to perform Gradient computation */ class NEGradientFP16Kernel : public NEGradientKernel @@ -90,9 +90,9 @@ public: // Inherited methods overriden: void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; }; -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ using NEGradientFP16Kernel = NEGradientKernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** NEON kernel to perform Non-Maxima suppression for Canny Edge. * diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h index cfa5220c4d..c3c37e4d24 100644 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -99,7 +99,7 @@ private: HarrisScoreFunction *_func; }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Interface for the accumulate Weighted kernel using F16 */ template class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel @@ -118,9 +118,9 @@ private: /** Harris Score function to use for the particular image types passed to configure() */ HarrisScoreFunction *_func; }; -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute #endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h index fba8d8dd39..46b2a8ddb4 100644 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -94,7 +94,7 @@ private: ITensor *_phase; /**< Output - Phase */ }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Template interface for the kernel to compute magnitude and phase */ template class NEMagnitudePhaseFP16Kernel : public INEKernel @@ -156,9 +156,9 @@ private: ITensor *_magnitude; /**< Output - Magnitude */ ITensor *_phase; /**< Output - Phase */ }; -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute #endif /* __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h index f47b487c91..da8aecff5c 100644 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -78,7 +78,7 @@ protected: ITensor *_output; /**< Destination tensor */ }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32 */ class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel @@ -92,8 +92,8 @@ public: */ void configure(const ITensor *input, ITensor *output, bool border_undefined); }; -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute #endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h index aac70b0a81..657bcd63d0 100644 --- a/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h +++ b/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h @@ -315,7 +315,7 @@ inline void store_results<3>(qint16_t *buffer, const qint16x8x2_t &values) vst1_qs16(buffer, vget_low_s16(values.val[0])); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Loads a 3x3 matrix as a row (float16_t). * * @param[in] ptr Pointer to a float 3x3 matrix. @@ -455,7 +455,7 @@ inline void store_results<3>(float16_t *buffer, const float16x8x2_t &values) { vst1_f16(buffer, vget_low_f16(values.val[0])); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** Get the number of elements processed on 3x3 convolution. * diff --git a/scripts/fix_code_formatting.sh b/scripts/fix_code_formatting.sh index 721ade853f..021f92516f 100755 --- a/scripts/fix_code_formatting.sh +++ b/scripts/fix_code_formatting.sh @@ -24,7 +24,7 @@ if [ $# -eq 0 ] then files=$(find $DIRECTORIES -type f \( -name \*.cpp -o -iname \*.h -o -name \*.inl -o -name \*.cl -o -name \*.cs \)) else - files=$@ + files=$@ fi for f in $files do @@ -33,7 +33,7 @@ do continue fi - sed -i 's/\t/ /g' $f - clang-format -i -style=file $f - astyle -n -q $ASTYLE_PARAMETERS $f + sed -i 's/\t/ /g' $f + clang-format -i -style=file $f + astyle -n -q $ASTYLE_PARAMETERS $f done diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index 856e3acb35..dae08008fd 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -41,7 +41,7 @@ class Coordinates; /* Max S16 value used for saturation purposes. */ const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast(INT16_MAX)); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC namespace fp16 { inline float16x8x2_t convert_u8x16_to_f16x8x2(uint8x16_t input) @@ -132,7 +132,7 @@ void NEAccumulateWeightedFP16Kernel::run(const Window &window, const ThreadInfo }, input, accum); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ namespace { diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 8dfce0f7b5..94bd5f15e3 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -81,7 +81,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // Activation functions : FP16 static std::map act_map_f16 = { @@ -96,7 +96,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, }; -#endif /* ARM_COMPUTE_AARCH64_V8_2*/ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ // Activation functions : QS8 static std::map act_map_qs8 = @@ -140,11 +140,11 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat case DataType::F32: _func = act_map_f32[activation_info.activation()]; break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: _func = act_map_f16[activation_info.activation()]; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -174,7 +174,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat ICPPKernel::configure(win); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) { @@ -305,7 +305,7 @@ typename std::enable_if::value, void>::type NEActivat }, input, output); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 02fabcaff8..8e55994aaa 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -145,7 +145,7 @@ inline int16x8x2_t vqadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b) return res; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) { const float16x8x2_t res = @@ -158,11 +158,11 @@ inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) return res; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC Iterator input1(in1, window); Iterator input2(in2, window); Iterator output(out, window); @@ -175,13 +175,13 @@ void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const vst2q_f16(reinterpret_cast(output.ptr()), vadd2q_f16(a, b)); }, input1, input2, output); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(in1); ARM_COMPUTE_UNUSED(in2); ARM_COMPUTE_UNUSED(out); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 8aadab9fc3..1d86a35cc4 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -157,7 +157,7 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o input1, input2, output); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) { const float16x8x2_t res = @@ -170,11 +170,11 @@ inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) return res; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC Iterator input1(in1, window); Iterator input2(in2, window); Iterator output(out, window); @@ -187,13 +187,13 @@ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const vst2q_f16(reinterpret_cast(output.ptr()), vsub2q_f16(a, b)); }, input1, input2, output); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(in1); ARM_COMPUTE_UNUSED(in2); ARM_COMPUTE_UNUSED(out); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index c48653ad17..1123f2c9ca 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -169,7 +169,7 @@ void batch_normalization_fp32(ITensor *in, ITensor *out, const ITensor *mean, co input, output); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window) { Iterator input(in, window); @@ -212,7 +212,7 @@ void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, co }, input, output); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon) { @@ -258,11 +258,11 @@ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, num_elems_processed_per_iteration = 4; break; case DataType::F16: -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC _func = &batch_normalization_fp16; num_elems_processed_per_iteration = 8; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Element size not supported"); break; diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index be8beaeacb..0c9700526b 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -33,7 +33,7 @@ using namespace arm_compute; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); @@ -104,7 +104,7 @@ void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info) }, input, output); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ BorderSize NEBox3x3Kernel::border_size() const { diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index 944f29d506..9dfd580a25 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -51,7 +51,7 @@ constexpr int EDGE = 255; constexpr int MAYBE = 127; } // namespace -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC namespace fp16 { inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) @@ -787,7 +787,7 @@ void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITens INEKernel::configure(win); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ namespace { diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp index 06620d45aa..f00af9f93e 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp @@ -124,7 +124,7 @@ inline qint32x4_t internal_vqaddq(const qint32x4_t &x, const qint32x4_t &y) return vqaddq_qs32(x, y); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC inline float16x8_t internal_vld1q(const float16_t *in) { return vld1q_f16(in); @@ -141,7 +141,7 @@ inline float16x8_t internal_vqaddq(const float16x8_t &x, const float16x8_t &y) { return vaddq_f16(x, y); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template void accumulate_bias(ITensor *input, const ITensor *bias, const Window window, ITensor *output) @@ -246,13 +246,13 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con _func = (output == nullptr) ? &accumulate_bias : &accumulate_bias; break; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { _func = (output == nullptr) ? &accumulate_bias : &accumulate_bias; break; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: { _func = (output == nullptr) ? &accumulate_bias : &accumulate_bias; diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index bf23c4b2d2..78afbc2c20 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -70,7 +70,7 @@ inline qint16x8_t internal_vdupq_n(qint16_t v) return vdupq_n_qs16(v); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template float16x8_t internal_vld1q(const float16_t *in); @@ -115,7 +115,7 @@ inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, co ARM_COMPUTE_UNUSED(fixed_point_position); return vaddq_f16(x, vmulq_f16(y, z)); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template float32x4_t internal_vld1q(const float *in); @@ -429,7 +429,7 @@ public: } }; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template void accumulate_results(float16_t *buffer, const float16x8x2_t &values); @@ -453,7 +453,7 @@ void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values) vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0]))); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template float32x4x2_t convolve_5x5(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4, @@ -1064,9 +1064,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens { switch(input->info()->data_type()) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QS8: case DataType::QS16: _num_elems_written_per_iteration = 8; @@ -1099,9 +1099,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens _num_elems_read_per_iteration = 12; _num_elems_written_per_iteration = 16 >> conv_stride_x; break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QS8: case DataType::QS16: _num_weight_elems_read_per_row = 8 + _kernel_size - 1; @@ -1163,11 +1163,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo case DataType::F32: convolve_1x1(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: convolve_1x1(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Data type not supported"); break; @@ -1184,11 +1184,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo case DataType::F32: convolve_3x3(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: convolve_3x3(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp index e61f95221f..683510879b 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp @@ -108,7 +108,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI in0_out, in1); break; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { execute_window_loop(window, [&](const Coordinates & id) @@ -128,7 +128,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI in0_out, in1); break; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QS8: { execute_window_loop(window, [&](const Coordinates & id) diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index c94d3b1416..dfba74355b 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -66,7 +66,7 @@ void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &wi in, out); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta) { const float16x8_t beta_f16 = vdupq_n_f16(beta); @@ -89,7 +89,7 @@ void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &wi }, in, out); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta) { @@ -167,10 +167,10 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output _func = &matrix_addition_qs16; break; case DataType::F16: -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC _func = &matrix_addition_f16; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index 4fcf6e2f37..a583c1dfd4 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -53,7 +53,7 @@ namespace template void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto width_matrix_b = static_cast(output->info()->dimension(0)); const auto in_b_stride = static_cast(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); const auto num_elems_vec_a = static_cast(input0->info()->dimension(0)); @@ -186,7 +186,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT }, ina, inb, out); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(input0); ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_UNUSED(output); @@ -194,7 +194,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_ERROR("Not implemented"); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } template @@ -915,7 +915,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT template void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()); const size_t out_stride = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type()); const int num_elems_matrix_b_x = input1->info()->dimension(0); @@ -1051,14 +1051,14 @@ void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT vst1q_f16(mtx_out + 3 * out_stride, c.val[3]); }, ina, inb, out); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(input0); ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_ERROR("Not implemented"); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } template @@ -1454,13 +1454,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor num_elems_processed_per_iteration_x = 16; break; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { num_elems_processed_per_iteration_x = 32; break; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: { ARM_COMPUTE_ERROR("Data type not supported"); @@ -1503,13 +1503,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor num_elems_processed_per_iteration_x = 8; break; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { num_elems_processed_per_iteration_x = 8; break; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: { ARM_COMPUTE_ERROR("Data type not supported"); @@ -1563,14 +1563,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf vector_matrix_multiply_qs16(_input0, _input1, _output, window, info, _alpha); break; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { multiply_alpha ? vector_matrix_multiply_f16(_input0, _input1, _output, window, info, _alpha) : vector_matrix_multiply_f16(_input0, _input1, _output, window, info, _alpha); break; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: { ARM_COMPUTE_ERROR("Data type not supported"); @@ -1600,14 +1600,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf matrix_matrix_multiply_qs16(_input0, _input1, _output, window, _alpha); break; } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { multiply_alpha ? matrix_matrix_multiply_f16(_input0, _input1, _output, window, _alpha) : matrix_matrix_multiply_f16(_input0, _input1, _output, window, _alpha); break; } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: { ARM_COMPUTE_ERROR("Data type not supported"); diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index d8440e333e..14fa1b492f 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -39,7 +39,7 @@ using namespace arm_compute; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template class arm_compute::NEHarrisScoreFP16Kernel<3>; template class arm_compute::NEHarrisScoreFP16Kernel<5>; @@ -361,7 +361,7 @@ void NEHarrisScoreFP16Kernel::configure(const IImage *input1, const INEKernel::configure(win); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ template class arm_compute::NEHarrisScoreKernel<3>; template class arm_compute::NEHarrisScoreKernel<5>; diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 099f2f1be3..b03d5b2d03 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -311,11 +311,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::F32: _func = &NEIm2ColKernel::run_reduced; break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: _func = &NEIm2ColKernel::run_reduced; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QS8: _func = &NEIm2ColKernel::run_reduced; break; @@ -334,11 +334,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::F32: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::QS8: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index 99b4250bb9..52e30066de 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -51,7 +51,7 @@ namespace { void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto width_matrix_b = static_cast(output->info()->dimension(0)); const auto in_b_stride = static_cast(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); const auto num_elems_vec_a = static_cast(input0->info()->dimension(0)); @@ -160,14 +160,14 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT vst1q_f16(vec_out + 24, acc3); }, ina, out); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(input0); ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR("Not supported, recompile with -march=armv8.2-a+fp16+simd."); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info) diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index 646cb84444..2d7c29d9a0 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -51,7 +51,7 @@ constexpr float COEFF1 = 0.0663f; constexpr float COEFF2 = 0.2447f; } // namespace -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC namespace fp16 { inline float16x8_t inv(float16x8_t x) @@ -429,7 +429,7 @@ template class arm_compute::NEMagnitudePhaseFP16Kernel; template class arm_compute::NEMagnitudePhaseFP16Kernel; template class arm_compute::NEMagnitudePhaseFP16Kernel; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ namespace { diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index c4517dafaa..8f97e6ac16 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -41,7 +41,7 @@ namespace arm_compute class Coordinates; } // namespace arm_compute -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC namespace fp16 { inline void mask_top(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask) @@ -224,7 +224,7 @@ void NENonMaximaSuppression3x3FP16Kernel::configure(const ITensor *input, ITenso INEKernel::configure(win); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ namespace { diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index f6f3d5f238..a409519114 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -232,7 +232,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window) }, input, input_squared, output); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC else if(dt == DataType::F16) { const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff()); @@ -268,7 +268,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window) }, input, input_squared, output); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ else { ARM_COMPUTE_ERROR("Not supported"); diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index c622d4ffc2..a2f3cffdf3 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -37,9 +37,9 @@ #include #include -#if ARM_COMPUTE_AARCH64_V8_2 +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #include // needed for float16_t -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ using namespace arm_compute; @@ -334,7 +334,7 @@ void mul_F32_F32_F32_n(const void *__restrict input1_ptr, const void *__restrict template void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto input1 = static_cast(input1_ptr); const auto input2 = static_cast(input2_ptr); const auto output = static_cast(output_ptr); @@ -349,13 +349,13 @@ void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict } }; vst2q_f16(output, result); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(input1_ptr); ARM_COMPUTE_UNUSED(input2_ptr); ARM_COMPUTE_UNUSED(output_ptr); ARM_COMPUTE_UNUSED(scale); ARM_COMPUTE_ERROR("Not supported. Recompile the library with arch=arm64-v8.2-a."); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } template diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 122540b07e..3ea5bb5870 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -181,7 +181,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons } num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8; break; -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: switch(pool_size) { @@ -200,7 +200,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons break; } break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: switch(pool_size) { @@ -544,7 +544,7 @@ void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window template void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC Iterator input(_input, window_input); Iterator output(_output, window); @@ -603,17 +603,17 @@ void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window *(reinterpret_cast(output.ptr())) = vget_lane_f16(res, 0); }, input, output); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(window_input); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } template void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window) { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC Iterator input(_input, window_input); Iterator output(_output, window); constexpr int pool_size = 2; @@ -662,11 +662,11 @@ void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window vst1q_f16(reinterpret_cast(output.ptr()), res); }, input, output); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ ARM_COMPUTE_UNUSED(window_input); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } template diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 73aba284ca..f1027590e4 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -106,7 +106,7 @@ void logits_1d_max_qs16(const ITensor *in, ITensor *out, const Window &window) while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window) { Window in_slice = window.first_slice_window_1D(); @@ -138,7 +138,7 @@ void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window) } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void logits_1d_max_f32(const ITensor *in, ITensor *out, const Window &window) { @@ -213,10 +213,10 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) _func = &logits_1d_max_f32; break; case DataType::F16: -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC _func = &logits_1d_max_f16; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -374,7 +374,7 @@ void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window) { Window window_max(window); @@ -434,7 +434,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window) { @@ -532,10 +532,10 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor _func = &logits_1d_shift_exp_sum_f32; break; case DataType::F16: -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC _func = &logits_1d_shift_exp_sum_f16; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Unsupported data type."); break; @@ -637,7 +637,7 @@ void logits_1d_norm_qs16(const ITensor *in, const ITensor *sum, ITensor *out, co } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window) { Window window_sum(window); @@ -668,7 +668,7 @@ void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, con } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void logits_1d_norm_f32(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window) { @@ -738,10 +738,10 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I _func = &logits_1d_norm_f32; break; case DataType::F16: -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC _func = &logits_1d_norm_f16; break; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Unsupported data type."); break; diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp index 0e73b29c83..6344da8853 100644 --- a/tests/benchmark/NEON/ActivationLayer.cpp +++ b/tests/benchmark/NEON/ActivationLayer.cpp @@ -45,11 +45,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NEActivationLayerFixture = ActivationLayerFixture; diff --git a/tests/benchmark/NEON/BatchNormalizationLayer.cpp b/tests/benchmark/NEON/BatchNormalizationLayer.cpp index 84f3ccc3e4..5b568de5f2 100644 --- a/tests/benchmark/NEON/BatchNormalizationLayer.cpp +++ b/tests/benchmark/NEON/BatchNormalizationLayer.cpp @@ -41,11 +41,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NEBatchNormalizationLayerFixture = BatchNormalizationLayerFixture; diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp index 33cc3bc3ad..768a2907ae 100644 --- a/tests/benchmark/NEON/ConvolutionLayer.cpp +++ b/tests/benchmark/NEON/ConvolutionLayer.cpp @@ -45,11 +45,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NEConvolutionLayerFixture = ConvolutionLayerFixture; diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp index dda65882a4..c914d26861 100644 --- a/tests/benchmark/NEON/FullyConnectedLayer.cpp +++ b/tests/benchmark/NEON/FullyConnectedLayer.cpp @@ -43,11 +43,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NEFullyConnectedLayerFixture = FullyConnectedLayerFixture; diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp index fb43b9cf0d..9c4cd5936d 100644 --- a/tests/benchmark/NEON/GEMM.cpp +++ b/tests/benchmark/NEON/GEMM.cpp @@ -43,9 +43,9 @@ namespace { const auto data_types = framework::dataset::make("DataType", { -#if ARM_COMPUTE_AARCH64_V8_2 +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ DataType::F32, DataType::QS8 }); diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp index 22957e4a2e..3b1720dc95 100644 --- a/tests/benchmark/NEON/NormalizationLayer.cpp +++ b/tests/benchmark/NEON/NormalizationLayer.cpp @@ -40,11 +40,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F32 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NENormalizationLayerFixture = NormalizationLayerFixture; diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp index aa1ca5b42b..4815959ae6 100644 --- a/tests/benchmark/NEON/PoolingLayer.cpp +++ b/tests/benchmark/NEON/PoolingLayer.cpp @@ -45,11 +45,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NEPoolingLayerFixture = PoolingLayerFixture; diff --git a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp index 1b90fbddfb..ad16d47b46 100644 --- a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp +++ b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp @@ -45,11 +45,11 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 }); -#else /* ARM_COMPUTE_AARCH64_V8_2 */ +#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 }); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace using NEAlexNetFixture = AlexNetFixture tolerance(DataType data_type, ActivationLayerInfo::Acti /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ DataType::F32, DataType::QS8, DataType::QS16, @@ -141,7 +141,7 @@ template using NEActivationLayerFixture = ActivationValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType", @@ -158,7 +158,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerFixture, framework::Data validate(Accessor(_target), _reference, tolerance(_data_type, _function)); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp index 6bffd02ea2..4431371326 100644 --- a/tests/validation/NEON/ArithmeticAddition.cpp +++ b/tests/validation/NEON/ArithmeticAddition.cpp @@ -52,10 +52,10 @@ const auto ArithmeticAdditionQS8Dataset = combine(combine(framework::dataset::ma framework::dataset::make("DataType", DataType::QS8)); const auto ArithmeticAdditionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)), framework::dataset::make("DataType", DataType::QS16)); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); } // namespace @@ -180,7 +180,7 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) @@ -189,7 +189,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture, framework::D validate(Accessor(_target), _reference); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp index fcd415b130..0c2a7be60b 100644 --- a/tests/validation/NEON/ArithmeticSubtraction.cpp +++ b/tests/validation/NEON/ArithmeticSubtraction.cpp @@ -59,10 +59,10 @@ const auto ArithmeticSubtractionQS8Dataset = combine(combine(framework::dataset: framework::dataset::make("DataType", DataType::QS8)); const auto ArithmeticSubtractionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)), framework::dataset::make("DataType", DataType::QS16)); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); } // namespace @@ -254,7 +254,7 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) @@ -263,7 +263,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture, framework validate(Accessor(_target), _reference); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp index 163f71f375..b401f5585e 100644 --- a/tests/validation/NEON/BatchNormalizationLayer.cpp +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -44,9 +44,9 @@ namespace validation namespace { constexpr AbsoluteTolerance tolerance_f32(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ constexpr AbsoluteTolerance tolerance_qs8(3.0f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */ constexpr AbsoluteTolerance tolerance_qs16(6.0f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */ } // namespace @@ -89,7 +89,7 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framewor } TEST_SUITE_END() -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(Float16) FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(), framework::dataset::make("DataType", DataType::F16))) @@ -98,7 +98,7 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framework validate(Accessor(_target), _reference, tolerance_f16, 0); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(Quantized) template diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 3003409062..5e14a7c3cc 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -44,17 +44,17 @@ namespace validation namespace { const AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const AbsoluteTolerance tolerance_q(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */ /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ DataType::F32, DataType::QS8, DataType::QS16, @@ -104,7 +104,7 @@ template using NEConvolutionLayerFixture = ConvolutionValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true, false })), @@ -121,7 +121,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixture, framework::Dat validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(), diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp index 6b0f311ec5..9a0a34f8f8 100644 --- a/tests/validation/NEON/DepthConcatenateLayer.cpp +++ b/tests/validation/NEON/DepthConcatenateLayer.cpp @@ -48,7 +48,7 @@ template using NEDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16))) @@ -63,7 +63,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework validate(Accessor(_target), _reference); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp index 23c02cb0d5..52e2b2c034 100644 --- a/tests/validation/NEON/DirectConvolutionLayer.cpp +++ b/tests/validation/NEON/DirectConvolutionLayer.cpp @@ -43,9 +43,9 @@ namespace validation namespace { constexpr AbsoluteTolerance tolerance_qs(1.f); /**< Tolerance for fixed point tests */ -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance tolerance_fp16(0.01f); /**< Tolerance for half precision floating point tests */ -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ /** Direct convolution data set. */ @@ -97,7 +97,7 @@ template using NEDirectConvolutionLayerFixture = DirectConvolutionValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F16))) { @@ -105,7 +105,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture, framework::Da validate(Accessor(_target), _reference, tolerance_fp16); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32))) diff --git a/tests/validation/NEON/Flatten.cpp b/tests/validation/NEON/Flatten.cpp index 72da22e034..d80faf49db 100644 --- a/tests/validation/NEON/Flatten.cpp +++ b/tests/validation/NEON/Flatten.cpp @@ -62,7 +62,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture, framework::Datase } TEST_SUITE_END() -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()), framework::dataset::make("DataType", DataType::F16))) @@ -77,7 +77,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture, framework::Dataset validate(Accessor(_target), _reference); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE_END() TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp index ec3422cee7..2ff432b2d3 100644 --- a/tests/validation/NEON/FullyConnectedLayer.cpp +++ b/tests/validation/NEON/FullyConnectedLayer.cpp @@ -44,18 +44,18 @@ namespace { /** Tolerance for float operations */ constexpr RelativeTolerance tolerance_f32(0.01f); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr RelativeTolerance tolerance_f16(0.01f); -#endif /* ARM_COMPUTE_AARCH64_V8_2*/ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ /** Tolerance for fixed point operations */ constexpr AbsoluteTolerance tolerance_fixed_point(1.f); /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ DataType::F32, DataType::QS8, DataType::QS16, @@ -119,7 +119,7 @@ template using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), @@ -136,7 +136,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture, framework:: validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index d5e3aee363..129544e2f1 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -49,9 +49,9 @@ constexpr AbsoluteTolerance tolerance_q(1.0f); /**< Tolerance value for /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ DataType::F32, DataType::QS8, DataType::QS16, @@ -89,7 +89,7 @@ template using NEGEMMFixture = GEMMValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F16))) { @@ -103,7 +103,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture, framework::DatasetMode::NI validate(Accessor(_target), _reference, tolerance_f); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F32))) diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp index 1ad32bed4d..e5770e64a4 100644 --- a/tests/validation/NEON/HarrisCorners.cpp +++ b/tests/validation/NEON/HarrisCorners.cpp @@ -46,9 +46,9 @@ namespace { const auto use_fp16 = framework::dataset::make("UseFP16", { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC true, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ false }); diff --git a/tests/validation/NEON/Magnitude.cpp b/tests/validation/NEON/Magnitude.cpp index cdc29a58dc..3b7562b61c 100644 --- a/tests/validation/NEON/Magnitude.cpp +++ b/tests/validation/NEON/Magnitude.cpp @@ -45,13 +45,13 @@ AbsoluteTolerance tolerance(MagnitudeType magnitude_type) return AbsoluteTolerance((MagnitudeType::L1NORM == magnitude_type) ? 0 : 1); } -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template <> AbsoluteTolerance tolerance(MagnitudeType magnitude_type) { return AbsoluteTolerance((MagnitudeType::L1NORM == magnitude_type) ? half(0.0) : half(1.0)); } -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace @@ -105,7 +105,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEMagnitudeFixture, framework::Dataset } TEST_SUITE_END() // S16 -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)), @@ -116,7 +116,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture, framework validate(Accessor(_target), _reference, tolerance(_magnitude_type)); } TEST_SUITE_END() // F16 -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE_END() TEST_SUITE_END() diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp index 2bc26810d5..e22922cf8a 100644 --- a/tests/validation/NEON/NormalizationLayer.cpp +++ b/tests/validation/NEON/NormalizationLayer.cpp @@ -44,9 +44,9 @@ namespace validation namespace { /** Tolerance for float operations */ -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance tolerance_f16(0.001f); -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ constexpr AbsoluteTolerance tolerance_f32(0.00001f); /** Tolerance for fixed point operations */ constexpr AbsoluteTolerance tolerance_qs8(2); @@ -66,7 +66,7 @@ template using NENormalizationLayerFixture = NormalizationValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16))) { @@ -79,7 +79,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NENormalizationLayerFixture, framework::D validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32))) diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp index d7131068a9..8d2ece539b 100644 --- a/tests/validation/NEON/PixelWiseMultiplication.cpp +++ b/tests/validation/NEON/PixelWiseMultiplication.cpp @@ -186,7 +186,7 @@ TEST_SUITE_END() // ScaleOther TEST_SUITE_END() // S16toS16 -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16toF16) TEST_SUITE(Scale255) @@ -194,7 +194,7 @@ PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF16Fixture tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ constexpr AbsoluteTolerance tolerance_qs8(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ constexpr AbsoluteTolerance tolerance_qs16(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ } // namespace @@ -85,7 +85,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture, framework::Datase } TEST_SUITE_END() -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", DataType::F16)))) @@ -100,7 +100,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture, framework::Dataset validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE_END() template diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index fde5867a3d..9d1795ebb0 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -44,18 +44,18 @@ namespace { /** Tolerance for float operations */ constexpr AbsoluteTolerance tolerance_f32(0.000001f); -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance tolerance_f16(0.0001f); -#endif /* ARM_COMPUTE_AARCH64_V8_2*/ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ /** Tolerance for fixed point operations */ constexpr AbsoluteTolerance tolerance_fixed_point(2); /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ DataType::F32, DataType::QS8, DataType::QS16, @@ -97,7 +97,7 @@ template using NESoftmaxLayerFixture = SoftmaxValidationFixture; TEST_SUITE(Float) -#ifdef ARM_COMPUTE_AARCH64_V8_2 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { @@ -110,7 +110,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::Dataset validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32))) -- cgit v1.2.1