aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorIoan-Cristian Szabo <ioan-cristian.szabo@arm.com>2017-11-13 13:34:08 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commit5edbd1c5dce43b66f30c903797a91e39369c5b62 (patch)
treed4c20c1a92ff9a7e26ffc9b1f6976ee12a2e2ae0 /src/core
parent84b51ad1aaa530d397761f2b6da65add9dc8a6b0 (diff)
downloadComputeLibrary-5edbd1c5dce43b66f30c903797a91e39369c5b62.tar.gz
COMPMID-556: Add support to build arm64-v8.2-a for Android platform (clang compiler)
Change-Id: Ibb779dd3a8d10786da6d8f70590e654e14654d7b Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95530 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r--src/core/NEON/kernels/NEAccumulateKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp12
-rw-r--r--src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEBox3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NECannyEdgeKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp24
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp28
-rw-r--r--src/core/NEON/kernels/NEHarrisCornersKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp6
-rw-r--r--src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NENormalizationLayerKernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp16
-rw-r--r--src/core/NEON/kernels/NESoftmaxLayerKernel.cpp24
21 files changed, 102 insertions, 102 deletions
diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp
index 856e3acb35..dae08008fd 100644
--- a/src/core/NEON/kernels/NEAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp
@@ -41,7 +41,7 @@ class Coordinates;
/* Max S16 value used for saturation purposes. */
const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast<uint16_t>(INT16_MAX));
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
namespace fp16
{
inline float16x8x2_t convert_u8x16_to_f16x8x2(uint8x16_t input)
@@ -132,7 +132,7 @@ void NEAccumulateWeightedFP16Kernel::run(const Window &window, const ThreadInfo
},
input, accum);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
namespace
{
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 8dfce0f7b5..94bd5f15e3 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -81,7 +81,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
};
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Activation functions : FP16
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
{
@@ -96,7 +96,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
{ ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
};
-#endif /* ARM_COMPUTE_AARCH64_V8_2*/
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
// Activation functions : QS8
static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
@@ -140,11 +140,11 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
case DataType::F32:
_func = act_map_f32[activation_info.activation()];
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = act_map_f16[activation_info.activation()];
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
}
@@ -174,7 +174,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
ICPPKernel::configure(win);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
@@ -305,7 +305,7 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
},
input, output);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index 02fabcaff8..8e55994aaa 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -145,7 +145,7 @@ inline int16x8x2_t vqadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b)
return res;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
const float16x8x2_t res =
@@ -158,11 +158,11 @@ inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
return res;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Iterator input1(in1, window);
Iterator input2(in2, window);
Iterator output(out, window);
@@ -175,13 +175,13 @@ void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const
vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vadd2q_f16(a, b));
},
input1, input2, output);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(in1);
ARM_COMPUTE_UNUSED(in2);
ARM_COMPUTE_UNUSED(out);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 8aadab9fc3..1d86a35cc4 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -157,7 +157,7 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o
input1, input2, output);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
const float16x8x2_t res =
@@ -170,11 +170,11 @@ inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
return res;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Iterator input1(in1, window);
Iterator input2(in2, window);
Iterator output(out, window);
@@ -187,13 +187,13 @@ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const
vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b));
},
input1, input2, output);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(in1);
ARM_COMPUTE_UNUSED(in2);
ARM_COMPUTE_UNUSED(out);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index c48653ad17..1123f2c9ca 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -169,7 +169,7 @@ void batch_normalization_fp32(ITensor *in, ITensor *out, const ITensor *mean, co
input, output);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window)
{
Iterator input(in, window);
@@ -212,7 +212,7 @@ void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, co
},
input, output);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon)
{
@@ -258,11 +258,11 @@ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output,
num_elems_processed_per_iteration = 4;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
_func = &batch_normalization_fp16;
num_elems_processed_per_iteration = 8;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Element size not supported");
break;
diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
index be8beaeacb..0c9700526b 100644
--- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
@@ -33,7 +33,7 @@
using namespace arm_compute;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
@@ -104,7 +104,7 @@ void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info)
},
input, output);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
BorderSize NEBox3x3Kernel::border_size() const
{
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index 944f29d506..9dfd580a25 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -51,7 +51,7 @@ constexpr int EDGE = 255;
constexpr int MAYBE = 127;
} // namespace
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
namespace fp16
{
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
@@ -787,7 +787,7 @@ void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITens
INEKernel::configure(win);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
namespace
{
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
index 06620d45aa..f00af9f93e 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
@@ -124,7 +124,7 @@ inline qint32x4_t internal_vqaddq(const qint32x4_t &x, const qint32x4_t &y)
return vqaddq_qs32(x, y);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline float16x8_t internal_vld1q(const float16_t *in)
{
return vld1q_f16(in);
@@ -141,7 +141,7 @@ inline float16x8_t internal_vqaddq(const float16x8_t &x, const float16x8_t &y)
{
return vaddq_f16(x, y);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
template <typename T1, typename T2, bool in_place>
void accumulate_bias(ITensor *input, const ITensor *bias, const Window window, ITensor *output)
@@ -246,13 +246,13 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con
_func = (output == nullptr) ? &accumulate_bias<qint32_t, qint16_t, true> : &accumulate_bias<qint32_t, qint16_t, false>;
break;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
_func = (output == nullptr) ? &accumulate_bias<float16_t, float16_t, true> : &accumulate_bias<float16_t, float16_t, false>;
break;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::F32:
{
_func = (output == nullptr) ? &accumulate_bias<float, float, true> : &accumulate_bias<float, float, false>;
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index bf23c4b2d2..78afbc2c20 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -70,7 +70,7 @@ inline qint16x8_t internal_vdupq_n(qint16_t v)
return vdupq_n_qs16(v);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <unsigned int stridex>
float16x8_t internal_vld1q(const float16_t *in);
@@ -115,7 +115,7 @@ inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, co
ARM_COMPUTE_UNUSED(fixed_point_position);
return vaddq_f16(x, vmulq_f16(y, z));
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
template <unsigned int stridex>
float32x4_t internal_vld1q(const float *in);
@@ -429,7 +429,7 @@ public:
}
};
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <unsigned int stridex>
void accumulate_results(float16_t *buffer, const float16x8x2_t &values);
@@ -453,7 +453,7 @@ void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values)
vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0])));
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
template <unsigned int stridex>
float32x4x2_t convolve_5x5(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
@@ -1064,9 +1064,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
{
switch(input->info()->data_type())
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
case DataType::QS16:
_num_elems_written_per_iteration = 8;
@@ -1099,9 +1099,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
_num_elems_read_per_iteration = 12;
_num_elems_written_per_iteration = 16 >> conv_stride_x;
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
case DataType::QS16:
_num_weight_elems_read_per_row = 8 + _kernel_size - 1;
@@ -1163,11 +1163,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
case DataType::F32:
convolve_1x1<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
convolve_1x1<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
@@ -1184,11 +1184,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
case DataType::F32:
convolve_3x3<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
convolve_3x3<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index e61f95221f..683510879b 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -108,7 +108,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI
in0_out, in1);
break;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
execute_window_loop(window, [&](const Coordinates & id)
@@ -128,7 +128,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI
in0_out, in1);
break;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
{
execute_window_loop(window, [&](const Coordinates & id)
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index c94d3b1416..dfba74355b 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -66,7 +66,7 @@ void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &wi
in, out);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta)
{
const float16x8_t beta_f16 = vdupq_n_f16(beta);
@@ -89,7 +89,7 @@ void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &wi
},
in, out);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta)
{
@@ -167,10 +167,10 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
_func = &matrix_addition_qs16;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
_func = &matrix_addition_f16;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index 4fcf6e2f37..a583c1dfd4 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -53,7 +53,7 @@ namespace
template <bool multiply_alpha>
void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const auto width_matrix_b = static_cast<int>(output->info()->dimension(0));
const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
@@ -186,7 +186,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
},
ina, inb, out);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(input0);
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_UNUSED(output);
@@ -194,7 +194,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_ERROR("Not implemented");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
template <bool multiply_alpha>
@@ -915,7 +915,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
template <bool multiply_alpha>
void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type());
const size_t out_stride = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type());
const int num_elems_matrix_b_x = input1->info()->dimension(0);
@@ -1051,14 +1051,14 @@ void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
vst1q_f16(mtx_out + 3 * out_stride, c.val[3]);
},
ina, inb, out);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(input0);
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_ERROR("Not implemented");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
template <bool multiply_alpha>
@@ -1454,13 +1454,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor
num_elems_processed_per_iteration_x = 16;
break;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
num_elems_processed_per_iteration_x = 32;
break;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
@@ -1503,13 +1503,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor
num_elems_processed_per_iteration_x = 8;
break;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
num_elems_processed_per_iteration_x = 8;
break;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
@@ -1563,14 +1563,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf
vector_matrix_multiply_qs16<false>(_input0, _input1, _output, window, info, _alpha);
break;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
multiply_alpha ? vector_matrix_multiply_f16<true>(_input0, _input1, _output, window, info, _alpha) :
vector_matrix_multiply_f16<false>(_input0, _input1, _output, window, info, _alpha);
break;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
@@ -1600,14 +1600,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf
matrix_matrix_multiply_qs16<false>(_input0, _input1, _output, window, _alpha);
break;
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
multiply_alpha ? matrix_matrix_multiply_f16<true>(_input0, _input1, _output, window, _alpha) :
matrix_matrix_multiply_f16<false>(_input0, _input1, _output, window, _alpha);
break;
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
{
ARM_COMPUTE_ERROR("Data type not supported");
diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
index d8440e333e..14fa1b492f 100644
--- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
@@ -39,7 +39,7 @@
using namespace arm_compute;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template class arm_compute::NEHarrisScoreFP16Kernel<3>;
template class arm_compute::NEHarrisScoreFP16Kernel<5>;
@@ -361,7 +361,7 @@ void NEHarrisScoreFP16Kernel<block_size>::configure(const IImage *input1, const
INEKernel::configure(win);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
template class arm_compute::NEHarrisScoreKernel<3>;
template class arm_compute::NEHarrisScoreKernel<5>;
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 099f2f1be3..b03d5b2d03 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -311,11 +311,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
case DataType::F32:
_func = &NEIm2ColKernel::run_reduced<float>;
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = &NEIm2ColKernel::run_reduced<float16_t>;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
_func = &NEIm2ColKernel::run_reduced<qint8_t>;
break;
@@ -334,11 +334,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
case DataType::F32:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QS8:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
break;
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index 99b4250bb9..52e30066de 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -51,7 +51,7 @@ namespace
{
void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const auto width_matrix_b = static_cast<int>(output->info()->dimension(0));
const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
@@ -160,14 +160,14 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
vst1q_f16(vec_out + 24, acc3);
},
ina, out);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(input0);
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR("Not supported, recompile with -march=armv8.2-a+fp16+simd.");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info)
diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
index 646cb84444..2d7c29d9a0 100644
--- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
+++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
@@ -51,7 +51,7 @@ constexpr float COEFF1 = 0.0663f;
constexpr float COEFF2 = 0.2447f;
} // namespace
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
namespace fp16
{
inline float16x8_t inv(float16x8_t x)
@@ -429,7 +429,7 @@ template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, Ph
template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::SIGNED>;
template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, PhaseType::UNSIGNED>;
template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::UNSIGNED>;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
namespace
{
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
index c4517dafaa..8f97e6ac16 100644
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
@@ -41,7 +41,7 @@ namespace arm_compute
class Coordinates;
} // namespace arm_compute
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
namespace fp16
{
inline void mask_top(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask)
@@ -224,7 +224,7 @@ void NENonMaximaSuppression3x3FP16Kernel::configure(const ITensor *input, ITenso
INEKernel::configure(win);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
namespace
{
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index f6f3d5f238..a409519114 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -232,7 +232,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window)
},
input, input_squared, output);
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
else if(dt == DataType::F16)
{
const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff());
@@ -268,7 +268,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window)
},
input, input_squared, output);
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
else
{
ARM_COMPUTE_ERROR("Not supported");
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index c622d4ffc2..a2f3cffdf3 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -37,9 +37,9 @@
#include <cstdint>
#include <cstdlib>
-#if ARM_COMPUTE_AARCH64_V8_2
+#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#include <arm_fp16.h> // needed for float16_t
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
using namespace arm_compute;
@@ -334,7 +334,7 @@ void mul_F32_F32_F32_n(const void *__restrict input1_ptr, const void *__restrict
template <bool is_scale255, bool is_sat>
void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const auto input1 = static_cast<const float16_t *__restrict>(input1_ptr);
const auto input2 = static_cast<const float16_t *__restrict>(input2_ptr);
const auto output = static_cast<float16_t *__restrict>(output_ptr);
@@ -349,13 +349,13 @@ void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict
}
};
vst2q_f16(output, result);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(input1_ptr);
ARM_COMPUTE_UNUSED(input2_ptr);
ARM_COMPUTE_UNUSED(output_ptr);
ARM_COMPUTE_UNUSED(scale);
ARM_COMPUTE_ERROR("Not supported. Recompile the library with arch=arm64-v8.2-a.");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
template <bool is_scale255, bool is_sat>
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 122540b07e..3ea5bb5870 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -181,7 +181,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
}
num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
switch(pool_size)
{
@@ -200,7 +200,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
break;
}
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::F32:
switch(pool_size)
{
@@ -544,7 +544,7 @@ void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Iterator input(_input, window_input);
Iterator output(_output, window);
@@ -603,17 +603,17 @@ void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window
*(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
},
input, output);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(window_input);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window)
{
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Iterator input(_input, window_input);
Iterator output(_output, window);
constexpr int pool_size = 2;
@@ -662,11 +662,11 @@ void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window
vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
},
input, output);
-#else /* ARM_COMPUTE_AARCH64_V8_2 */
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
ARM_COMPUTE_UNUSED(window_input);
ARM_COMPUTE_UNUSED(window);
ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
}
template <PoolingType pooling_type, bool exclude_padding>
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 73aba284ca..f1027590e4 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -106,7 +106,7 @@ void logits_1d_max_qs16(const ITensor *in, ITensor *out, const Window &window)
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window)
{
Window in_slice = window.first_slice_window_1D();
@@ -138,7 +138,7 @@ void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window)
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void logits_1d_max_f32(const ITensor *in, ITensor *out, const Window &window)
{
@@ -213,10 +213,10 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
_func = &logits_1d_max_f32;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
_func = &logits_1d_max_f16;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
}
@@ -374,7 +374,7 @@ void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
{
Window window_max(window);
@@ -434,7 +434,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
{
@@ -532,10 +532,10 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor
_func = &logits_1d_shift_exp_sum_f32;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
_func = &logits_1d_shift_exp_sum_f16;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
break;
@@ -637,7 +637,7 @@ void logits_1d_norm_qs16(const ITensor *in, const ITensor *sum, ITensor *out, co
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
}
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window)
{
Window window_sum(window);
@@ -668,7 +668,7 @@ void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, con
}
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
}
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
void logits_1d_norm_f32(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window)
{
@@ -738,10 +738,10 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I
_func = &logits_1d_norm_f32;
break;
case DataType::F16:
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
_func = &logits_1d_norm_f16;
break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Unsupported data type.");
break;