From 33fd07bd27be3cba183b7cacef63ea220c770c23 Mon Sep 17 00:00:00 2001 From: Ioan-Cristian Szabo Date: Thu, 26 Oct 2017 15:42:24 +0100 Subject: COMPMID-634: Enable clang with libc++ to compile for Android (32 and 64 bits) Change-Id: I693f64e70cd478e93675a8b04360128ded3b60d4 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93015 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- src/core/NEON/kernels/NEAccumulateKernel.cpp | 4 ++-- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 12 +++++----- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 10 ++++---- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 10 ++++---- .../kernels/NEBatchNormalizationLayerKernel.cpp | 8 +++---- src/core/NEON/kernels/NEBox3x3Kernel.cpp | 4 ++-- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 4 ++-- ...EDirectConvolutionLayerBiasAccumulateKernel.cpp | 8 +++---- .../kernels/NEDirectConvolutionLayerKernel.cpp | 24 +++++++++---------- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 4 ++-- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 8 +++---- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 28 +++++++++++----------- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 4 ++-- src/core/NEON/kernels/NEIm2ColKernel.cpp | 8 +++---- .../NELocallyConnectedMatrixMultiplyKernel.cpp | 6 ++--- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 4 ++-- .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 4 ++-- .../NEON/kernels/NENormalizationLayerKernel.cpp | 4 ++-- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 10 ++++---- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 16 ++++++------- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 24 +++++++++---------- .../kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp | 4 ++++ 22 files changed, 106 insertions(+), 102 deletions(-) (limited to 'src/core/NEON') diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index deafabe1d4..856e3acb35 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -41,7 +41,7 @@ class Coordinates; /* Max S16 value used for saturation purposes. */ const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast(INT16_MAX)); -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 namespace fp16 { inline float16x8x2_t convert_u8x16_to_f16x8x2(uint8x16_t input) @@ -132,7 +132,7 @@ void NEAccumulateWeightedFP16Kernel::run(const Window &window, const ThreadInfo }, input, accum); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ namespace { diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 67fc45bc70..8dfce0f7b5 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -81,7 +81,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, }; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 // Activation functions : FP16 static std::map act_map_f16 = { @@ -96,7 +96,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, }; -#endif /* ARM_COMPUTE_ENABLE_FP16*/ +#endif /* ARM_COMPUTE_AARCH64_V8_2*/ // Activation functions : QS8 static std::map act_map_qs8 = @@ -140,11 +140,11 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat case DataType::F32: _func = act_map_f32[activation_info.activation()]; break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: _func = act_map_f16[activation_info.activation()]; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -174,7 +174,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat ICPPKernel::configure(win); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 template typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) { @@ -305,7 +305,7 @@ typename std::enable_if::value, void>::type NEActivat }, input, output); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ template typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index f263fd0df2..02fabcaff8 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -145,7 +145,7 @@ inline int16x8x2_t vqadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b) return res; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) { const float16x8x2_t res = @@ -158,11 +158,11 @@ inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) return res; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 Iterator input1(in1, window); Iterator input2(in2, window); Iterator output(out, window); @@ -175,13 +175,13 @@ void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const vst2q_f16(reinterpret_cast(output.ptr()), vadd2q_f16(a, b)); }, input1, input2, output); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(in1); ARM_COMPUTE_UNUSED(in2); ARM_COMPUTE_UNUSED(out); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 85f72c1421..8aadab9fc3 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -157,7 +157,7 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o input1, input2, output); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) { const float16x8x2_t res = @@ -170,11 +170,11 @@ inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b) return res; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 Iterator input1(in1, window); Iterator input2(in2, window); Iterator output(out, window); @@ -187,13 +187,13 @@ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const vst2q_f16(reinterpret_cast(output.ptr()), vsub2q_f16(a, b)); }, input1, input2, output); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(in1); ARM_COMPUTE_UNUSED(in2); ARM_COMPUTE_UNUSED(out); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index f6f6f9cb61..c48653ad17 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -169,7 +169,7 @@ void batch_normalization_fp32(ITensor *in, ITensor *out, const ITensor *mean, co input, output); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window) { Iterator input(in, window); @@ -212,7 +212,7 @@ void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, co }, input, output); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon) { @@ -258,11 +258,11 @@ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, num_elems_processed_per_iteration = 4; break; case DataType::F16: -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 _func = &batch_normalization_fp16; num_elems_processed_per_iteration = 8; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Element size not supported"); break; diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index d7178e4690..be8beaeacb 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -33,7 +33,7 @@ using namespace arm_compute; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); @@ -104,7 +104,7 @@ void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info) }, input, output); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ BorderSize NEBox3x3Kernel::border_size() const { diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index bcbe790fd0..944f29d506 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -51,7 +51,7 @@ constexpr int EDGE = 255; constexpr int MAYBE = 127; } // namespace -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 namespace fp16 { inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) @@ -787,7 +787,7 @@ void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITens INEKernel::configure(win); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ namespace { diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp index 6631359341..06620d45aa 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp @@ -124,7 +124,7 @@ inline qint32x4_t internal_vqaddq(const qint32x4_t &x, const qint32x4_t &y) return vqaddq_qs32(x, y); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 inline float16x8_t internal_vld1q(const float16_t *in) { return vld1q_f16(in); @@ -141,7 +141,7 @@ inline float16x8_t internal_vqaddq(const float16x8_t &x, const float16x8_t &y) { return vaddq_f16(x, y); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ template void accumulate_bias(ITensor *input, const ITensor *bias, const Window window, ITensor *output) @@ -246,13 +246,13 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con _func = (output == nullptr) ? &accumulate_bias : &accumulate_bias; break; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: { _func = (output == nullptr) ? &accumulate_bias : &accumulate_bias; break; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::F32: { _func = (output == nullptr) ? &accumulate_bias : &accumulate_bias; diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 2766d698d9..8642a19f39 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -68,7 +68,7 @@ inline qint16x8_t internal_vdupq_n(qint16_t v) return vdupq_n_qs16(v); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 template float16x8_t internal_vld1q(const float16_t *in); @@ -113,7 +113,7 @@ inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, co ARM_COMPUTE_UNUSED(fixed_point_position); return vaddq_f16(x, vmulq_f16(y, z)); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ template float32x4_t internal_vld1q(const float *in); @@ -427,7 +427,7 @@ public: } }; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 inline float16x8x3_t load_matrix_row(const float16_t *ptr) { /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes: @@ -567,7 +567,7 @@ void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values) vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0]))); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ inline float32x4x3_t load_matrix_row(const float *ptr) { @@ -1433,9 +1433,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens { switch(input->info()->data_type()) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::QS8: case DataType::QS16: _num_elems_written_per_iteration = 8; @@ -1468,9 +1468,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens _num_elems_read_per_iteration = 12; _num_elems_written_per_iteration = 16 >> conv_stride_x; break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::QS8: case DataType::QS16: _num_weight_elems_read_per_row = 8 + _kernel_size - 1; @@ -1532,11 +1532,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo case DataType::F32: convolve_1x1(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: convolve_1x1(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Data type not supported"); break; @@ -1553,11 +1553,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo case DataType::F32: convolve_3x3(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: convolve_3x3(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp index fb07cb0333..e61f95221f 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp @@ -108,7 +108,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI in0_out, in1); break; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: { execute_window_loop(window, [&](const Coordinates & id) @@ -128,7 +128,7 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadI in0_out, in1); break; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::QS8: { execute_window_loop(window, [&](const Coordinates & id) diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index 9dbce1de2f..c94d3b1416 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -66,7 +66,7 @@ void matrix_addition_f32(const ITensor *input, ITensor *output, const Window &wi in, out); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta) { const float16x8_t beta_f16 = vdupq_n_f16(beta); @@ -89,7 +89,7 @@ void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &wi }, in, out); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta) { @@ -167,10 +167,10 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output _func = &matrix_addition_qs16; break; case DataType::F16: -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 _func = &matrix_addition_f16; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index 69090825fa..4fcf6e2f37 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -53,7 +53,7 @@ namespace template void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 const auto width_matrix_b = static_cast(output->info()->dimension(0)); const auto in_b_stride = static_cast(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); const auto num_elems_vec_a = static_cast(input0->info()->dimension(0)); @@ -186,7 +186,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT }, ina, inb, out); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(input0); ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_UNUSED(output); @@ -194,7 +194,7 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_ERROR("Not implemented"); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } template @@ -915,7 +915,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT template void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 const size_t in_b_stride = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()); const size_t out_stride = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type()); const int num_elems_matrix_b_x = input1->info()->dimension(0); @@ -1051,14 +1051,14 @@ void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT vst1q_f16(mtx_out + 3 * out_stride, c.val[3]); }, ina, inb, out); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(input0); ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_ERROR("Not implemented"); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } template @@ -1454,13 +1454,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor num_elems_processed_per_iteration_x = 16; break; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: { num_elems_processed_per_iteration_x = 32; break; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: { ARM_COMPUTE_ERROR("Data type not supported"); @@ -1503,13 +1503,13 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor num_elems_processed_per_iteration_x = 8; break; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: { num_elems_processed_per_iteration_x = 8; break; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: { ARM_COMPUTE_ERROR("Data type not supported"); @@ -1563,14 +1563,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf vector_matrix_multiply_qs16(_input0, _input1, _output, window, info, _alpha); break; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: { multiply_alpha ? vector_matrix_multiply_f16(_input0, _input1, _output, window, info, _alpha) : vector_matrix_multiply_f16(_input0, _input1, _output, window, info, _alpha); break; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: { ARM_COMPUTE_ERROR("Data type not supported"); @@ -1600,14 +1600,14 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &inf matrix_matrix_multiply_qs16(_input0, _input1, _output, window, _alpha); break; } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: { multiply_alpha ? matrix_matrix_multiply_f16(_input0, _input1, _output, window, _alpha) : matrix_matrix_multiply_f16(_input0, _input1, _output, window, _alpha); break; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: { ARM_COMPUTE_ERROR("Data type not supported"); diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index 233b2baabe..d8440e333e 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -39,7 +39,7 @@ using namespace arm_compute; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 template class arm_compute::NEHarrisScoreFP16Kernel<3>; template class arm_compute::NEHarrisScoreFP16Kernel<5>; @@ -361,7 +361,7 @@ void NEHarrisScoreFP16Kernel::configure(const IImage *input1, const INEKernel::configure(win); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ template class arm_compute::NEHarrisScoreKernel<3>; template class arm_compute::NEHarrisScoreKernel<5>; diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 1c018b269b..099f2f1be3 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -311,11 +311,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::F32: _func = &NEIm2ColKernel::run_reduced; break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: _func = &NEIm2ColKernel::run_reduced; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::QS8: _func = &NEIm2ColKernel::run_reduced; break; @@ -334,11 +334,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::F32: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::QS8: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index 1b2942cd93..99b4250bb9 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -51,7 +51,7 @@ namespace { void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 const auto width_matrix_b = static_cast(output->info()->dimension(0)); const auto in_b_stride = static_cast(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); const auto num_elems_vec_a = static_cast(input0->info()->dimension(0)); @@ -160,14 +160,14 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT vst1q_f16(vec_out + 24, acc3); }, ina, out); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(input0); ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR("Not supported, recompile with -march=armv8.2-a+fp16+simd."); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info) diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index 433985f6fa..2424ec139e 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -51,7 +51,7 @@ constexpr float COEFF1 = 0.0663f; constexpr float COEFF2 = 0.2447f; } // namespace -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 namespace fp16 { inline float16x8_t inv(float16x8_t x) @@ -429,7 +429,7 @@ template class arm_compute::NEMagnitudePhaseFP16Kernel; template class arm_compute::NEMagnitudePhaseFP16Kernel; template class arm_compute::NEMagnitudePhaseFP16Kernel; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ namespace { diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index b7dfb59252..c4517dafaa 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -41,7 +41,7 @@ namespace arm_compute class Coordinates; } // namespace arm_compute -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 namespace fp16 { inline void mask_top(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask) @@ -224,7 +224,7 @@ void NENonMaximaSuppression3x3FP16Kernel::configure(const ITensor *input, ITenso INEKernel::configure(win); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ namespace { diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index d6d26e2d12..f6f3d5f238 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -232,7 +232,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window) }, input, input_squared, output); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 else if(dt == DataType::F16) { const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff()); @@ -268,7 +268,7 @@ void NENormalizationLayerKernel::normalize_float(const Window &window) }, input, input_squared, output); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ else { ARM_COMPUTE_ERROR("Not supported"); diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index 19d45e2cb5..2c90d9aa22 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -38,9 +38,9 @@ #include #include -#if ARM_COMPUTE_ENABLE_FP16 +#if ARM_COMPUTE_AARCH64_V8_2 #include // needed for float16_t -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ using namespace arm_compute; @@ -335,7 +335,7 @@ void mul_F32_F32_F32_n(const void *__restrict input1_ptr, const void *__restrict template void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 const auto input1 = static_cast(input1_ptr); const auto input2 = static_cast(input2_ptr); const auto output = static_cast(output_ptr); @@ -350,13 +350,13 @@ void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict } }; vst2q_f16(output, result); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(input1_ptr); ARM_COMPUTE_UNUSED(input2_ptr); ARM_COMPUTE_UNUSED(output_ptr); ARM_COMPUTE_UNUSED(scale); ARM_COMPUTE_ERROR("Not supported. Recompile the library with arch=arm64-v8.2-a."); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } template diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 8d4e46500f..0024e33723 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -173,7 +173,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons } num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8; break; -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 case DataType::F16: switch(pool_size) { @@ -192,7 +192,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons break; } break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ case DataType::F32: switch(pool_size) { @@ -536,7 +536,7 @@ void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window template void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 Iterator input(_input, window_input); Iterator output(_output, window); @@ -595,17 +595,17 @@ void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window *(reinterpret_cast(output.ptr())) = vget_lane_f16(res, 0); }, input, output); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(window_input); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } template void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window) { -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 Iterator input(_input, window_input); Iterator output(_output, window); constexpr int pool_size = 2; @@ -654,11 +654,11 @@ void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window vst1q_f16(reinterpret_cast(output.ptr()), res); }, input, output); -#else /* ARM_COMPUTE_ENABLE_FP16 */ +#else /* ARM_COMPUTE_AARCH64_V8_2 */ ARM_COMPUTE_UNUSED(window_input); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a"); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ } template diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 648dac46c0..73aba284ca 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -106,7 +106,7 @@ void logits_1d_max_qs16(const ITensor *in, ITensor *out, const Window &window) while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window) { Window in_slice = window.first_slice_window_1D(); @@ -138,7 +138,7 @@ void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window) } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void logits_1d_max_f32(const ITensor *in, ITensor *out, const Window &window) { @@ -213,10 +213,10 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) _func = &logits_1d_max_f32; break; case DataType::F16: -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 _func = &logits_1d_max_f16; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -374,7 +374,7 @@ void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window) { Window window_max(window); @@ -434,7 +434,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice)); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window) { @@ -532,10 +532,10 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor _func = &logits_1d_shift_exp_sum_f32; break; case DataType::F16: -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 _func = &logits_1d_shift_exp_sum_f16; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Unsupported data type."); break; @@ -637,7 +637,7 @@ void logits_1d_norm_qs16(const ITensor *in, const ITensor *sum, ITensor *out, co } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); } -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window) { Window window_sum(window); @@ -668,7 +668,7 @@ void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, con } while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ void logits_1d_norm_f32(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window) { @@ -738,10 +738,10 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I _func = &logits_1d_norm_f32; break; case DataType::F16: -#ifdef ARM_COMPUTE_ENABLE_FP16 +#ifdef ARM_COMPUTE_AARCH64_V8_2 _func = &logits_1d_norm_f16; break; -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ default: ARM_COMPUTE_ERROR("Unsupported data type."); break; diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp index 939f1b7c40..8728e77d9e 100644 --- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp +++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp @@ -39,6 +39,9 @@ #include #include +// Enable only if compiled for AArch64-V8.2-A targets +#ifdef ARM_COMPUTE_AARCH64_V8_2 + #define ASM_PREFETCH(address) "PRFM PLDL1KEEP, " address "\n" #define ASM_PREFETCHL2(address) "PRFM PLDL2KEEP, " address "\n" #define ASM_PREFETCHW(address) "PRFM PSTL1KEEP, " address "\n" @@ -517,3 +520,4 @@ void NEGEMMLowpAArch64V8P4Kernel::run(const Window &window, const ThreadInfo &in } } } // namespace arm_compute +#endif /* ARM_COMPUTE_AARCH64_V8_2 */ -- cgit v1.2.1