diff options
author | Ioan-Cristian Szabo <ioan-cristian.szabo@arm.com> | 2017-11-13 13:34:08 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 5edbd1c5dce43b66f30c903797a91e39369c5b62 (patch) | |
tree | d4c20c1a92ff9a7e26ffc9b1f6976ee12a2e2ae0 /src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | |
parent | 84b51ad1aaa530d397761f2b6da65add9dc8a6b0 (diff) | |
download | ComputeLibrary-5edbd1c5dce43b66f30c903797a91e39369c5b62.tar.gz |
COMPMID-556: Add support to build arm64-v8.2-a for Android platform (clang compiler)
Change-Id: Ibb779dd3a8d10786da6d8f70590e654e14654d7b
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95530
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | 24 |
1 files changed, 12 insertions, 12 deletions
```diff
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index bf23c4b2d2..78afbc2c20 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -70,7 +70,7 @@ inline qint16x8_t internal_vdupq_n(qint16_t v)
     return vdupq_n_qs16(v);
 }
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 template <unsigned int stridex>
 float16x8_t internal_vld1q(const float16_t *in);
 
@@ -115,7 +115,7 @@ inline float16x8_t internal_vmlal(const float16x8_t &x, const float16x8_t &y, co
     ARM_COMPUTE_UNUSED(fixed_point_position);
     return vaddq_f16(x, vmulq_f16(y, z));
 }
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 template <unsigned int stridex>
 float32x4_t internal_vld1q(const float *in);
@@ -429,7 +429,7 @@ public:
     }
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 template <unsigned int stridex>
 void accumulate_results(float16_t *buffer, const float16x8x2_t &values);
 
@@ -453,7 +453,7 @@ void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values)
     vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0])));
 }
 
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 template <unsigned int stridex>
 float32x4x2_t convolve_5x5(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
@@ -1064,9 +1064,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
         {
             switch(input->info()->data_type())
             {
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F16:
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
                 case DataType::QS8:
                 case DataType::QS16:
                     _num_elems_written_per_iteration = 8;
@@ -1099,9 +1099,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
                     _num_elems_read_per_iteration = 12;
                     _num_elems_written_per_iteration = 16 >> conv_stride_x;
                     break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F16:
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
                 case DataType::QS8:
                 case DataType::QS16:
                     _num_weight_elems_read_per_row = 8 + _kernel_size - 1;
@@ -1163,11 +1163,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
                 case DataType::F32:
                     convolve_1x1<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F16:
                     convolve_1x1<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
                 default:
                     ARM_COMPUTE_ERROR("Data type not supported");
                     break;
@@ -1184,11 +1184,11 @@ void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo
                 case DataType::F32:
                     convolve_3x3<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F16:
                     convolve_3x3<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
                 default:
                     ARM_COMPUTE_ERROR("Data type not supported");
                     break;
```