aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
diff options
context:
space:
mode:
author Matthew Bentham <Matthew.Bentham@arm.com> 2023-05-31 13:18:33 +0000
committer Matthew Bentham <matthew.bentham@arm.com> 2023-06-26 12:44:15 +0000
commit 7d9a78ebfb3553b95421a0da5e2686a3923748db (patch)
tree 666915383e31a9da81f2605f7023c0e85e8226b3 /src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
parent a2bb80ea7111509c24caad8629533089decef430 (diff)
download ComputeLibrary-7d9a78ebfb3553b95421a0da5e2686a3923748db.tar.gz
Remove dependency on fp16 definitions from some core include files
This significantly improves the compilation times for parts of the core library that just need a definition of float16_t rather than access to all of the fp16 intrinsics.

Signed-off-by: Matthew Bentham <Matthew.Bentham@arm.com>
Change-Id: I5da1c6b0df0dd87d1d17948cd2e9b7375874f455
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/529385
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9781
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/addmuladd/generic/neon/fp16.cpp')
-rw-r--r-- src/cpu/kernels/addmuladd/generic/neon/fp16.cpp | 3
1 files changed, 3 insertions, 0 deletions
diff --git a/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp b/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
index 6fce1cd9c4..15f62fe502 100644
--- a/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
+#include "src/cpu/CpuTypes.h"
#include <cstddef>
#include <cstdint>
@@ -35,6 +36,8 @@
#if defined(__aarch64__) && defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
namespace
{
+using arm_compute::float16_t;
+
void a64_add_bn_clamp_direct_fp16_2x32(
float16_t *out, size_t out_stride,
float16_t *out_direct, size_t out_direct_stride,