about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2017-06-28 17:27:56 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:15:39 +0100
commit221f38176b0d4dbc212441779d9bbac3cc0eecfa (patch)
treee838d673b35c5b40d4b484a3645cc7ae3c9d3ecc /scripts
parent6410fb2a14427713251f5d97144ac5d4f17c988c (diff)
downloadComputeLibrary-221f38176b0d4dbc212441779d9bbac3cc0eecfa.tar.gz
COMPMID-421: Fixed FP16 support in Neon GEMM.
Fixed GEMM FP16 problem with matrices that are not multiple of 32.
Added a new test suite NEON/GEMM/Float16/SmallGEMM.
Implemented FP16 function to multiply vector by a matrix.

Change-Id: Ie6c692885a48d0206bd6fe748332fa83bc286d67
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79118
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Diffstat (limited to 'scripts')
-rwxr-xr-x  scripts/check_clang-tidy.py  |  1
-rw-r--r--  scripts/clang-tidy.h         | 25
2 files changed, 26 insertions, 0 deletions
diff --git a/scripts/check_clang-tidy.py b/scripts/check_clang-tidy.py
index a376d0b898..6ab1747482 100755
--- a/scripts/check_clang-tidy.py
+++ b/scripts/check_clang-tidy.py
@@ -39,6 +39,7 @@ if __name__ == "__main__":
("Validation.cpp" in line and "parameter 'expected_labels' is unused" in line) or
("Reference.cpp" in line and "parameter 'rois' is unused" in line) or
("ReferenceCPP.cpp" in line and "parameter 'rois' is unused" in line) or
+ ("NEGEMMMatrixMultiplyKernel.cpp" in line and "do not use C-style cast to convert between unrelated types" in line) or
"3rdparty" in line):
continue
diff --git a/scripts/clang-tidy.h b/scripts/clang-tidy.h
index 32b0f6955e..cbc0d07cd6 100644
--- a/scripts/clang-tidy.h
+++ b/scripts/clang-tidy.h
@@ -1,5 +1,30 @@
#include <arm_neon.h>
+inline float16x8_t vmulq_lane_f16 (float16x8_t, float16x4_t, const int)
+{
+ return vdupq_n_f16(0);
+}
+
+inline float16x4_t vmul_f16 (float16x4_t, float16x4_t)
+{
+ return vdup_n_u16(0);
+}
+
+inline float16x4_t vadd_f16 (float16x4_t, float16x4_t)
+{
+ return vdup_n_u16(0);
+}
+
+inline float16x4_t vmul_lane_f16 (float16x4_t, float16x4_t, const int)
+{
+ return vdup_n_u16(0);
+}
+
+inline float16x4_t vmul_n_f16 (float16x4_t, float16_t)
+{
+ return vdup_n_u16(0);
+}
+
inline float16x8_t vcvtq_f16_u16(uint16x8_t)
{
return vdupq_n_f16(0);