From 221f38176b0d4dbc212441779d9bbac3cc0eecfa Mon Sep 17 00:00:00 2001
From: Pablo Tello <pablo.tello@arm.com>
Date: Wed, 28 Jun 2017 17:27:56 +0100
Subject: COMPMID-421: Fixed FP16 support in Neon GEMM.

Fixed a GEMM FP16 problem with matrices that are not a multiple of 32.
Added a new test suite NEON/GEMM/Float16/SmallGEMM.
Implemented an FP16 function to multiply a vector by a matrix.

Change-Id: Ie6c692885a48d0206bd6fe748332fa83bc286d67
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79118
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
---
 tests/NEON/Helper.h            |  1 +
 tests/TensorLibrary.h          |  2 +-
 tests/validation/NEON/GEMM.cpp | 20 +++++++++++++++++++-
 tests/validation/Reference.cpp |  2 +-
 tests/validation/Reference.h   |  1 +
 5 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'tests')
diff --git a/tests/NEON/Helper.h b/tests/NEON/Helper.h
index e77615406e..0651c9c709 100644
--- a/tests/NEON/Helper.h
+++ b/tests/NEON/Helper.h
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/Array.h"
 
 #include <algorithm>
+#include <vector>
 
 namespace arm_compute
 {
diff --git a/tests/TensorLibrary.h b/tests/TensorLibrary.h
index 4d7143a206..69b2381171 100644
--- a/tests/TensorLibrary.h
+++ b/tests/TensorLibrary.h
@@ -505,7 +505,7 @@ void TensorLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t
             fill(tensor, distribution_f16, seed_offset);
             break;
         }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
         case DataType::F32:
         {
             // It doesn't make sense to check [-inf, inf], so hard code it to a big number
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index 35f65c8fe2..75ce39716c 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -77,7 +77,7 @@ Tensor compute_gemm(const TensorShape &src_shape1, const TensorShape &src_shape2
     BOOST_TEST(!dst.info()->is_resizable());
 
     // Fill tensors
-    if(dt == DataType::F32)
+    if(dt == DataType::F16 || dt == DataType::F32)
     {
         std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
         library->fill(NEAccessor(src1), distribution, 0);
@@ -137,6 +137,24 @@ BOOST_DATA_TEST_CASE(Configuration,
     validate(dst.info()->valid_region(), dst_valid_region);
 }
 
+#ifdef ARM_COMPUTE_ENABLE_FP16
+BOOST_AUTO_TEST_SUITE(Float16)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(SmallGEMM, SmallGEMMDataset() * boost::unit_test::data::make(DataType::F16),
+                     gemm_set, dt)
+{
+    // Compute the reference result for this dataset entry (shapes, alpha, beta) and data type
+    RawTensor ref_dst = Reference::compute_reference_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt);
+
+    // Compute the result under test via compute_gemm with the same shapes, alpha, beta and data type
+    Tensor dst = compute_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt);
+
+    // Validate output against the reference. NOTE(review): tolerance_f32 is applied to F16 data - confirm this is intended
+    validate(NEAccessor(dst), ref_dst, tolerance_f32);
+}
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
 BOOST_AUTO_TEST_SUITE(Float)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(SmallGEMM, SmallGEMMDataset() * boost::unit_test::data::make(DataType::F32),
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 0518819173..62dfcba37e 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -335,7 +335,7 @@ RawTensor Reference::compute_reference_gemm(const TensorShape &src_shape1, const
     RawTensor dst  = library->get(dst_shape, dt, 1, fixed_point_position);
 
     // Fill reference
-    if(dt == DataType::F32)
+    if(dt == DataType::F16 || dt == DataType::F32)
     {
         std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
         library->fill(src1, distribution, 0);
diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h
index 3aca1eaaae..ebd5fa76c4 100644
--- a/tests/validation/Reference.h
+++ b/tests/validation/Reference.h
@@ -26,6 +26,7 @@
 
 #include "RawTensor.h"
 #include "Types.h"
+#include <vector>
 
 #include <vector>
 
-- 
cgit v1.2.1