From 718f347a2d886381de19420b5b5b99db8f2b7338 Mon Sep 17 00:00:00 2001
From: Jeremy Johnson
Date: Thu, 30 Nov 2023 14:18:19 +0000
Subject: Main Compliance FP16 support - generate and verify.

FP16 support for all existing operators for compliance:
* DOT_PRODUCT
* ULP
* EXACT
* ABS_ERROR

Signed-off-by: Jeremy Johnson
Change-Id: I8d25448a793375b53880da3787d8f839767f02cf
---
 .../src/generate/generate_dot_product.cc          | 283 ++++++++++++++-------
 .../src/generate/generate_pseudo_random.cc        |  21 +-
 reference_model/src/verify/verify_abs_error.cc    |  13 +-
 reference_model/src/verify/verify_dot_product.cc  |  25 +-
 reference_model/src/verify/verify_exact.cc        |  20 +-
 reference_model/src/verify/verify_ulp.cc          |  36 ++-
 reference_model/src/verify/verify_utils.cc        |  25 +-
 reference_model/src/verify/verify_utils.h         |  12 +-
 verif/generator/datagenerator.py                  |  24 +-
 verif/generator/tosa_arg_gen.py                   |   4 +-
 verif/generator/tosa_utils.py                     |   2 +-
 11 files changed, 318 insertions(+), 147 deletions(-)

diff --git a/reference_model/src/generate/generate_dot_product.cc b/reference_model/src/generate/generate_dot_product.cc
index c8a2b13..130b41d 100644
--- a/reference_model/src/generate/generate_dot_product.cc
+++ b/reference_model/src/generate/generate_dot_product.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "generate_dot_product.h"
+#include "half.hpp"

 namespace
 {
@@ -20,34 +21,34 @@ namespace
 // MatMul                                                                    //
 //---------------------------------------------------------------------------//
+template <typename DataType>
 void generateMatMulA(const TosaReference::GenerateConfig& cfg,
                      TosaReference::IDotProductGenerator& generator,
-                     void* data,
+                     DataType* data,
                      size_t size)
 {
-    float* a         = reinterpret_cast<float*>(data);
     const uint32_t T = cfg.shape[0] * cfg.shape[1] * cfg.shape[2];
     const uint32_t C = cfg.shape[2];

     for (uint32_t t = 0; t < T; ++t)
     {
-        a[t] = generator(t % C);    // k = c
+        data[t] = static_cast<DataType>(generator(t % C));    // k = c
     }
 }

+template <typename DataType>
 void generateMatMulB(const TosaReference::GenerateConfig& cfg,
                      TosaReference::IDotProductGenerator& generator,
-                     void* data,
+                     DataType* data,
                      size_t size)
 {
-    float* b         = reinterpret_cast<float*>(data);
     const uint32_t T = cfg.shape[0] * cfg.shape[1] * cfg.shape[2];
     const uint32_t C = cfg.shape[1];
     const uint32_t W = cfg.shape[2];

     for (uint32_t t = 0; t < T; ++t)
     {
-        b[t] = generator((t / W) % C);    // k = c
+        data[t] = static_cast<DataType>(generator((t / W) % C));    // k = c
     }
 }

@@ -56,11 +57,6 @@ bool generateMatMul(const TosaReference::GenerateConfig& cfg,
                     void* data,
                     size_t size)
 {
-    if (cfg.dataType != DType::DType_FP32)
-    {
-        WARNING("[Generator][DP][MatMul] Only supports FP32.");
-        return false;
-    }
     if (cfg.shape.size() != 3)
     {
         WARNING("[Generator][DP][MatMul] Tensor shape expected 3 dimensions.");
@@ -72,7 +68,24 @@ bool generateMatMul(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    (cfg.inputPos == 0) ? generateMatMulA(cfg, generator, data, size) : generateMatMulB(cfg, generator, data, size);
+    switch (cfg.dataType)
+    {
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            (cfg.inputPos == 0) ? generateMatMulA(cfg, generator, outData, size)
+                                : generateMatMulB(cfg, generator, outData, size);
+            break;
+        }
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            (cfg.inputPos == 0) ? generateMatMulA(cfg, generator, outData, size)
+                                : generateMatMulB(cfg, generator, outData, size);
+            break;
+        }
+        default:
+            WARNING("[Generator][DP][MatMul] Only supports FP32 or FP16.");
+            return false;
+    }

     return true;
 }
@@ -80,9 +93,10 @@ bool generateMatMul(const TosaReference::GenerateConfig& cfg,
 // Conv2D                                                                    //
 //---------------------------------------------------------------------------//
+template <typename DataType>
 bool generateConv2DInput(const TosaReference::GenerateConfig& cfg,
                          TosaReference::IDotProductGenerator& generator,
-                         void* data,
+                         DataType* data,
                          size_t size)
 {
     if (cfg.dotProductInfo.kernel.size() != 2 || cfg.dotProductInfo.kernel[0] <= 0 || cfg.dotProductInfo.kernel[1] <= 0)
@@ -96,7 +110,6 @@ bool generateConv2DInput(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* input      = reinterpret_cast<float*>(data);
     const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
     const uint32_t IH = cfg.shape[1];
     const uint32_t IW = cfg.shape[2];
@@ -111,14 +124,15 @@ bool generateConv2DInput(const TosaReference::GenerateConfig& cfg,
         uint32_t iy = ((t / IC) / IW) % IH;
         uint32_t k  = ((iy % KH) * KW + (ix % KW)) * IC + ic;

-        input[t] = generator(k);
+        data[t] = static_cast<DataType>(generator(k));
     }
     return true;
 }

+template <typename DataType>
 bool generateConv2DWeight(const TosaReference::GenerateConfig& cfg,
                           TosaReference::IDotProductGenerator& generator,
-                          void* data,
+                          DataType* data,
                           size_t size)
 {
     if (cfg.shape.size() != 4)
@@ -127,7 +141,6 @@ bool generateConv2DWeight(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* weight     = reinterpret_cast<float*>(data);
     const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
     const uint32_t KH = cfg.shape[1];
     const uint32_t KW = cfg.shape[2];
@@ -140,14 +153,15 @@ bool generateConv2DWeight(const TosaReference::GenerateConfig& cfg,
         uint32_t ky = ((t / IC) / KW) % KH;
         uint32_t k  = (ky + KW * kx) * IC + ic;

-        weight[t] = generator(k);
+        data[t] = static_cast<DataType>(generator(k));
     }
     return true;
 }

+template <typename DataType>
 bool generateConv2DBias(const TosaReference::GenerateConfig& cfg,
                         TosaReference::IDotProductGenerator& generator,
-                        void* data,
+                        DataType* data,
                         size_t size)
 {
     if (cfg.shape.size() != 1)
@@ -156,12 +170,11 @@ bool generateConv2DBias(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* bias      = reinterpret_cast<float*>(data);
     const uint32_t T = cfg.shape[0];

     for (uint32_t t = 0; t < T; ++t)
     {
-        bias[t] = generator(2);
+        data[t] = static_cast<DataType>(generator(2));
     }
     return true;
 }
@@ -171,21 +184,42 @@ bool generateConv2D(const TosaReference::GenerateConfig& cfg,
                     void* data,
                     size_t size)
 {
-    if (cfg.dataType != DType::DType_FP32)
-    {
-        WARNING("[Generator][DP][Conv2D] Only supports FP32.");
-        return false;
-    }
-    switch (cfg.inputPos)
-    {
-        case 0:
-            return generateConv2DInput(cfg, generator, data, size);
-        case 1:
-            return generateConv2DWeight(cfg, generator, data, size);
-        case 2:
-            return generateConv2DBias(cfg, generator, data, size);
+    switch (cfg.dataType)
+    {
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            switch (cfg.inputPos)
+            {
+                case 0:
+                    return generateConv2DInput(cfg, generator, outData, size);
+                case 1:
+                    return generateConv2DWeight(cfg, generator, outData, size);
+                case 2:
+                    return generateConv2DBias(cfg, generator, outData, size);
+                default:
+                    WARNING("[Generator][DP][Conv2D] Invalid input tensor slot position to operator.");
+                    return false;
+            }
+            break;
+        }
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            switch (cfg.inputPos)
+            {
+                case 0:
+                    return generateConv2DInput(cfg, generator, outData, size);
+                case 1:
+                    return generateConv2DWeight(cfg, generator, outData, size);
+                case 2:
+                    return generateConv2DBias(cfg, generator, outData, size);
+                default:
+                    WARNING("[Generator][DP][Conv2D] Invalid input tensor slot position to operator.");
+                    return false;
+            }
+            break;
+        }
         default:
-            WARNING("[Generator][DP][Conv2D] Invalid input tensor slot position to operator.");
+            WARNING("[Generator][DP][Conv2D] Only supports FP32 or FP16.");
             return false;
     }
 }
@@ -193,16 +227,33 @@ bool generateConv2D(const TosaReference::GenerateConfig& cfg,
 // Reduce Sum                                                                //
 //---------------------------------------------------------------------------//
+template <typename DataType>
+void generateReduceSumData(const TosaReference::GenerateConfig& cfg,
+                           TosaReference::IDotProductGenerator& generator,
+                           DataType* data,
+                           size_t size)
+{
+    const int64_t T     = TosaReference::numElementsFromShape(cfg.shape);
+    const uint32_t axis = cfg.dotProductInfo.axis;
+
+    for (int64_t t = 0; t < T; ++t)
+    {
+        uint64_t k = t;
+        for (uint32_t d = cfg.shape.size() - 1; d > axis; --d)
+        {
+            k = k / cfg.shape[d];
+        }
+        k = k % cfg.shape[axis];
+
+        data[t] = static_cast<DataType>(generator(static_cast<uint32_t>(k)));
+    }
+}
+
 bool generateReduceSum(const TosaReference::GenerateConfig& cfg,
                        TosaReference::IDotProductGenerator& generator,
                        void* data,
                        size_t size)
 {
-    if (cfg.dataType != DType::DType_FP32)
-    {
-        WARNING("[Generator][DP][ReduceSum] Only supports FP32.");
-        return false;
-    }
     if (cfg.inputPos != 0)
     {
         WARNING("[Generator][DP][ReduceSum] Invalid input tensor slot position to operator.");
@@ -214,30 +265,33 @@ bool generateReduceSum(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* input        = reinterpret_cast<float*>(data);
-    const int64_t T     = TosaReference::numElementsFromShape(cfg.shape);
-    const uint32_t axis = cfg.dotProductInfo.axis;
-
-    for (int64_t t = 0; t < T; ++t)
+    switch (cfg.dataType)
     {
-        uint64_t k = t;
-        for (uint32_t d = cfg.shape.size() - 1; d > axis; --d)
-        {
-            k = k / cfg.shape[d];
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            generateReduceSumData(cfg, generator, outData, size);
+            break;
         }
-        k = k % cfg.shape[axis];
-
-        input[t] = generator(static_cast<uint32_t>(k));
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            generateReduceSumData(cfg, generator, outData, size);
+            break;
+        }
+        default:
+            WARNING("[Generator][DP][ReduceSum] Only supports FP32 or FP16.");
+            return false;
     }
+
     return true;
 }
//---------------------------------------------------------------------------//
 // Fully Connected                                                           //
 //---------------------------------------------------------------------------//
+template <typename DataType>
 bool generateFullyConnectedInput(const TosaReference::GenerateConfig& cfg,
                                  TosaReference::IDotProductGenerator& generator,
-                                 void* data,
+                                 DataType* data,
                                  size_t size)
 {
     if (cfg.shape.size() != 2)
@@ -246,7 +300,6 @@ bool generateFullyConnectedInput(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* input      = reinterpret_cast<float*>(data);
     const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
     const uint32_t IC = cfg.shape[1];

@@ -254,14 +307,15 @@ bool generateFullyConnectedInput(const TosaReference::GenerateConfig& cfg,
     {
         uint32_t k = t % IC;

-        input[t] = generator(k);
+        data[t] = static_cast<DataType>(generator(k));
     }
     return true;
 }

+template <typename DataType>
 bool generateFullyConnectedWeight(const TosaReference::GenerateConfig& cfg,
                                   TosaReference::IDotProductGenerator& generator,
-                                  void* data,
+                                  DataType* data,
                                   size_t size)
 {
     if (cfg.shape.size() != 2)
     {
         WARNING("[Generator][DP][FullyConnected] Tensor shape expected 2 dimensions.");
@@ -270,7 +324,6 @@ bool generateFullyConnectedWeight(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* weight     = reinterpret_cast<float*>(data);
     const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
     const uint32_t IC = cfg.shape[1];

@@ -278,14 +331,15 @@ bool generateFullyConnectedWeight(const TosaReference::GenerateConfig& cfg,
     {
         uint32_t k = t % IC;

-        weight[t] = generator(k);
+        data[t] = static_cast<DataType>(generator(k));
     }
     return true;
 }

+template <typename DataType>
 bool generateFullyConnectedBias(const TosaReference::GenerateConfig& cfg,
                                 TosaReference::IDotProductGenerator& generator,
-                                void* data,
+                                DataType* data,
                                 size_t size)
 {
     if (cfg.shape.size() != 1)
@@ -294,12 +348,11 @@ bool generateFullyConnectedBias(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* bias      = reinterpret_cast<float*>(data);
     const uint32_t T = cfg.shape[0];

     for (uint32_t t = 0; t < T; ++t)
     {
-        bias[t] = generator(2);
+        data[t] = static_cast<DataType>(generator(2));
     }
     return true;
 }
@@ -309,21 +362,42 @@ bool generateFullyConnected(const TosaReference::GenerateConfig& cfg,
                             void* data,
                             size_t size)
 {
-    if (cfg.dataType != DType::DType_FP32)
-    {
-        WARNING("[Generator][DP][FullyConnected] Only supports FP32.");
-        return false;
-    }
-    switch (cfg.inputPos)
-    {
-        case 0:
-            return generateFullyConnectedInput(cfg, generator, data, size);
-        case 1:
-            return generateFullyConnectedWeight(cfg, generator, data, size);
-        case 2:
-            return generateFullyConnectedBias(cfg, generator, data, size);
+    switch (cfg.dataType)
+    {
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            switch (cfg.inputPos)
+            {
+                case 0:
+                    return generateFullyConnectedInput(cfg, generator, outData, size);
+                case 1:
+                    return generateFullyConnectedWeight(cfg, generator, outData, size);
+                case 2:
+                    return generateFullyConnectedBias(cfg, generator, outData, size);
+                default:
+                    WARNING("[Generator][DP][FullyConnected] Invalid input tensor slot position to operator.");
+                    return false;
+            }
+            break;
+        }
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            switch (cfg.inputPos)
+            {
+                case 0:
+                    return generateFullyConnectedInput(cfg, generator, outData, size);
+                case 1:
+                    return generateFullyConnectedWeight(cfg, generator, outData, size);
+                case 2:
+                    return generateFullyConnectedBias(cfg, generator, outData, size);
+                default:
+                    WARNING("[Generator][DP][FullyConnected] Invalid input tensor slot position to operator.");
+                    return false;
+            }
+            break;
+        }
         default:
-            WARNING("[Generator][DP][FullyConnected] Invalid input tensor slot position to operator.");
+            WARNING("[Generator][DP][FullyConnected] Only supports FP32 or FP16.");
             return false;
     }
 }
@@ -331,16 +405,35 @@ bool generateFullyConnected(const TosaReference::GenerateConfig& cfg,
 // Avg Pool 2D                                                               //
 //---------------------------------------------------------------------------//
+template <typename DataType>
+void generateAvgPool2DData(const TosaReference::GenerateConfig& cfg,
+                           TosaReference::IDotProductGenerator& generator,
+                           DataType* data,
+                           size_t size)
+{
+    const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
+    const uint32_t IH = cfg.shape[1];
+    const uint32_t IW = cfg.shape[2];
+    const uint32_t C  = cfg.shape[3];
+    const uint32_t KY = cfg.dotProductInfo.kernel[0];
+    const uint32_t KX = cfg.dotProductInfo.kernel[1];
+
+    for (int64_t t = 0; t < T; ++t)
+    {
+        uint32_t c  = t % C;
+        uint32_t ix = (t / C) % IW;
+        uint32_t iy = ((t / C) / IW) % IH;
+        uint32_t k  = ((iy % KY) * KX + (ix % KX)) * C + c;
+
+        data[t] = static_cast<DataType>(generator(k));
+    }
+}
+
 bool generateAvgPool2D(const TosaReference::GenerateConfig& cfg,
                        TosaReference::IDotProductGenerator& generator,
                        void* data,
                        size_t size)
 {
-    if (cfg.dataType != DType::DType_FP32)
-    {
-        WARNING("[Generator][DP][AvgPool2D] Only supports FP32.");
-        return false;
-    }
     if (cfg.inputPos != 0)
     {
         WARNING("[Generator][DP][AvgPool2D] Invalid input tensor slot position to operator.");
@@ -357,23 +450,23 @@ bool generateAvgPool2D(const TosaReference::GenerateConfig& cfg,
         return false;
     }

-    float* input      = reinterpret_cast<float*>(data);
-    const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
-    const uint32_t IH = cfg.shape[1];
-    const uint32_t IW = cfg.shape[2];
-    const uint32_t C  = cfg.shape[3];
-    const uint32_t KY = cfg.dotProductInfo.kernel[0];
-    const uint32_t KX = cfg.dotProductInfo.kernel[1];
-
-    for (int64_t t = 0; t < T; ++t)
+    switch (cfg.dataType)
     {
-        uint32_t c  = t % C;
-        uint32_t ix = (t / C) % IW;
-        uint32_t iy = ((t / C) / IW) % IH;
-        uint32_t k  = ((iy % KY) * KX + (ix % KX)) * C + c;
-
-        input[t] = generator(k);
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            generateAvgPool2DData(cfg, generator, outData, size);
+            break;
+        }
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            generateAvgPool2DData(cfg, generator, outData, size);
+            break;
+        }
+        default:
+            WARNING("[Generator][DP][AvgPool2D] Only supports FP32 or FP16.");
+            return false;
     }
+
     return true;
 }

 }    // namespace
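Every generator in the file above now follows the same shape: values come out of the dot-product generator as float and are narrowed per element to the tensor's type. A minimal standalone sketch of that narrowing idea (the fillFromGenerator helper and its callback type are illustrative, not names from this patch):

    #include <cstdint>
    #include <functional>
    #include "half.hpp"    // half_float::half, the FP16 type used by the reference model

    // Fill a typed buffer from an FP32-valued generator, mirroring the
    // static_cast<DataType> pattern in the generators above.
    template <typename DataType>
    void fillFromGenerator(DataType* data, int64_t count, const std::function<float(uint32_t)>& gen)
    {
        for (int64_t t = 0; t < count; ++t)
        {
            // Compute in FP32, round to the destination type on store.
            data[t] = static_cast<DataType>(gen(static_cast<uint32_t>(t)));
        }
    }

Instantiated with DataType = float this stores the values unchanged; with DataType = half_float::half each store rounds to the nearest representable FP16 value.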
diff --git a/reference_model/src/generate/generate_pseudo_random.cc b/reference_model/src/generate/generate_pseudo_random.cc
index b51424d..b62c38f 100644
--- a/reference_model/src/generate/generate_pseudo_random.cc
+++ b/reference_model/src/generate/generate_pseudo_random.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "generate.h"
 #include "generate_utils.h"
+#include "half.hpp"

 #include
 #include
@@ -88,7 +89,8 @@ private:
     bool _useUniform;
 };

-bool generateFP32(const TosaReference::GenerateConfig& cfg, void* data, size_t size)
+template <typename DataType>
+bool generateFP(const TosaReference::GenerateConfig& cfg, DataType* data, size_t size)
 {
     const TosaReference::PseudoRandomInfo& prinfo = cfg.pseudoRandomInfo;

@@ -106,21 +108,20 @@ bool generateFP32(const TosaReference::GenerateConfig& cfg, void* data, size_t s
         generator = new PseudoRandomGeneratorFloat(prinfo.rngSeed);
     }

-    float* a     = reinterpret_cast<float*>(data);
     const auto T = TosaReference::numElementsFromShape(cfg.shape);
     const bool comparisonOp =
         (cfg.opType == Op::Op_EQUAL) || (cfg.opType == Op::Op_GREATER_EQUAL) || (cfg.opType == Op::Op_GREATER);
     for (auto t = 0; t < T; ++t)
     {
-        a[t] = generator->getRandomFloat();
+        data[t] = static_cast<DataType>(generator->getRandomFloat());
         if (comparisonOp && (t % 4 == 0))
         {
             // Set every 4th value to 0 to enable better comparison testing
-            a[t] = 0.f;
+            data[t] = static_cast<DataType>(0.f);
         }
         else if (roundMode)
         {
-            a[t] = std::roundf(a[t]);
+            data[t] = static_cast<DataType>(std::roundf(data[t]));
         }
     }
     return true;
@@ -146,8 +147,14 @@ bool generatePseudoRandom(const GenerateConfig& cfg, void* data, size_t size)

     switch (cfg.dataType)
     {
-        case DType::DType_FP32:
-            return generateFP32(cfg, data, size);
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            return generateFP(cfg, outData, size);
+        }
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            return generateFP(cfg, outData, size);
+        }
         default:
             WARNING("[Generator][PR] Unsupported type.");
             return false;
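The pseudo-random path draws the same FP32 stream for both output types and narrows per element, so an FP16 tensor holds the nearest representable binary16 value for each draw. A standalone illustration of that rounding (not part of the patch):

    #include <cassert>
    #include <cmath>
    #include "half.hpp"

    int main()
    {
        // 0.1f is not representable in binary16, so the narrowing cast rounds.
        half_float::half h = static_cast<half_float::half>(0.1f);
        assert(static_cast<float>(h) != 0.1f);
        // The rounding error stays below one FP16 ULP at 0.1 (2^-14 ~= 6.1e-5).
        assert(std::abs(static_cast<float>(h) - 0.1f) < 6.2e-5f);
        return 0;
    }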
diff --git a/reference_model/src/verify/verify_abs_error.cc b/reference_model/src/verify/verify_abs_error.cc
index b43da08..5aaa0ad 100644
--- a/reference_model/src/verify/verify_abs_error.cc
+++ b/reference_model/src/verify/verify_abs_error.cc
@@ -18,6 +18,7 @@
 #include
 #include

+#include "half.hpp"
 #include "verifiers.h"

 namespace TosaReference
@@ -25,14 +26,15 @@ namespace TosaReference
 namespace
 {

-bool validateData(const double* ref, const double* bnd, const float* imp, const std::vector<int32_t>& shape)
+template <typename OutDtype>
+bool validateData(const double* ref, const double* bnd, const OutDtype* imp, const std::vector<int32_t>& shape)
 {
     const size_t T = static_cast<size_t>(numElements(shape));
     TOSA_REF_REQUIRE(T > 0, "[AE] Invalid shape for reference tensor");

     for (size_t i = 0; i < T; ++i)
     {
-        double errBound = std::abs(ref[i]) * exp2(-AccPrecision<float>::normal_frac) * bnd[i];
+        double errBound = std::abs(ref[i]) * exp2(-AccPrecision<OutDtype>::normal_frac) * bnd[i];
         bool valid      = tosaCheckFloatBound(imp[i], ref[i], errBound);
         if (!valid)
         {
@@ -60,7 +62,12 @@ bool verifyAbsError(const CTensor* ref, const CTensor* refBnd, const CTensor* im
     switch (imp->data_type)
     {
         case tosa_datatype_fp32_t: {
-            const float* impData = reinterpret_cast<const float*>(imp->data);
+            const auto* impData = reinterpret_cast<const float*>(imp->data);
             TOSA_REF_REQUIRE(impData != nullptr, "[AE] Missing data for implementation");
             return validateData(refData, refBndData, impData, refShape);
         }
+        case tosa_datatype_fp16_t: {
+            const auto* impData = reinterpret_cast<const half_float::half*>(imp->data);
+            TOSA_REF_REQUIRE(impData != nullptr, "[AE] Missing data for implementation");
+            return validateData(refData, refBndData, impData, refShape);
+        }
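The ABS_ERROR bound above is errBound = |ref| * 2^-normal_frac * bnd, so the tolerance widens automatically for FP16 (10 fraction bits) relative to FP32 (23 fraction bits). A standalone numeric illustration (the values are arbitrary):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double ref = 1.5;    // reference output value
        const double bnd = 4.0;    // per-element value from the bounds tensor
        // One part in 2^frac of the reference magnitude, scaled by bnd:
        std::printf("fp32 errBound = %g\n", std::abs(ref) * std::exp2(-23) * bnd);    // ~7.2e-7
        std::printf("fp16 errBound = %g\n", std::abs(ref) * std::exp2(-10) * bnd);    // ~5.9e-3
        return 0;
    }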
#include "func_debug.h" +#include "half.hpp" #include "verifiers.h" #include @@ -25,13 +26,19 @@ namespace TosaReference namespace { // Generic element validation function -template , int> = 0> +template std::optional validateElement(size_t index, double ref, double bnd, AccType imp, size_t KS) { double err = 0.0; bool is_valid = true; - if (bnd == 0.0) + if (std::isinf(static_cast(bnd))) + { + // dot product can overflow and there is no accuracy limit + is_valid = true; + err = 0.0; + } + else if (bnd == 0.0) { is_valid = (ref == 0.0) && (imp == 0.0); if (!is_valid) @@ -40,12 +47,6 @@ std::optional validateElement(size_t index, double ref, double bnd, AccT } err = 0.0; } - else if (std::isinf(static_cast(bnd))) - { - // dot product can overflow and there is no accuracy limit - is_valid = true; - err = 0.0; - } else { // 0.0 < bnd < infinity @@ -64,7 +65,7 @@ std::optional validateElement(size_t index, double ref, double bnd, AccT } // Generic data validation function -template , int> = 0> +template bool validateData(const double* ref, const double* bnd, const AccType* imp, size_t T, const DotProductVerifyInfo& cfg) { const int32_t S = cfg.s; @@ -121,6 +122,12 @@ bool verifyDotProduct(const CTensor* ref, const CTensor* refBnd, const CTensor* return validateData(refData, refBndData, impData, static_cast(T), dpInfo); break; } + case tosa_datatype_fp16_t: { + const half_float::half* impData = reinterpret_cast(imp->data); + TOSA_REF_REQUIRE(impData != nullptr, "[DP] Missing data for implementation"); + return validateData(refData, refBndData, impData, static_cast(T), dpInfo); + break; + } default: { WARNING("[Verifier][DP] Data-type not supported."); break; diff --git a/reference_model/src/verify/verify_exact.cc b/reference_model/src/verify/verify_exact.cc index 36b4ec9..971df9c 100644 --- a/reference_model/src/verify/verify_exact.cc +++ b/reference_model/src/verify/verify_exact.cc @@ -13,12 +13,14 @@ // limitations under the License. #include "func_debug.h" +#include "half.hpp" #include "verifiers.h" #include namespace { -bool exact_fp32(const double& referenceValue, const float& implementationValue) +template +bool exact_fp(const double& referenceValue, const OutDtype& implementationValue) { return std::isnan(referenceValue) ? 
diff --git a/reference_model/src/verify/verify_exact.cc b/reference_model/src/verify/verify_exact.cc
index 36b4ec9..971df9c 100644
--- a/reference_model/src/verify/verify_exact.cc
+++ b/reference_model/src/verify/verify_exact.cc
@@ -13,12 +13,14 @@
 // limitations under the License.

 #include "func_debug.h"
+#include "half.hpp"
 #include "verifiers.h"

 #include

 namespace
 {
-bool exact_fp32(const double& referenceValue, const float& implementationValue)
+template <typename OutDtype>
+bool exact_fp(const double& referenceValue, const OutDtype& implementationValue)
 {
     return std::isnan(referenceValue) ? std::isnan(implementationValue) : (referenceValue == implementationValue);
 }
@@ -38,16 +40,24 @@ bool verifyExact(const CTensor* referenceTensor, const CTensor* implementationTe
         numElements(std::vector<int32_t>(referenceTensor->shape, referenceTensor->shape + referenceTensor->num_dims));
     TOSA_REF_REQUIRE(elementCount > 0, "[E] Invalid shape for reference tensor");

+    TOSA_REF_REQUIRE(referenceTensor->data_type == tosa_datatype_fp64_t, "[E] Reference tensor is not fp64");
+    const auto* refData = reinterpret_cast<const double*>(referenceTensor->data);
+    TOSA_REF_REQUIRE(refData != nullptr, "[E] Missing data for reference");
+
     switch (implementationTensor->data_type)
     {
         case tosa_datatype_fp32_t: {
-            TOSA_REF_REQUIRE(referenceTensor->data_type == tosa_datatype_fp64_t, "[E] Reference tensor is not fp64");
-            const auto* refData = reinterpret_cast<const double*>(referenceTensor->data);
-            TOSA_REF_REQUIRE(refData != nullptr, "[E] Missing data for reference");
             const auto* impData = reinterpret_cast<const float*>(implementationTensor->data);
             TOSA_REF_REQUIRE(impData != nullptr, "[E] Missing data for implementation");
             auto result = std::equal(refData, std::next(refData, elementCount), impData,
-                                     std::next(impData, elementCount), exact_fp32);
+                                     std::next(impData, elementCount), exact_fp<float>);
+            return result;
+        }
+        case tosa_datatype_fp16_t: {
+            const auto* impData = reinterpret_cast<const half_float::half*>(implementationTensor->data);
+            TOSA_REF_REQUIRE(impData != nullptr, "[E] Missing data for implementation");
+            auto result = std::equal(refData, std::next(refData, elementCount), impData,
+                                     std::next(impData, elementCount), exact_fp<half_float::half>);
             return result;
         }
         default:
diff --git a/reference_model/src/verify/verify_ulp.cc b/reference_model/src/verify/verify_ulp.cc
index 6e78b96..1b38fe6 100644
--- a/reference_model/src/verify/verify_ulp.cc
+++ b/reference_model/src/verify/verify_ulp.cc
@@ -18,6 +18,7 @@
 #include
 #include

+#include "half.hpp"
 #include "verifiers.h"

 namespace TosaReference
@@ -25,7 +26,8 @@ namespace TosaReference
 namespace
 {

-bool tosaCheckULP(float testValue, double referenceValue, double ulpNum)
+template <typename OutType>
+bool tosaCheckULP(OutType testValue, double referenceValue, double ulpNum)
 {
     double errorBound = 0.0;
     if (std::isfinite(referenceValue) && std::abs(referenceValue) != 0.0)
@@ -35,10 +37,10 @@ bool tosaCheckULP(float testValue, double referenceValue, double ulpNum)

         // Work out the values magnitude - by raising 2 to the power of the
         // exponent and taking the normalized minimum for denormal values
-        const double referencePower2 = std::max(exp2(referenceExponent), AccPrecision<float>::normal_min);
+        const double referencePower2 = std::max(exp2(referenceExponent), AccPrecision<OutType>::normal_min);
         // Get the value of changing the last bit - by shifting the least significant bit to this magnitude
         // i.e. the ULP.
-        double ulpValue = referencePower2 * exp2(-AccPrecision<float>::normal_frac);
+        double ulpValue = referencePower2 * exp2(-AccPrecision<OutType>::normal_frac);

         errorBound = ulpValue * ulpNum;
     }
@@ -57,15 +59,35 @@ bool verifyULP(const CTensor* referenceTensor, const CTensor* implementationTens
     const auto elementCount = numElements(refShape);
     TOSA_REF_REQUIRE(elementCount > 0, "[ULP] Invalid shape for reference tensor");

-    const double ulp = ulpInfo.ulp;
+    const double ulp       = ulpInfo.ulp;
+    const auto* refData    = reinterpret_cast<const double*>(referenceTensor->data);
+    TOSA_REF_REQUIRE(refData != nullptr, "[ULP] Missing data for reference");
+    const auto* refDataEnd = std::next(refData, elementCount);

     switch (implementationTensor->data_type)
     {
         case tosa_datatype_fp32_t: {
-            const auto* refData = reinterpret_cast<const double*>(referenceTensor->data);
-            TOSA_REF_REQUIRE(refData != nullptr, "[ULP] Missing data for reference");
             const auto* impData = reinterpret_cast<const float*>(implementationTensor->data);
             TOSA_REF_REQUIRE(impData != nullptr, "[ULP] Missing data for implementation");
-            const auto* refDataEnd = std::next(refData, elementCount);
             // Use mismatch to get the location of the first unequal value
             auto pair = std::mismatch(refData, refDataEnd, impData, std::next(impData, elementCount),
                                       [ulp](const auto& referenceValue, const auto& implementationValue) {
                                           return tosaCheckULP(implementationValue, referenceValue, ulp);
                                       });
             if (std::get<0>(pair) == refDataEnd)
             {
                 // No mismatch found
                 return true;
             }
             else
             {
                 auto pos = indexToPosition(std::get<0>(pair) - refData, refShape);
                 WARNING("[Verfier][ULP] Location %s", positionToString(pos).c_str());
                 return false;
             }
         }
+        case tosa_datatype_fp16_t: {
+            const auto* impData = reinterpret_cast<const half_float::half*>(implementationTensor->data);
+            TOSA_REF_REQUIRE(impData != nullptr, "[ULP] Missing data for implementation");
+            // Use mismatch to get the location of the first unequal value
+            auto pair = std::mismatch(refData, refDataEnd, impData, std::next(impData, elementCount),
+                                      [ulp](const auto& referenceValue, const auto& implementationValue) {
+                                          return tosaCheckULP(implementationValue, referenceValue, ulp);
+                                      });
+            if (std::get<0>(pair) == refDataEnd)
+            {
+                // No mismatch found
+                return true;
+            }
+            else
+            {
+                auto pos = indexToPosition(std::get<0>(pair) - refData, refShape);
+                WARNING("[Verfier][ULP] Location %s", positionToString(pos).c_str());
+                return false;
+            }
+        }
diff --git a/reference_model/src/verify/verify_utils.cc b/reference_model/src/verify/verify_utils.cc
index 9aa6ba2..3bdc99f 100644
--- a/reference_model/src/verify/verify_utils.cc
+++ b/reference_model/src/verify/verify_utils.cc
@@ -202,7 +202,8 @@ static_assert(std::numeric_limits<double>::is_iec559,
               "TOSA Reference Model has not been built with standard IEEE 754 64-bit float support; Bounds based "
               "verification is invalid");

-bool tosaCheckFloatBound(float testValue, double referenceValue, double errorBound)
+template <typename OutType>
+bool tosaCheckFloatBound(OutType testValue, double referenceValue, double errorBound)
 {
     // Both must be NaNs to be correct
     if (std::isnan(referenceValue) || std::isnan(testValue))
@@ -236,8 +237,8 @@ bool tosaCheckFloatBou
     {
         // We already canonicalized the input such that the reference value is positive
         // so no need to check again here.
-        referenceMin = std::numeric_limits<float>::infinity();
-        referenceMax = std::numeric_limits<float>::infinity();
+        referenceMin = std::numeric_limits<OutType>::infinity();
+        referenceMax = std::numeric_limits<OutType>::infinity();
     }
     else if (referenceValue == 0)
     {
@@ -253,23 +254,23 @@ bool tosaCheckFloatBou
         referenceMin = referenceValue - errorBound;

         // Handle the overflow cases.
-        if (referenceMax > AccPrecision<float>::normal_max)
+        if (referenceMax > AccPrecision<OutType>::normal_max)
         {
-            referenceMax = std::numeric_limits<float>::infinity();
+            referenceMax = std::numeric_limits<OutType>::infinity();
         }

-        if (referenceMin > AccPrecision<float>::normal_max)
+        if (referenceMin > AccPrecision<OutType>::normal_max)
         {
-            referenceMin = std::numeric_limits<float>::infinity();
+            referenceMin = std::numeric_limits<OutType>::infinity();
         }

         // And the underflow cases.
-        if (referenceMax < AccPrecision<float>::normal_min)
+        if (referenceMax < AccPrecision<OutType>::normal_min)
         {
-            referenceMax = AccPrecision<float>::normal_min;
+            referenceMax = AccPrecision<OutType>::normal_min;
         }

-        if (referenceMin < AccPrecision<float>::normal_min)
+        if (referenceMin < AccPrecision<OutType>::normal_min)
         {
             referenceMin = 0.0;
         }
@@ -286,4 +287,8 @@ bool tosaCheckFloatBou
     }
     return withinBound;
 }
+
+// Instantiate the needed check functions
+template bool tosaCheckFloatBound(float testValue, double referenceValue, double errorBound);
+template bool tosaCheckFloatBound(half_float::half testValue, double referenceValue, double errorBound);
 }    // namespace TosaReference
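With tosaCheckFloatBound instantiated for half_float::half, a bound expressed in FP64 can be checked directly against FP16 outputs. A usage sketch (assumes linking against the reference model, with verify_utils.h on the include path):

    #include <cmath>
    #include "half.hpp"
    #include "verify_utils.h"    // declares TosaReference::tosaCheckFloatBound

    bool demoUlpCheck()
    {
        // 1 + 2^-10 is exactly one FP16 ULP above 1.0.
        const half_float::half test(1.0009765625f);
        const double ref        = 1.0;
        const double errorBound = std::exp2(-10);    // one ULP at magnitude 1.0 in FP16
        return TosaReference::tosaCheckFloatBound(test, ref, errorBound);    // within bound
    }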
diff --git a/reference_model/src/verify/verify_utils.h b/reference_model/src/verify/verify_utils.h
index a58950e..45daeac 100644
--- a/reference_model/src/verify/verify_utils.h
+++ b/reference_model/src/verify/verify_utils.h
@@ -17,6 +17,7 @@
 #define VERIFY_UTILS_H_

 #include "dtype.h"
+#include "half.hpp"
 #include "types.h"

 #include
@@ -135,10 +136,17 @@ struct AccPrecision
     static constexpr double normal_max   = const_exp2(128) - const_exp2(127 - 23);
     static constexpr int32_t normal_frac = 23;
 };
+template <>
+struct AccPrecision<half_float::half>
+{
+    static constexpr double normal_min   = const_exp2(-14);
+    static constexpr double normal_max   = const_exp2(16) - const_exp2(15 - 10);
+    static constexpr int32_t normal_frac = 10;
+};

 /// \brief Error bounds check for ULP and ABS_ERROR modes
-bool tosaCheckFloatBound(float testValue, double referenceValue, double errorBound);
-
+template <typename OutType>
+bool tosaCheckFloatBound(OutType testValue, double referenceValue, double errorBound);
 };    // namespace TosaReference

 #endif    // VERIFY_UTILS_H_
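A quick compile-time sanity check of the new FP16 constants against IEEE 754 binary16 (standalone sketch; assumes AccPrecision lives in namespace TosaReference as declared in verify_utils.h):

    #include "half.hpp"
    #include "verify_utils.h"

    using FP16Precision = TosaReference::AccPrecision<half_float::half>;
    // Largest normal binary16 value: 2^16 - 2^5 = 65504.
    static_assert(FP16Precision::normal_max == 65504.0, "FP16 max normal");
    // Smallest normal binary16 value: 2^-14.
    static_assert(FP16Precision::normal_min == 0.00006103515625, "FP16 min normal");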
diff --git a/verif/generator/datagenerator.py b/verif/generator/datagenerator.py
index 0d59084..9de421b 100644
--- a/verif/generator/datagenerator.py
+++ b/verif/generator/datagenerator.py
@@ -68,19 +68,33 @@ class GenerateLibrary:

     def _create_buffer(self, dtype: str, shape: tuple):
         """Helper to create a buffer of the required type."""
-        size = 1
-        for dim in shape:
-            size *= dim
+        size = np.prod(shape)

         if dtype == "FP32":
             # Create buffer and initialize to zero
             buffer = (ct.c_float * size)(0)
             size_bytes = size * 4
+        elif dtype == "FP16":
+            size_bytes = size * 2
+            # Create buffer of bytes and initialize to zero
+            buffer = (ct.c_ubyte * size_bytes)(0)
         else:
             raise GenerateError(f"Unsupported data type {dtype}")

         return buffer, size_bytes

+    def _convert_buffer(self, buffer, dtype: str, shape: tuple):
+        """Helper to convert a buffer to a numpy array."""
+        arr = np.ctypeslib.as_array(buffer)
+
+        if dtype == "FP16":
+            # Convert from bytes back to FP16
+            arr = np.frombuffer(arr, np.float16)
+
+        arr = np.reshape(arr, shape)
+
+        return arr
+
     def _data_gen_array(self, json_config: str, tensor_name: str):
         """Generate the named tensor data and return a numpy array."""
         try:
@@ -106,9 +120,7 @@ class GenerateLibrary:
         if not result:
             raise GenerateError("Data generate failed")

-        arr = np.ctypeslib.as_array(buffer)
-        arr = np.reshape(arr, shape)
-
+        arr = self._convert_buffer(buffer, dtype, shape)
         return arr

     def _data_gen_write(
diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py
index 8e88390..193da73 100644
--- a/verif/generator/tosa_arg_gen.py
+++ b/verif/generator/tosa_arg_gen.py
@@ -1415,9 +1415,9 @@ class TosaTensorValuesGen:
         if (
             error_name is None
             and argsDict["dg_type"] != gtu.ComplianceMode.DOT_PRODUCT
-            and dtype in (DType.FP16, DType.BF16)
+            and dtype in (DType.BF16,)
         ):
-            # TODO - Remove once FP16 and BF16 enabled for DOT_PRODUCT compliance
+            # TODO - Remove once BF16 enabled for DOT_PRODUCT compliance
             # Limit ranges for (non error & non compliance) FP tests by using
             # values that can be multiplied on any axis to not hit infinity/NaN
             IC = shapeList[0][1]
diff --git a/verif/generator/tosa_utils.py b/verif/generator/tosa_utils.py
index 318f296..3d733f4 100644
--- a/verif/generator/tosa_utils.py
+++ b/verif/generator/tosa_utils.py
@@ -55,7 +55,7 @@ def dtypeIsSupportedByCompliance(dtype):
     """Types supported by the new data generation and compliance flow."""
     if isinstance(dtype, list) or isinstance(dtype, tuple):
         dtype = dtype[0]
-    return dtype in (DType.FP32,)
+    return dtype in (DType.FP32, DType.FP16)


 def getOpNameFromOpListName(opName):
--
cgit v1.2.1