From 2e4d889fb036d1c0a34503400a3f45cfc6f9f3e1 Mon Sep 17 00:00:00 2001
From: Matthew Sloyan <matthew.sloyan@arm.com>
Date: Tue, 18 Oct 2022 18:02:48 +0100
Subject: Add FP16 support to IModelRunner

 * Added specific FP16 readfromVector and writeToVector methods.
 * Added FP16 support to float readfromVector and writeToVector methods.
 * Changed IModelRunner::setInput to take its input vector by reference.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I6b66468737e672afc925ccad4fb710fbb9427c14
---
 reference_model/include/model_runner.h   |  2 +-
 reference_model/src/model_runner.cc      | 12 +++---
 reference_model/src/model_runner_impl.cc | 14 ++++---
 reference_model/src/model_runner_impl.h  |  2 +-
 reference_model/src/tensor.cc            | 65 ++++++++++++++++++++++++++++++++
 reference_model/src/tensor.h             |  2 +
 6 files changed, 84 insertions(+), 13 deletions(-)
diff --git a/reference_model/include/model_runner.h b/reference_model/include/model_runner.h
index 4629467..4335794 100644
--- a/reference_model/include/model_runner.h
+++ b/reference_model/include/model_runner.h
@@ -68,7 +68,7 @@ public:
      * NOTE: setInput() must be called for each input tensor before run() is called.
      */
     template <typename T>
-    int setInput(std::string input_name, std::vector<T> vals);
+    int setInput(std::string input_name, std::vector<T>& vals);
 
     /*
      * Retrieve the output tensors from the graph after running.
diff --git a/reference_model/src/model_runner.cc b/reference_model/src/model_runner.cc
index 2395a85..5c086e6 100644
--- a/reference_model/src/model_runner.cc
+++ b/reference_model/src/model_runner.cc
@@ -53,7 +53,7 @@ GraphStatus IModelRunner::run()
 }
 
 template <typename T>
-int IModelRunner::setInput(std::string input_name, std::vector<T> vals)
+int IModelRunner::setInput(std::string input_name, std::vector<T>& vals)
 {
     return model_runner_impl->setInput<T>(input_name, vals);
 }
@@ -65,12 +65,14 @@ std::vector<T> IModelRunner::getOutput(std::string output_name)
 }
 
 // Template explicit specialization
-template int IModelRunner::setInput<float>(std::string input_name, std::vector<float> vals);
-template int IModelRunner::setInput<int32_t>(std::string input_name, std::vector<int32_t> vals);
-template int IModelRunner::setInput<int64_t>(std::string input_name, std::vector<int64_t> vals);
-template int IModelRunner::setInput<unsigned char>(std::string input_name, std::vector<unsigned char> vals);
+template int IModelRunner::setInput<float>(std::string input_name, std::vector<float>& vals);
+template int IModelRunner::setInput<half_float::half>(std::string input_name, std::vector<half_float::half>& vals);
+template int IModelRunner::setInput<int32_t>(std::string input_name, std::vector<int32_t>& vals);
+template int IModelRunner::setInput<int64_t>(std::string input_name, std::vector<int64_t>& vals);
+template int IModelRunner::setInput<unsigned char>(std::string input_name, std::vector<unsigned char>& vals);
 
 template std::vector<float> IModelRunner::getOutput<float>(std::string output_name);
+template std::vector<half_float::half> IModelRunner::getOutput<half_float::half>(std::string output_name);
 template std::vector<int32_t> IModelRunner::getOutput<int32_t>(std::string output_name);
 template std::vector<int64_t> IModelRunner::getOutput<int64_t>(std::string output_name);
 template std::vector<unsigned char> IModelRunner::getOutput<unsigned char>(std::string output_name);
\ No newline at end of file
diff --git a/reference_model/src/model_runner_impl.cc b/reference_model/src/model_runner_impl.cc
index e0fdc49..8427150 100644
--- a/reference_model/src/model_runner_impl.cc
+++ b/reference_model/src/model_runner_impl.cc
@@ -156,7 +156,7 @@ done:
 }
 
 template <typename T>
-int ModelRunnerImpl::setInput(std::string input_name, std::vector<T> vals)
+int ModelRunnerImpl::setInput(std::string input_name, std::vector<T>& vals)
 {
     if (_main_gt == nullptr)
     {
@@ -214,7 +214,7 @@ std::vector<T> ModelRunnerImpl::getOutput(std::string output_name)
         return std::vector<T>();
     }
 
-    std::vector<T> outputs(tensor->getElementCount(), 0);
+    std::vector<T> outputs(tensor->getElementCount());
 
     if (tensor->writeToVector(outputs))
     {
@@ -266,12 +266,14 @@ void ModelRunnerImpl::checkGraphStatus(SubgraphTraverser& main_gt)
 }
 
 // Template explicit specialization
-template int ModelRunnerImpl::setInput<float>(std::string input_name, std::vector<float> vals);
-template int ModelRunnerImpl::setInput<int32_t>(std::string input_name, std::vector<int32_t> vals);
-template int ModelRunnerImpl::setInput<int64_t>(std::string input_name, std::vector<int64_t> vals);
-template int ModelRunnerImpl::setInput<unsigned char>(std::string input_name, std::vector<unsigned char> vals);
+template int ModelRunnerImpl::setInput<float>(std::string input_name, std::vector<float>& vals);
+template int ModelRunnerImpl::setInput<half_float::half>(std::string input_name, std::vector<half_float::half>& vals);
+template int ModelRunnerImpl::setInput<int32_t>(std::string input_name, std::vector<int32_t>& vals);
+template int ModelRunnerImpl::setInput<int64_t>(std::string input_name, std::vector<int64_t>& vals);
+template int ModelRunnerImpl::setInput<unsigned char>(std::string input_name, std::vector<unsigned char>& vals);
 
 template std::vector<float> ModelRunnerImpl::getOutput<float>(std::string output_name);
+template std::vector<half_float::half> ModelRunnerImpl::getOutput<half_float::half>(std::string output_name);
 template std::vector<int32_t> ModelRunnerImpl::getOutput<int32_t>(std::string output_name);
 template std::vector<int64_t> ModelRunnerImpl::getOutput<int64_t>(std::string output_name);
 template std::vector<unsigned char> ModelRunnerImpl::getOutput<unsigned char>(std::string output_name);
\ No newline at end of file
diff --git a/reference_model/src/model_runner_impl.h b/reference_model/src/model_runner_impl.h
index 7a91bfe..f26c484 100644
--- a/reference_model/src/model_runner_impl.h
+++ b/reference_model/src/model_runner_impl.h
@@ -46,7 +46,7 @@ public:
     GraphStatus run();
 
     template <typename T>
-    int setInput(std::string input_name, std::vector<T> vals);
+    int setInput(std::string input_name, std::vector<T>& vals);
 
     template <typename T>
     std::vector<T> getOutput(std::string output_name);
diff --git a/reference_model/src/tensor.cc b/reference_model/src/tensor.cc
index 8e65a27..8d192ca 100644
--- a/reference_model/src/tensor.cc
+++ b/reference_model/src/tensor.cc
@@ -429,6 +429,7 @@ int TosaReference::Tensor::readfromVector(const std::vector<float>& vals)
     uint32_t elements = getElementCount();
     switch (getDtype())
     {
+        case DType_FP16:
         case DType_FP32:
             if (vals.size() != elements)
             {
@@ -448,6 +449,38 @@ int TosaReference::Tensor::readfromVector(const std::vector<float>& vals)
     return 0;
 }
 
+int TosaReference::Tensor::readfromVector(const std::vector<half_float::half>& vals)
+{
+    uint32_t elements = getElementCount();
+    std::vector<float> tensor(elements);    // fp32 staging buffer handed to setTensorValueFloat() below
+    // Loads fp16 values into an FP16 tensor. Returns 0 on success, -1 on size mismatch, -2 on dtype mismatch.
+    switch (getDtype())
+    {
+        case DType_FP16:
+            if (vals.size() != elements)
+            {
+                WARNING("The input size (%zu) doesn't match the number of elements (%u) assigned to the tensor.",
+                        vals.size(), elements);    // %zu / %u: size_t and uint32_t are both unsigned
+                return -1;
+            }
+
+            // Widen each fp16 input to fp32, because the values are stored through the float setter
+            for (uint32_t i=0; i < elements; i++)
+            {
+                tensor[i] = half_float::half_cast<float, half_float::half>(vals[i]);
+            }
+
+            setTensorValueFloat(elements, tensor.data());
+            break;
+        default:
+            WARNING("The input type doesn't match the data type assigned to the tensor (%s).",
+                    EnumNameDType(getDtype()));
+            return -2;
+    }
+    setIsValid();    // reached only on the success path; both error paths return above
+    return 0;
+}
+
 int TosaReference::Tensor::readfromVector(const std::vector<int32_t>& vals)
 {
     uint32_t elements = getElementCount();
@@ -532,6 +565,7 @@ int TosaReference::Tensor::writeToVector(std::vector<float>& vals)
 
     switch (getDtype())
     {
+        case DType_FP16:
         case DType_FP32:
             if (vals.size() != elements)
             {
@@ -550,6 +584,37 @@ int TosaReference::Tensor::writeToVector(std::vector<float>& vals)
     return 0;
 }
 
+int TosaReference::Tensor::writeToVector(std::vector<half_float::half>& vals)
+{
+    uint32_t elements = getElementCount();
+    std::vector<float> tensor(elements);    // fp32 staging buffer passed to getTensorValueFloat() below
+    // Copies an FP16 tensor out as fp16 values. Returns 0 on success, -1 on size mismatch, -2 on dtype mismatch.
+    switch (getDtype())
+    {
+        case DType_FP16:
+            if (vals.size() != elements)
+            {
+                WARNING("The output size (%zu) doesn't match the number of elements (%u) assigned to the tensor.",
+                        vals.size(), elements);    // %zu / %u: size_t and uint32_t are both unsigned
+                return -1;
+            }
+
+            getTensorValueFloat(elements, tensor.data());
+
+            // Narrow each fp32 value to fp16; vals is never resized here, its size was checked above
+            for (uint32_t i=0; i < elements; i++)
+            {
+                vals[i] = half_float::half_cast<half_float::half, float>(tensor[i]);
+            }
+            break;
+        default:
+            WARNING("The output type doesn't match the data type assigned to the tensor (%s).",
+                    EnumNameDType(getDtype()));
+            return -2;
+    }
+    return 0;
+}
+
 int TosaReference::Tensor::writeToVector(std::vector<int32_t>& vals)
 {
     uint32_t elements = getElementCount();
diff --git a/reference_model/src/tensor.h b/reference_model/src/tensor.h
index efd7e62..4efbf84 100644
--- a/reference_model/src/tensor.h
+++ b/reference_model/src/tensor.h
@@ -229,11 +229,13 @@ public:
     virtual int copyValueFrom(Tensor* tensor) = 0;
 
     virtual int readfromVector(const std::vector<float>& vals);
+    virtual int readfromVector(const std::vector<half_float::half>& vals);
     virtual int readfromVector(const std::vector<int32_t>& vals);
     virtual int readfromVector(const std::vector<int64_t>& vals);
     virtual int readfromVector(const std::vector<unsigned char>& vals);
 
     virtual int writeToVector(std::vector<float>& vals);
+    virtual int writeToVector(std::vector<half_float::half>& vals);
     virtual int writeToVector(std::vector<int32_t>& vals);
     virtual int writeToVector(std::vector<int64_t>& vals);
     virtual int writeToVector(std::vector<unsigned char>& vals);
-- 
cgit v1.2.1