From c5291695f04901e8abbc26dad6cba10e2c7685f8 Mon Sep 17 00:00:00 2001
From: Jerry Ge <jerry.ge@arm.com>
Date: Tue, 2 Jan 2024 22:29:08 +0000
Subject: Save Int8/UInt8 reference outputs to native dtypes

* Int8/UInt8 reference outputs were previously saved as INT32
* Save them in their native dtypes and update the other affected code

Signed-off-by: Jerry Ge <jerry.ge@arm.com>
Georgios Pinitas <georgios.pinitas@arm.com>

Change-Id: Id65fc8773150d3c56bc2c72789a6a0d3c78cd363
---
 reference_model/src/model_runner.cc                |    4 +
 reference_model/src/model_runner_impl.cc           |   16 +
 reference_model/src/tensor.cc                      | 1028 +++++++++++++++++---
 reference_model/src/tensor.h                       |   70 ++
 .../tosa_verif_framework_compiler_runner.py        |   11 +-
 verif/generator/tosa_test_gen.py                   |   15 +-
 6 files changed, 1011 insertions(+), 133 deletions(-)
diff --git a/reference_model/src/model_runner.cc b/reference_model/src/model_runner.cc
index 6f65202..28ad72c 100644
--- a/reference_model/src/model_runner.cc
+++ b/reference_model/src/model_runner.cc
@@ -82,12 +82,16 @@ int IModelRunner::getOutput(std::string output_name, uint8_t* raw_ptr, size_t si
 // Template explicit specialization
 template int IModelRunner::setInput<float>(std::string input_name, std::vector<float>& vals);
 template int IModelRunner::setInput<half_float::half>(std::string input_name, std::vector<half_float::half>& vals);
+template int IModelRunner::setInput<int8_t>(std::string input_name, std::vector<int8_t>& vals);
+template int IModelRunner::setInput<int16_t>(std::string input_name, std::vector<int16_t>& vals);
 template int IModelRunner::setInput<int32_t>(std::string input_name, std::vector<int32_t>& vals);
 template int IModelRunner::setInput<int64_t>(std::string input_name, std::vector<int64_t>& vals);
 template int IModelRunner::setInput<unsigned char>(std::string input_name, std::vector<unsigned char>& vals);
 
 template std::vector<float> IModelRunner::getOutput<float>(std::string output_name);
 template std::vector<half_float::half> IModelRunner::getOutput<half_float::half>(std::string output_name);
+template std::vector<int8_t> IModelRunner::getOutput<int8_t>(std::string output_name);
+template std::vector<int16_t> IModelRunner::getOutput<int16_t>(std::string output_name);
 template std::vector<int32_t> IModelRunner::getOutput<int32_t>(std::string output_name);
 template std::vector<int64_t> IModelRunner::getOutput<int64_t>(std::string output_name);
 template std::vector<unsigned char> IModelRunner::getOutput<unsigned char>(std::string output_name);
diff --git a/reference_model/src/model_runner_impl.cc b/reference_model/src/model_runner_impl.cc
index bf23bac..b01b90c 100644
--- a/reference_model/src/model_runner_impl.cc
+++ b/reference_model/src/model_runner_impl.cc
@@ -243,6 +243,12 @@ int ModelRunnerImpl::setInput(std::string input_name, uint8_t* raw_ptr, size_t s
                 status             = setInput(input_name, ArrayProxy(elements, typed_ptr));
             }
             break;
+        case TOSA_REF_TYPE_INT8: {
+            auto typed_ptr     = reinterpret_cast<int8_t*>(raw_ptr);
+            const int elements = size / sizeof(int8_t);
+            status             = setInput(input_name, ArrayProxy(elements, typed_ptr));
+            break;
+        }
         case TOSA_REF_TYPE_INT16: {
             auto typed_ptr     = reinterpret_cast<int16_t*>(raw_ptr);
             const int elements = size / sizeof(int16_t);
@@ -339,6 +345,12 @@ int ModelRunnerImpl::getOutput(std::string output_name, uint8_t* raw_ptr, size_t
             status             = tensor->writeToVector(ArrayProxy(elements, typed_ptr));
             break;
         }
+        case TOSA_REF_TYPE_INT8: {
+            auto typed_ptr     = reinterpret_cast<int8_t*>(raw_ptr);
+            const int elements = size / sizeof(int8_t);
+            status             = tensor->writeToVector(ArrayProxy(elements, typed_ptr));
+            break;
+        }
         case TOSA_REF_TYPE_INT16: {
             auto typed_ptr     = reinterpret_cast<int16_t*>(raw_ptr);
             const int elements = size / sizeof(int16_t);
@@ -449,6 +461,8 @@ void ModelRunnerImpl::checkGraphStatus(SubgraphTraverser& main_gt)
 template int ModelRunnerImpl::setInput<double>(std::string input_name, ArrayProxy<double> vals);
 template int ModelRunnerImpl::setInput<float>(std::string input_name, ArrayProxy<float> vals);
 template int ModelRunnerImpl::setInput<half_float::half>(std::string input_name, ArrayProxy<half_float::half> vals);
+template int ModelRunnerImpl::setInput<int8_t>(std::string input_name, ArrayProxy<int8_t> vals);
+template int ModelRunnerImpl::setInput<int16_t>(std::string input_name, ArrayProxy<int16_t> vals);
 template int ModelRunnerImpl::setInput<int32_t>(std::string input_name, ArrayProxy<int32_t> vals);
 template int ModelRunnerImpl::setInput<int64_t>(std::string input_name, ArrayProxy<int64_t> vals);
 template int ModelRunnerImpl::setInput<unsigned char>(std::string input_name, ArrayProxy<unsigned char> vals);
@@ -456,6 +470,8 @@ template int ModelRunnerImpl::setInput<unsigned char>(std::string input_name, Ar
 template std::vector<double> ModelRunnerImpl::getOutput<double>(std::string output_name);
 template std::vector<float> ModelRunnerImpl::getOutput<float>(std::string output_name);
 template std::vector<half_float::half> ModelRunnerImpl::getOutput<half_float::half>(std::string output_name);
+template std::vector<int8_t> ModelRunnerImpl::getOutput<int8_t>(std::string output_name);
+template std::vector<int16_t> ModelRunnerImpl::getOutput<int16_t>(std::string output_name);
 template std::vector<int32_t> ModelRunnerImpl::getOutput<int32_t>(std::string output_name);
 template std::vector<int64_t> ModelRunnerImpl::getOutput<int64_t>(std::string output_name);
 template std::vector<unsigned char> ModelRunnerImpl::getOutput<unsigned char>(std::string output_name);
diff --git a/reference_model/src/tensor.cc b/reference_model/src/tensor.cc
index e84507b..f9ec937 100644
--- a/reference_model/src/tensor.cc
+++ b/reference_model/src/tensor.cc
@@ -323,6 +323,8 @@ int TosaReference::Tensor::writeToNpyFile(const char* filename) const
     float* f32databuf               = nullptr;
     double* f64databuf              = nullptr;
     half_float::half* f16databuf    = nullptr;
+    uint8_t* ui8databuf             = nullptr;
+    int8_t* i8databuf               = nullptr;
     int32_t* i32databuf             = nullptr;
     int64_t* i64databuf             = nullptr;
     bool* bdatabuf                  = nullptr;
@@ -369,9 +371,48 @@ int TosaReference::Tensor::writeToNpyFile(const char* filename) const
             free(f16databuf);
             break;
         case TOSA_REF_TYPE_INT32:
+            i32databuf = (int32_t*)calloc(sizeof(int32_t), elements);
+            ASSERT_MEM(i32databuf);
+
+            if (getTensorValueInt32(elements, i32databuf))
+            {
+                free(i32databuf);
+                return 1;
+            }
+
+            nperror = NumpyUtilities::writeToNpyFile(filename, shape, i32databuf);
+
+            free(i32databuf);
+            break;
         case TOSA_REF_TYPE_UINT8:
+            ui8databuf = (uint8_t*)calloc(sizeof(uint8_t), elements);
+            ASSERT_MEM(ui8databuf);
+
+            if (getTensorValueUInt8(elements, ui8databuf))
+            {
+                free(ui8databuf);
+                return 1;
+            }
+
+            nperror = NumpyUtilities::writeToNpyFile(filename, shape, ui8databuf);
+
+            free(ui8databuf);
+            break;
         case TOSA_REF_TYPE_INT4:
         case TOSA_REF_TYPE_INT8:
+            i8databuf = (int8_t*)calloc(sizeof(int8_t), elements);
+            ASSERT_MEM(i8databuf);
+
+            if (getTensorValueInt8(elements, i8databuf))
+            {
+                free(i8databuf);
+                return 1;
+            }
+
+            nperror = NumpyUtilities::writeToNpyFile(filename, shape, i8databuf);
+
+            free(i8databuf);
+            break;
         case TOSA_REF_TYPE_INT16:
         case TOSA_REF_TYPE_UINT16:
             i32databuf = (int32_t*)calloc(sizeof(int32_t), elements);
@@ -663,6 +704,31 @@ int TosaReference::Tensor::readfromVector(const ArrayProxy<half_float::half> val
     return 0;
 }
 
+int TosaReference::Tensor::readfromVector(const ArrayProxy<int8_t> vals)
+{    // Loads int8_t values into this tensor: 0 on success, -1 on size mismatch, -2 on dtype mismatch.
+    uint32_t elements = getElementCount();
+    switch (getDtype())
+    {
+        case TOSA_REF_TYPE_INT8:
+        case TOSA_REF_TYPE_UINT8:    // NOTE(review): UINT8 tensors share the int8_t setter - confirm sign handling
+            if (vals.size() != elements)
+            {
+                WARNING("The input size (%ld) doesn't match the number of elements (%d) assigned to the tensor.",
+                        vals.size(), elements);    // NOTE(review): if WARNING is printf-style, %zu/%u look safer
+                return -1;    // caller supplied a different number of values than the tensor holds
+            }
+
+            setTensorValueInt8(elements, vals.data());    // NOTE(review): setter's return value is not checked
+            break;
+        default:
+            WARNING("The input type doesn't match the data type assigned to the tensor (%s).",
+                    EnumNameTOSAREFTYPE(getDtype()));
+            return -2;    // tensor dtype is neither INT8 nor UINT8
+    }
+    setIsValid();    // reached only on the INT8/UINT8 path after the values were written
+    return 0;
+}
+
 int TosaReference::Tensor::readfromVector(const ArrayProxy<int16_t> vals)
 {
     uint32_t elements = getElementCount();
@@ -863,6 +929,30 @@ int TosaReference::Tensor::writeToVector(ArrayProxy<half_float::half> vals)
     return 0;
 }
 
+int TosaReference::Tensor::writeToVector(ArrayProxy<int8_t> vals)
+{    // Copies this tensor's values into vals: 0 on success, -1 on size mismatch, -2 on dtype mismatch.
+    uint32_t elements = getElementCount();
+    switch (getDtype())
+    {
+        case TOSA_REF_TYPE_INT8:
+        case TOSA_REF_TYPE_UINT8:    // NOTE(review): UINT8 tensors share the int8_t getter - confirm sign handling
+            if (vals.size() != elements)
+            {
+                WARNING("The output size (%ld) doesn't match the number of elements (%d) assigned to the tensor.",
+                        vals.size(), elements);    // NOTE(review): if WARNING is printf-style, %zu/%u look safer
+                return -1;    // destination holds a different number of values than the tensor
+            }
+
+            getTensorValueInt8(elements, vals.data());    // NOTE(review): getter's return value is not checked
+            break;
+        default:
+            WARNING("The output type doesn't match the data type assigned to the tensor (%s).",
+                    EnumNameTOSAREFTYPE(getDtype()));
+            return -2;    // tensor dtype is neither INT8 nor UINT8
+    }
+    return 0;
+}
+
 int TosaReference::Tensor::writeToVector(ArrayProxy<int16_t> vals)
 {
     uint32_t elements = getElementCount();
@@ -1415,15 +1505,15 @@ int TosaReference::Tensor6<double>::setTensorValueFloat(const size_t bufLen, con
 }
 
 template <class T>
-int TosaReference::TensorTemplate<T>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::TensorTemplate<T>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt32 should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueUInt8 should not be called.  "    // generic fallback: use a specialization
                 "Implement template specialization version.");
     return 0;
 }
 
 template <>
-int TosaReference::Tensor0<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor0<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
@@ -1433,7 +1523,7 @@ int TosaReference::Tensor0<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor1<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor1<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1448,7 +1538,7 @@ int TosaReference::Tensor1<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor2<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor2<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1466,7 +1556,7 @@ int TosaReference::Tensor2<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor3<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor3<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1487,7 +1577,7 @@ int TosaReference::Tensor3<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor4<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor4<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1511,7 +1601,7 @@ int TosaReference::Tensor4<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor5<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor5<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1538,7 +1628,7 @@ int TosaReference::Tensor5<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor6<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
+int TosaReference::Tensor6<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1567,25 +1657,25 @@ int TosaReference::Tensor6<int32_t>::setTensorValueInt16(const size_t bufLen, co
 }
 
 template <class T>
-int TosaReference::TensorTemplate<T>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::TensorTemplate<T>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt32 should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt8 should not be called.  "    // generic fallback: use a specialization
                 "Implement template specialization version.");
     return 0;
 }
 
 template <>
-int TosaReference::Tensor0<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor0<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
-    (*tensor)(0) = vals[0];
+    (*tensor)(0) = static_cast<int32_t>(vals[0]);
 
     return 0;
 }
 
 template <>
-int TosaReference::Tensor1<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor1<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1593,14 +1683,14 @@ int TosaReference::Tensor1<int32_t>::setTensorValueInt32(const size_t bufLen, co
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
-        (*tensor)(i0) = vals[idx++];
+        (*tensor)(i0) = static_cast<int32_t>(vals[idx++]);
     }
 
     return 0;
 }
 
 template <>
-int TosaReference::Tensor2<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor2<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1610,7 +1700,7 @@ int TosaReference::Tensor2<int32_t>::setTensorValueInt32(const size_t bufLen, co
     {
         for (int i1 = 0; i1 < shape[1]; i1++)
         {
-            (*tensor)(i0, i1) = vals[idx++];
+            (*tensor)(i0, i1) = static_cast<int32_t>(vals[idx++]);
         }
     }
 
@@ -1618,7 +1708,7 @@ int TosaReference::Tensor2<int32_t>::setTensorValueInt32(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor3<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor3<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1630,7 +1720,7 @@ int TosaReference::Tensor3<int32_t>::setTensorValueInt32(const size_t bufLen, co
         {
             for (int i2 = 0; i2 < shape[2]; i2++)
             {
-                (*tensor)(i0, i1, i2) = vals[idx++];
+                (*tensor)(i0, i1, i2) = static_cast<int32_t>(vals[idx++]);
             }
         }
     }
@@ -1639,7 +1729,7 @@ int TosaReference::Tensor3<int32_t>::setTensorValueInt32(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor4<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor4<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1653,7 +1743,7 @@ int TosaReference::Tensor4<int32_t>::setTensorValueInt32(const size_t bufLen, co
             {
                 for (int i3 = 0; i3 < shape[3]; i3++)
                 {
-                    (*tensor)(i0, i1, i2, i3) = vals[idx++];
+                    (*tensor)(i0, i1, i2, i3) = static_cast<int32_t>(vals[idx++]);
                 }
             }
         }
@@ -1663,7 +1753,7 @@ int TosaReference::Tensor4<int32_t>::setTensorValueInt32(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor5<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor5<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1679,7 +1769,7 @@ int TosaReference::Tensor5<int32_t>::setTensorValueInt32(const size_t bufLen, co
                 {
                     for (int i4 = 0; i4 < shape[4]; i4++)
                     {
-                        (*tensor)(i0, i1, i2, i3, i4) = vals[idx++];
+                        (*tensor)(i0, i1, i2, i3, i4) = static_cast<int32_t>(vals[idx++]);
                     }
                 }
             }
@@ -1690,7 +1780,7 @@ int TosaReference::Tensor5<int32_t>::setTensorValueInt32(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor6<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
+int TosaReference::Tensor6<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1708,7 +1798,7 @@ int TosaReference::Tensor6<int32_t>::setTensorValueInt32(const size_t bufLen, co
                     {
                         for (int i5 = 0; i5 < shape[5]; i5++)
                         {
-                            (*tensor)(i0, i1, i2, i3, i4, i5) = vals[idx++];
+                            (*tensor)(i0, i1, i2, i3, i4, i5) = static_cast<int32_t>(vals[idx++]);
                         }
                     }
                 }
@@ -1719,25 +1809,25 @@ int TosaReference::Tensor6<int32_t>::setTensorValueInt32(const size_t bufLen, co
 }
 
 template <class T>
-int TosaReference::TensorTemplate<T>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::TensorTemplate<T>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt64 should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt16 should not be called.  "    // generic fallback: use a specialization
                 "Implement template specialization version.");
     return 0;
 }
 
 template <>
-int TosaReference::Tensor0<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor0<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
-    (*tensor)(0) = vals[0];
+    (*tensor)(0) = static_cast<int32_t>(vals[0]);
 
     return 0;
 }
 
 template <>
-int TosaReference::Tensor1<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor1<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1745,14 +1835,14 @@ int TosaReference::Tensor1<int64_t>::setTensorValueInt64(const size_t bufLen, co
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
-        (*tensor)(i0) = vals[idx++];
+        (*tensor)(i0) = static_cast<int32_t>(vals[idx++]);
     }
 
     return 0;
 }
 
 template <>
-int TosaReference::Tensor2<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor2<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1762,7 +1852,7 @@ int TosaReference::Tensor2<int64_t>::setTensorValueInt64(const size_t bufLen, co
     {
         for (int i1 = 0; i1 < shape[1]; i1++)
         {
-            (*tensor)(i0, i1) = vals[idx++];
+            (*tensor)(i0, i1) = static_cast<int32_t>(vals[idx++]);
         }
     }
 
@@ -1770,7 +1860,7 @@ int TosaReference::Tensor2<int64_t>::setTensorValueInt64(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor3<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor3<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1782,7 +1872,7 @@ int TosaReference::Tensor3<int64_t>::setTensorValueInt64(const size_t bufLen, co
         {
             for (int i2 = 0; i2 < shape[2]; i2++)
             {
-                (*tensor)(i0, i1, i2) = vals[idx++];
+                (*tensor)(i0, i1, i2) = static_cast<int32_t>(vals[idx++]);
             }
         }
     }
@@ -1791,7 +1881,7 @@ int TosaReference::Tensor3<int64_t>::setTensorValueInt64(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor4<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor4<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1805,7 +1895,7 @@ int TosaReference::Tensor4<int64_t>::setTensorValueInt64(const size_t bufLen, co
             {
                 for (int i3 = 0; i3 < shape[3]; i3++)
                 {
-                    (*tensor)(i0, i1, i2, i3) = vals[idx++];
+                    (*tensor)(i0, i1, i2, i3) = static_cast<int32_t>(vals[idx++]);
                 }
             }
         }
@@ -1815,7 +1905,7 @@ int TosaReference::Tensor4<int64_t>::setTensorValueInt64(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor5<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor5<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1831,7 +1921,7 @@ int TosaReference::Tensor5<int64_t>::setTensorValueInt64(const size_t bufLen, co
                 {
                     for (int i4 = 0; i4 < shape[4]; i4++)
                     {
-                        (*tensor)(i0, i1, i2, i3, i4) = vals[idx++];
+                        (*tensor)(i0, i1, i2, i3, i4) = static_cast<int32_t>(vals[idx++]);
                     }
                 }
             }
@@ -1842,7 +1932,7 @@ int TosaReference::Tensor5<int64_t>::setTensorValueInt64(const size_t bufLen, co
 }
 
 template <>
-int TosaReference::Tensor6<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
+int TosaReference::Tensor6<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1860,7 +1950,7 @@ int TosaReference::Tensor6<int64_t>::setTensorValueInt64(const size_t bufLen, co
                     {
                         for (int i5 = 0; i5 < shape[5]; i5++)
                         {
-                            (*tensor)(i0, i1, i2, i3, i4, i5) = vals[idx++];
+                            (*tensor)(i0, i1, i2, i3, i4, i5) = static_cast<int32_t>(vals[idx++]);
                         }
                     }
                 }
@@ -1871,15 +1961,15 @@ int TosaReference::Tensor6<int64_t>::setTensorValueInt64(const size_t bufLen, co
 }
 
 template <class T>
-int TosaReference::TensorTemplate<T>::setTensorValueBool(const size_t buflen, const bool* vals)
+int TosaReference::TensorTemplate<T>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::setTensorValueBool should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt32 should not be called.  "
                 "Implement template specialization version.");
     return 0;
 }
 
 template <>
-int TosaReference::Tensor0<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor0<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
@@ -1889,7 +1979,7 @@ int TosaReference::Tensor0<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <>
-int TosaReference::Tensor1<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor1<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1904,7 +1994,7 @@ int TosaReference::Tensor1<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <>
-int TosaReference::Tensor2<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor2<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1922,7 +2012,7 @@ int TosaReference::Tensor2<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <>
-int TosaReference::Tensor3<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor3<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1943,7 +2033,7 @@ int TosaReference::Tensor3<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <>
-int TosaReference::Tensor4<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor4<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1967,7 +2057,7 @@ int TosaReference::Tensor4<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <>
-int TosaReference::Tensor5<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor5<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     uint32_t idx = 0;
 
@@ -1994,7 +2084,7 @@ int TosaReference::Tensor5<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <>
-int TosaReference::Tensor6<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+int TosaReference::Tensor6<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals)
 {
     uint32_t idx = 0;
 
@@ -2023,64 +2113,50 @@ int TosaReference::Tensor6<bool>::setTensorValueBool(const size_t bufLen, const
 }
 
 template <class T>
-int TosaReference::TensorTemplate<T>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::TensorTemplate<T>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::getTensorValueDouble should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt64 should not be called.  "
                 "Implement template specialization version.");
     return 0;
 }
 
 template <>
-int TosaReference::Tensor0<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor0<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    int totalVals = 1;
-
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
-    vals[0] = (*tensor)(0);
+    (*tensor)(0) = vals[0];
 
     return 0;
 }
 
 template <>
-int TosaReference::Tensor1<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor1<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    uint32_t idx  = 0;
-    int totalVals = 1;
-
-    for (size_t i = 0; i < shape.size(); i++)
-    {
-        totalVals *= shape[i];
-    }
+    uint32_t idx = 0;
 
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
-        vals[idx++] = (*tensor)(i0);
+        (*tensor)(i0) = vals[idx++];
     }
 
     return 0;
 }
 
 template <>
-int TosaReference::Tensor2<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor2<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    uint32_t idx  = 0;
-    int totalVals = 1;
-
-    for (size_t i = 0; i < shape.size(); i++)
-    {
-        totalVals *= shape[i];
-    }
+    uint32_t idx = 0;
 
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
         for (int i1 = 0; i1 < shape[1]; i1++)
         {
-            vals[idx++] = (*tensor)(i0, i1);
+            (*tensor)(i0, i1) = vals[idx++];
         }
     }
 
@@ -2088,17 +2164,11 @@ int TosaReference::Tensor2<double>::getTensorValueDouble(const size_t bufLen, do
 }
 
 template <>
-int TosaReference::Tensor3<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor3<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    uint32_t idx  = 0;
-    int totalVals = 1;
-
-    for (size_t i = 0; i < shape.size(); i++)
-    {
-        totalVals *= shape[i];
-    }
+    uint32_t idx = 0;
 
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
@@ -2106,7 +2176,7 @@ int TosaReference::Tensor3<double>::getTensorValueDouble(const size_t bufLen, do
         {
             for (int i2 = 0; i2 < shape[2]; i2++)
             {
-                vals[idx++] = (*tensor)(i0, i1, i2);
+                (*tensor)(i0, i1, i2) = vals[idx++];
             }
         }
     }
@@ -2115,17 +2185,11 @@ int TosaReference::Tensor3<double>::getTensorValueDouble(const size_t bufLen, do
 }
 
 template <>
-int TosaReference::Tensor4<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor4<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    uint32_t idx  = 0;
-    int totalVals = 1;
-
-    for (size_t i = 0; i < shape.size(); i++)
-    {
-        totalVals *= shape[i];
-    }
+    uint32_t idx = 0;
 
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
@@ -2135,7 +2199,7 @@ int TosaReference::Tensor4<double>::getTensorValueDouble(const size_t bufLen, do
             {
                 for (int i3 = 0; i3 < shape[3]; i3++)
                 {
-                    vals[idx++] = (*tensor)(i0, i1, i2, i3);
+                    (*tensor)(i0, i1, i2, i3) = vals[idx++];
                 }
             }
         }
@@ -2145,17 +2209,11 @@ int TosaReference::Tensor4<double>::getTensorValueDouble(const size_t bufLen, do
 }
 
 template <>
-int TosaReference::Tensor5<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor5<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    uint32_t idx  = 0;
-    int totalVals = 1;
-
-    for (size_t i = 0; i < shape.size(); i++)
-    {
-        totalVals *= shape[i];
-    }
+    uint32_t idx = 0;
 
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
@@ -2167,7 +2225,7 @@ int TosaReference::Tensor5<double>::getTensorValueDouble(const size_t bufLen, do
                 {
                     for (int i4 = 0; i4 < shape[4]; i4++)
                     {
-                        vals[idx++] = (*tensor)(i0, i1, i2, i3, i4);
+                        (*tensor)(i0, i1, i2, i3, i4) = vals[idx++];
                     }
                 }
             }
@@ -2178,17 +2236,11 @@ int TosaReference::Tensor5<double>::getTensorValueDouble(const size_t bufLen, do
 }
 
 template <>
-int TosaReference::Tensor6<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+int TosaReference::Tensor6<int64_t>::setTensorValueInt64(const size_t bufLen, const int64_t* vals)
 {
-    uint32_t idx  = 0;
-    int totalVals = 1;
-
-    for (size_t i = 0; i < shape.size(); i++)
-    {
-        totalVals *= shape[i];
-    }
+    uint32_t idx = 0;
 
-    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
 
     for (int i0 = 0; i0 < shape[0]; i0++)
     {
@@ -2202,7 +2254,7 @@ int TosaReference::Tensor6<double>::getTensorValueDouble(const size_t bufLen, do
                     {
                         for (int i5 = 0; i5 < shape[5]; i5++)
                         {
-                            vals[idx++] = (*tensor)(i0, i1, i2, i3, i4, i5);
+                            (*tensor)(i0, i1, i2, i3, i4, i5) = vals[idx++];
                         }
                     }
                 }
@@ -2213,15 +2265,548 @@ int TosaReference::Tensor6<double>::getTensorValueDouble(const size_t bufLen, do
 }
 
 template <class T>
-int TosaReference::TensorTemplate<T>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::TensorTemplate<T>::setTensorValueBool(const size_t buflen, const bool* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::getTensorValueFloat should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueBool should not be called.  "
                 "Implement template specialization version.");
     return 0;
 }
 
 template <>
-int TosaReference::Tensor0<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor0<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    (*tensor)(0) = vals[0];
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor1<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        (*tensor)(i0) = vals[idx++];
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor2<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            (*tensor)(i0, i1) = vals[idx++];
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor3<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                (*tensor)(i0, i1, i2) = vals[idx++];
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor4<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    (*tensor)(i0, i1, i2, i3) = vals[idx++];
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor5<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        (*tensor)(i0, i1, i2, i3, i4) = vals[idx++];
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor6<bool>::setTensorValueBool(const size_t bufLen, const bool* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        for (int i5 = 0; i5 < shape[5]; i5++)
+                        {
+                            (*tensor)(i0, i1, i2, i3, i4, i5) = vals[idx++];
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <class T>
+int TosaReference::TensorTemplate<T>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    FATAL_ERROR("TensorTemplate<T>::getTensorValueDouble should not be called.  "
+                "Implement template specialization version.");
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor0<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    int totalVals = 1;
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    vals[0] = (*tensor)(0);
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor1<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        vals[idx++] = (*tensor)(i0);
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor2<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            vals[idx++] = (*tensor)(i0, i1);
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor3<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                vals[idx++] = (*tensor)(i0, i1, i2);
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor4<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    vals[idx++] = (*tensor)(i0, i1, i2, i3);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor5<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        vals[idx++] = (*tensor)(i0, i1, i2, i3, i4);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor6<double>::getTensorValueDouble(const size_t bufLen, double* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        for (int i5 = 0; i5 < shape[5]; i5++)
+                        {
+                            vals[idx++] = (*tensor)(i0, i1, i2, i3, i4, i5);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <class T>
+int TosaReference::TensorTemplate<T>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    FATAL_ERROR("TensorTemplate<T>::getTensorValueFloat should not be called.  "
+                "Implement template specialization version.");
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor0<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    int totalVals = 1;
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    vals[0] = (*tensor)(0);
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor1<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        vals[idx++] = (*tensor)(i0);
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor2<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            vals[idx++] = (*tensor)(i0, i1);
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor3<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                vals[idx++] = (*tensor)(i0, i1, i2);
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor4<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    vals[idx++] = (*tensor)(i0, i1, i2, i3);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor5<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        vals[idx++] = (*tensor)(i0, i1, i2, i3, i4);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor6<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        for (int i5 = 0; i5 < shape[5]; i5++)
+                        {
+                            vals[idx++] = (*tensor)(i0, i1, i2, i3, i4, i5);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <class T>
+int TosaReference::TensorTemplate<T>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
+{
+    std::cout << "T is: " << typeid(T).name() << std::endl;
+    FATAL_ERROR("TensorTemplate<T>::getTensorValueUInt8 should not be called.  "
+                "Implement template specialization version.");
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor0<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
 {
     int totalVals = 1;
 
@@ -2233,7 +2818,7 @@ int TosaReference::Tensor0<float>::getTensorValueFloat(const size_t bufLen, floa
 }
 
 template <>
-int TosaReference::Tensor1<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor1<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
 {
     uint32_t idx  = 0;
     int totalVals = 1;
@@ -2254,7 +2839,7 @@ int TosaReference::Tensor1<float>::getTensorValueFloat(const size_t bufLen, floa
 }
 
 template <>
-int TosaReference::Tensor2<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor2<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
 {
     uint32_t idx  = 0;
     int totalVals = 1;
@@ -2278,7 +2863,7 @@ int TosaReference::Tensor2<float>::getTensorValueFloat(const size_t bufLen, floa
 }
 
 template <>
-int TosaReference::Tensor3<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor3<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
 {
     uint32_t idx  = 0;
     int totalVals = 1;
@@ -2305,7 +2890,7 @@ int TosaReference::Tensor3<float>::getTensorValueFloat(const size_t bufLen, floa
 }
 
 template <>
-int TosaReference::Tensor4<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor4<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
 {
     uint32_t idx  = 0;
     int totalVals = 1;
@@ -2335,7 +2920,7 @@ int TosaReference::Tensor4<float>::getTensorValueFloat(const size_t bufLen, floa
 }
 
 template <>
-int TosaReference::Tensor5<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor5<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
 {
     uint32_t idx  = 0;
     int totalVals = 1;
@@ -2368,7 +2953,198 @@ int TosaReference::Tensor5<float>::getTensorValueFloat(const size_t bufLen, floa
 }
 
 template <>
-int TosaReference::Tensor6<float>::getTensorValueFloat(const size_t bufLen, float* vals) const
+int TosaReference::Tensor6<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        for (int i5 = 0; i5 < shape[5]; i5++)
+                        {
+                            vals[idx++] = (*tensor)(i0, i1, i2, i3, i4, i5);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <class T>
+int TosaReference::TensorTemplate<T>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    std::cout << "T is: " << typeid(T).name() << std::endl;
+    FATAL_ERROR("TensorTemplate<T>::getTensorValueInt8 should not be called.  "
+                "Implement template specialization version.");
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor0<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    int totalVals = 1;
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    vals[0] = (*tensor)(0);
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor1<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        vals[idx++] = (*tensor)(i0);
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor2<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            vals[idx++] = (*tensor)(i0, i1);
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor3<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                vals[idx++] = (*tensor)(i0, i1, i2);
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor4<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    vals[idx++] = (*tensor)(i0, i1, i2, i3);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor5<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        vals[idx++] = (*tensor)(i0, i1, i2, i3, i4);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor6<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const
 {
     uint32_t idx  = 0;
     int totalVals = 1;
diff --git a/reference_model/src/tensor.h b/reference_model/src/tensor.h
index cd71f9f..2c3be7f 100644
--- a/reference_model/src/tensor.h
+++ b/reference_model/src/tensor.h
@@ -241,12 +241,16 @@ public:
 
     virtual int setTensorValueDouble(const size_t bufLen, const double* vals) = 0;
     virtual int setTensorValueFloat(const size_t bufLen, const float* vals)   = 0;
+    virtual int setTensorValueUInt8(const size_t bufLen, const uint8_t* vals) = 0;
+    virtual int setTensorValueInt8(const size_t bufLen, const int8_t* vals)   = 0;
     virtual int setTensorValueInt16(const size_t bufLen, const int16_t* vals) = 0;
     virtual int setTensorValueInt32(const size_t bufLen, const int32_t* vals) = 0;
     virtual int setTensorValueInt64(const size_t bufLen, const int64_t* vals) = 0;
     virtual int setTensorValueBool(const size_t bufLen, const bool* vals)     = 0;
     virtual int getTensorValueDouble(const size_t bufLen, double* fbuf) const = 0;
     virtual int getTensorValueFloat(const size_t bufLen, float* fbuf) const   = 0;
+    virtual int getTensorValueUInt8(const size_t bufLen, uint8_t* ibuf) const = 0;
+    virtual int getTensorValueInt8(const size_t bufLen, int8_t* ibuf) const   = 0;
     virtual int getTensorValueInt16(const size_t bufLen, int16_t* ibuf) const = 0;
     virtual int getTensorValueInt32(const size_t bufLen, int32_t* ibuf) const = 0;
     virtual int getTensorValueInt64(const size_t bufLen, int64_t* ibuf) const = 0;
@@ -259,6 +263,7 @@ public:
     virtual int readfromVector(const ArrayProxy<double> vals);
     virtual int readfromVector(const ArrayProxy<float> vals);
     virtual int readfromVector(const ArrayProxy<half_float::half> vals);
+    virtual int readfromVector(const ArrayProxy<int8_t> vals);
     virtual int readfromVector(const ArrayProxy<int16_t> vals);
     virtual int readfromVector(const ArrayProxy<int32_t> vals);
     virtual int readfromVector(const ArrayProxy<int64_t> vals);
@@ -267,6 +272,7 @@ public:
     virtual int writeToVector(ArrayProxy<double> vals);
     virtual int writeToVector(ArrayProxy<float> vals);
     virtual int writeToVector(ArrayProxy<half_float::half> vals);
+    virtual int writeToVector(ArrayProxy<int8_t> vals);
     virtual int writeToVector(ArrayProxy<int16_t> vals);
     virtual int writeToVector(ArrayProxy<int32_t> vals);
     virtual int writeToVector(ArrayProxy<int64_t> vals);
@@ -361,6 +367,8 @@ public:
 
     virtual int setTensorValueDouble(const size_t bufLen, const double* vals);
     virtual int setTensorValueFloat(const size_t bufLen, const float* vals);
+    virtual int setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+    virtual int setTensorValueInt8(const size_t bufLen, const int8_t* vals);
     virtual int setTensorValueInt16(const size_t bufLen, const int16_t* vals);
     virtual int setTensorValueInt32(const size_t bufLen, const int32_t* vals);
     virtual int setTensorValueInt64(const size_t bufLen, const int64_t* vals);
@@ -368,6 +376,8 @@ public:
 
     virtual int getTensorValueDouble(const size_t bufLen, double* fbuf) const;
     virtual int getTensorValueFloat(const size_t bufLen, float* fbuf) const;
+    virtual int getTensorValueUInt8(const size_t bufLen, uint8_t* ibuf) const;
+    virtual int getTensorValueInt8(const size_t bufLen, int8_t* ibuf) const;
     virtual int getTensorValueInt16(const size_t bufLen, int16_t* ibuf) const;
     virtual int getTensorValueInt32(const size_t bufLen, int32_t* ibuf) const;
     virtual int getTensorValueInt64(const size_t bufLen, int64_t* ibuf) const;
@@ -531,6 +541,36 @@ int Tensor5<bool>::copyValueFrom(Tensor* src);
 template <>
 int Tensor6<bool>::copyValueFrom(Tensor* src);
 
+template <>
+int Tensor0<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+template <>
+int Tensor1<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+template <>
+int Tensor2<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+template <>
+int Tensor3<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+template <>
+int Tensor4<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+template <>
+int Tensor5<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+template <>
+int Tensor6<int32_t>::setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
+
+template <>
+int Tensor0<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+template <>
+int Tensor1<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+template <>
+int Tensor2<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+template <>
+int Tensor3<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+template <>
+int Tensor4<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+template <>
+int Tensor5<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+template <>
+int Tensor6<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+
 template <>
 int Tensor0<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals);
 template <>
@@ -561,6 +601,36 @@ int Tensor5<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* va
 template <>
 int Tensor6<int32_t>::setTensorValueInt32(const size_t bufLen, const int32_t* vals);
 
+template <>
+int Tensor0<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+template <>
+int Tensor1<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+template <>
+int Tensor2<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+template <>
+int Tensor3<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+template <>
+int Tensor4<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+template <>
+int Tensor5<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+template <>
+int Tensor6<int32_t>::getTensorValueUInt8(const size_t bufLen, uint8_t* vals) const;
+
+template <>
+int Tensor0<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+template <>
+int Tensor1<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+template <>
+int Tensor2<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+template <>
+int Tensor3<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+template <>
+int Tensor4<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+template <>
+int Tensor5<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+template <>
+int Tensor6<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
+
 template <>
 int Tensor0<int32_t>::getTensorValueInt16(const size_t bufLen, int16_t* vals) const;
 template <>
diff --git a/verif/frameworks/tosa_verif_framework_compiler_runner.py b/verif/frameworks/tosa_verif_framework_compiler_runner.py
index ab3db90..ce9b253 100755
--- a/verif/frameworks/tosa_verif_framework_compiler_runner.py
+++ b/verif/frameworks/tosa_verif_framework_compiler_runner.py
@@ -691,12 +691,11 @@ def run_test(args, test_path, framework):
         tf_result = tf_result.astype(np.float64)
     elif tf_result.dtype == np.float16:
         tf_result = tf_result.astype(np.float32)
-    elif (
-        tf_result.dtype == np.uint8
-        or tf_result.dtype == np.int8
-        or tf_result.dtype == np.int16
-        or tf_result.dtype == np.int64
-    ):
+    elif tf_result.dtype == np.int8:
+        tf_result = tf_result.astype(np.int8)
+    elif tf_result.dtype == np.uint8:
+        tf_result = tf_result.astype(np.uint8)
+    elif tf_result.dtype == np.int16 or tf_result.dtype == np.int64:
         tf_result = tf_result.astype(np.int32)
 
     # For now, search for the first output from ref_model
diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py
index b9352ac..28cf392 100644
--- a/verif/generator/tosa_test_gen.py
+++ b/verif/generator/tosa_test_gen.py
@@ -191,6 +191,10 @@ class TosaTestGen:
 
         if dtype == DType.BOOL:
             return np.bool_(self.rng.choice(a=[False, True], size=shape))
+        elif dtype == DType.INT8:
+            return np.int8(self.rng.integers(low=low, high=high, size=shape))
+        elif dtype == DType.UINT8:
+            return np.uint8(self.rng.integers(low=low, high=high, size=shape))
         elif dtype in (DType.INT48, DType.SHAPE):
             return np.int64(self.rng.integers(low=low, high=high, size=shape))
         elif dtype in (DType.FP16, DType.BF16, DType.FP32):
@@ -2079,7 +2083,16 @@ class TosaTestGen:
             val_adj = np.subtract(values, input_zp, dtype=np.int64)
             val_adj = np.maximum(val_adj, min_shift_value_arr, dtype=np.int64)
             val_adj = np.minimum(val_adj, max_shift_value_arr, dtype=np.int64)
-            val_adj = np.add(val_adj, input_zp, dtype=values.dtype)
+            val_adj = np.add(val_adj, input_zp, dtype=np.int64)
+            # Check we can safely convert to the expected dtype
+            assert (
+                np.all(val_adj >= np.iinfo(values.dtype).min)
+                and np.all(val_adj <= np.iinfo(values.dtype).max)
+            )
+
+            # Force casting to output datatype
+            val_adj = val_adj.astype(values.dtype, casting="unsafe")
+
             if not np.all(np.array_equal(values, val_adj)):
                 # Values changed so overwrite file with new values
                 np.save(
-- 
cgit v1.2.1