From 0d35a93d68e321e8c4b16baa8b9754b98cc9faf3 Mon Sep 17 00:00:00 2001
From: Matthew Sloyan
Date: Mon, 9 Nov 2020 12:25:05 +0000
Subject: IVGCVSW-5398 TfLiteDelegate: Implement the Quantization operators

* Enabled quantization operators DEQUANTIZE and QUANTIZE.
* Implemented unit tests for quantization operators.
* Added utils function for checking if a tensor uses affine quantization.

Signed-off-by: Matthew Sloyan
Change-Id: I84b5c75bda629d9234f5ed198b04f527705a54aa
---
 delegate/CMakeLists.txt                      |   4 +-
 delegate/src/DelegateUtils.hpp               |  10 +
 delegate/src/Quantization.hpp                | 132 ++++++++-
 delegate/src/test/QuantizationTest.cpp       | 429 +++++++++++++++++++++++++++
 delegate/src/test/QuantizationTestHelper.hpp | 197 ++++++++++++
 5 files changed, 761 insertions(+), 11 deletions(-)
 create mode 100644 delegate/src/test/QuantizationTest.cpp
 create mode 100644 delegate/src/test/QuantizationTestHelper.hpp

diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 814976407b..0a1a3e43e4 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -94,7 +94,9 @@ list(APPEND armnnDelegate_unittest_sources
     src/test/ElementwiseBinaryTest.cpp
     src/test/ElementwiseBinaryTestHelper.hpp
     src/test/ElementwiseUnaryTest.cpp
-    src/test/ElementwiseUnaryTestHelper.hpp)
+    src/test/ElementwiseUnaryTestHelper.hpp
+    src/test/QuantizationTest.cpp
+    src/test/QuantizationTestHelper.hpp)
 
 add_executable(DelegateUnitTests ${armnnDelegate_unittest_sources})
 target_include_directories(DelegateUnitTests PRIVATE src)
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index 00279f630d..f12aee9d2b 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -104,6 +104,16 @@ bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor)
     return false;
 }
 
+bool IsAffineQuantization(const TfLiteTensor& tfLiteTensor)
+{
+    auto quantizationInfo = tfLiteTensor.quantization;
+    if (quantizationInfo.type == kTfLiteAffineQuantization)
+    {
+        return true;
+    }
+    return false;
+}
+
 TfLiteStatus Connect(armnn::IConnectableLayer* layer,
                      TfLiteNode* tfLiteNode,
                      armnnDelegate::DelegateData& data)
diff --git a/delegate/src/Quantization.hpp b/delegate/src/Quantization.hpp
index 31196233f9..4adbd11616 100644
--- a/delegate/src/Quantization.hpp
+++ b/delegate/src/Quantization.hpp
@@ -13,22 +13,134 @@
 namespace armnnDelegate
 {
 
+TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
+                                     TfLiteContext* tfLiteContext,
+                                     TfLiteNode* tfLiteNode,
+                                     int nodeIndex,
+                                     int32_t tfLiteDequantizeOperatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (IsDynamicTensor(tfLiteInputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+            tfLiteDequantizeOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (IsDynamicTensor(tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+            tfLiteDequantizeOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo  = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsDequantizeSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfo);
+    };
+
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    armnn::IConnectableLayer* dequantizeLayer = delegateData.m_Network->AddDequantizeLayer();
+    ARMNN_ASSERT(dequantizeLayer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = dequantizeLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    return Connect(dequantizeLayer, tfLiteNode, delegateData);
+}
+
 TfLiteStatus VisitQuantizeOperator(DelegateData& delegateData,
                                    TfLiteContext* tfLiteContext,
                                    TfLiteNode* tfLiteNode,
                                    int nodeIndex,
-                                   int32_t operatorCode)
+                                   int32_t tfLiteQuantizeOperatorCode)
 {
-    return kTfLiteError;
-}
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
 
-TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
-                                     TfLiteContext* tfLiteContext,
-                                     TfLiteNode* tfLiteNode,
-                                     int nodeIndex,
-                                     int32_t operatorCode)
-{
-    return kTfLiteError;
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (IsDynamicTensor(tfLiteInputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+            tfLiteQuantizeOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (IsDynamicTensor(tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+            tfLiteQuantizeOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // Only affine per-layer quantization is supported.
+    if (!IsAffineQuantization(tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Only affine per-layer quantization is supported in operator #%d node #%d: ",
+            tfLiteQuantizeOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo  = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsQuantizeSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfo);
+    };
+
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    armnn::IConnectableLayer* quantizeLayer = delegateData.m_Network->AddQuantizeLayer();
+    ARMNN_ASSERT(quantizeLayer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = quantizeLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    return Connect(quantizeLayer, tfLiteNode, delegateData);
 }
 
 } // namespace armnnDelegate
diff --git a/delegate/src/test/QuantizationTest.cpp b/delegate/src/test/QuantizationTest.cpp
new file mode 100644
index 0000000000..5466d47f48
--- /dev/null
+++ b/delegate/src/test/QuantizationTest.cpp
@@ -0,0 +1,429 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "QuantizationTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// Dequantize operator test functions.
+void DequantizeUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    // Set input and output data
+    std::vector<uint8_t> inputValues
+    {
+        0,   1,   2,   3,  // Lower bounds
+        252, 253, 254, 255 // Upper bounds
+    };
+    std::vector<float> expectedOutputValues
+    {
+        0.f,   1.f,   2.f,   3.f,
+        252.f, 253.f, 254.f, 255.f
+    };
+
+    QuantizationTest<uint8_t, float>(tflite::BuiltinOperator_DEQUANTIZE,
+                                     ::tflite::TensorType_UINT8,
+                                     ::tflite::TensorType_FLOAT32,
+                                     backends,
+                                     inputShape,
+                                     outputShape,
+                                     inputValues,
+                                     expectedOutputValues);
+}
+
+void DequantizeInt8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<int8_t> inputValues
+    {
+        -1,   0,    1,   2,
+        -128, -127, 126, 127
+    };
+    std::vector<float> expectedOutputValues
+    {
+        -1.f,   0.f,    1.f,   2.f,
+        -128.f, -127.f, 126.f, 127.f
+    };
+
+    QuantizationTest<int8_t, float>(tflite::BuiltinOperator_DEQUANTIZE,
+                                    ::tflite::TensorType_INT8,
+                                    ::tflite::TensorType_FLOAT32,
+                                    backends,
+                                    inputShape,
+                                    outputShape,
+                                    inputValues,
+                                    expectedOutputValues);
+}
+
+void DequantizeInt16Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<int16_t> inputValues
+    {
+        -1,     0,      1,     2,
+        -32768, -16384, 16384, 32767
+    };
+    std::vector<float> expectedOutputValues
+    {
+        -1.f,     0.f,      1.f,     2.f,
+        -32768.f, -16384.f, 16384.f, 32767.f
+    };
+
+    QuantizationTest<int16_t, float>(tflite::BuiltinOperator_DEQUANTIZE,
+                                     ::tflite::TensorType_INT16,
+                                     ::tflite::TensorType_FLOAT32,
+                                     backends,
+                                     inputShape,
+                                     outputShape,
+                                     inputValues,
+                                     expectedOutputValues);
+}
+
+// Quantize operator test functions.
+void QuantizeFloat32Uint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    // Set input and output data
+    std::vector<float> inputValues
+    {
+        -1.f,  0.f,   1.f,   2.f,  // Lower bounds
+        252.f, 253.f, 255.f, 256.f // Upper bounds
+    };
+    std::vector<uint8_t> expectedOutputValues
+    {
+        0,   0,   1,   2,
+        252, 253, 255, 255
+    };
+
+    QuantizationTest<float, uint8_t>(tflite::BuiltinOperator_QUANTIZE,
+                                     ::tflite::TensorType_FLOAT32,
+                                     ::tflite::TensorType_UINT8,
+                                     backends,
+                                     inputShape,
+                                     outputShape,
+                                     inputValues,
+                                     expectedOutputValues);
+}
+
+void QuantizeFloat32Int8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<float> inputValues
+    {
+        -1.f,    0.f,    1.f,   2.f,
+        -128.5f, -127.f, 126.f, 127.5f
+    };
+    std::vector<int8_t> expectedOutputValues
+    {
+        -1,   0,    1,   2,
+        -128, -127, 126, 127
+    };
+
+    QuantizationTest<float, int8_t>(tflite::BuiltinOperator_QUANTIZE,
+                                    ::tflite::TensorType_FLOAT32,
+                                    ::tflite::TensorType_INT8,
+                                    backends,
+                                    inputShape,
+                                    outputShape,
+                                    inputValues,
+                                    expectedOutputValues);
+}
+
+void QuantizeFloat32Int16Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<float> inputValues
+    {
+        -1.f,      0.f,      1.f,     2.f,
+        -32768.5f, -16384.f, 16384.f, 32767.5f
+    };
+    std::vector<int16_t> expectedOutputValues
+    {
+        -1,     0,      1,     2,
+        -32768, -16384, 16384, 32767
+    };
+
+    QuantizationTest<float, int16_t>(tflite::BuiltinOperator_QUANTIZE,
+                                     ::tflite::TensorType_FLOAT32,
+                                     ::tflite::TensorType_INT16,
+                                     backends,
+                                     inputShape,
+                                     outputShape,
+                                     inputValues,
+                                     expectedOutputValues);
+}
+
+void QuantizeInt16Int16Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<int16_t> inputValues
+    {
+        -1,     0,      1,     2,
+        -32768, -16384, 16384, 32767
+    };
+    std::vector<int16_t> expectedOutputValues
+    {
+        -1,     0,      1,     2,
+        -32768, -16384, 16384, 32767
+    };
+
+    QuantizationTest<int16_t, int16_t>(tflite::BuiltinOperator_QUANTIZE,
+                                       ::tflite::TensorType_INT16,
+                                       ::tflite::TensorType_INT16,
+                                       backends,
+                                       inputShape,
+                                       outputShape,
+                                       inputValues,
+                                       expectedOutputValues);
+}
+
+void QuantizeInt16Int8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<int16_t> inputValues
+    {
+        -1,     0,      1,     2,
+        -32768, -16384, 16384, 32767
+    };
+    std::vector<int8_t> expectedOutputValues
+    {
+        -1,   0,    1,   2,
+        -128, -128, 127, 127
+    };
+
+    QuantizationTest<int16_t, int8_t>(tflite::BuiltinOperator_QUANTIZE,
+                                      ::tflite::TensorType_INT16,
+                                      ::tflite::TensorType_INT8,
+                                      backends,
+                                      inputShape,
+                                      outputShape,
+                                      inputValues,
+                                      expectedOutputValues);
+}
+
+void QuantizeInt8Uint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<int8_t> inputValues
+    {
+        -1,   0,    1,   2,
+        -128, -127, 126, 127
+    };
+    std::vector<uint8_t> expectedOutputValues
+    {
+        0, 0, 1,   2,
+        0, 0, 126, 127
+    };
+
+    QuantizationTest<int8_t, uint8_t>(tflite::BuiltinOperator_QUANTIZE,
+                                      ::tflite::TensorType_INT8,
+                                      ::tflite::TensorType_UINT8,
+                                      backends,
+                                      inputShape,
+                                      outputShape,
+                                      inputValues,
+                                      expectedOutputValues);
+}
+
+void QuantizeUint8Int8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape  { 2, 4 };
+    std::vector<int32_t> outputShape { 2, 4 };
+
+    std::vector<uint8_t> inputValues
+    {
+        0,   1,   2,   3,
+        126, 127, 254, 255
+    };
+    std::vector<int8_t> expectedOutputValues
+    {
+        0,   1,   2,   3,
+        126, 127, 127, 127
+    };
+
+    QuantizationTest<uint8_t, int8_t>(tflite::BuiltinOperator_QUANTIZE,
+                                      ::tflite::TensorType_UINT8,
+                                      ::tflite::TensorType_INT8,
+                                      backends,
+                                      inputShape,
+                                      outputShape,
+                                      inputValues,
+                                      expectedOutputValues);
+}
+
+TEST_SUITE("QuantizationTests")
+{
+
+// Dequantize Operator Tests
+TEST_CASE ("DEQUANTIZE_UINT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    DequantizeUint8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_UINT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    DequantizeUint8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    DequantizeInt8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    DequantizeInt8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT16_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    DequantizeInt16Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT16_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    DequantizeInt16Test(backends);
+}
+
+// Quantize Operator Tests
+TEST_CASE ("QUANTIZE_FLOAT32_UINT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeFloat32Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_UINT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeFloat32Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeFloat32Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeFloat32Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT16_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeFloat32Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT16_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeFloat32Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT16_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeInt16Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT16_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeInt16Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeInt16Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeInt16Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT8_UINT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeInt8Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT8_UINT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeInt8Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_UINT8_INT8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeUint8Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_UINT8_INT8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::CpuRef };
+    QuantizeUint8Int8Test(backends);
+}
+
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/QuantizationTestHelper.hpp b/delegate/src/test/QuantizationTestHelper.hpp
new file mode 100644
index 0000000000..2843e43233
--- /dev/null
+++ b/delegate/src/test/QuantizationTestHelper.hpp
@@ -0,0 +1,197 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+std::vector<char> CreateQuantizationTfLiteModel(tflite::BuiltinOperator quantizationOperatorCode,
+                                                tflite::TensorType inputTensorType,
+                                                tflite::TensorType outputTensorType,
+                                                const std::vector<int32_t>& inputTensorShape,
+                                                const std::vector<int32_t>& outputTensorShape,
+                                                float quantScale = 1.0f,
+                                                int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }),
+                                     QuantizationDetails_CustomQuantization);
+
+    std::array<flatbuffers::Offset<Tensor>, 2> tensors;
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              inputTensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                      outputTensorShape.size()),
+                              outputTensorType,
+                              0,
+                              flatBufferBuilder.CreateString("output"),
+                              quantizationParameters);
+
+    // create operator
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE;
+    flatbuffers::Offset<void> operatorBuiltinOptions = 0;
+    switch (quantizationOperatorCode)
+    {
+        case BuiltinOperator_QUANTIZE:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_QuantizeOptions;
+            operatorBuiltinOptions = CreateQuantizeOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_DEQUANTIZE:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_DequantizeOptions;
+            operatorBuiltinOptions = CreateDequantizeOptions(flatBufferBuilder).Union();
+            break;
+        }
+        default:
+            break;
+    }
+
+    const std::vector<int32_t> operatorInputs{ {0} };
+    const std::vector<int32_t> operatorOutputs{{1}};
+    flatbuffers::Offset<Operator> quantizationOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType,
+                       operatorBuiltinOptions);
+
+    const std::vector<int32_t> subgraphInputs{ {0} };
+    const std::vector<int32_t> subgraphOutputs{{1}};
+    flatbuffers::Offset<SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&quantizationOperator, 1));
+
+    flatbuffers::Offset<flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: Quantization Operator Model");
+    flatbuffers::Offset<OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, quantizationOperatorCode);
+
+    flatbuffers::Offset<Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template <typename InputT, typename OutputT>
+void QuantizationTest(tflite::BuiltinOperator quantizeOperatorCode,
+                      tflite::TensorType inputTensorType,
+                      tflite::TensorType outputTensorType,
+                      std::vector<armnn::BackendId>& backends,
+                      std::vector<int32_t>& inputShape,
+                      std::vector<int32_t>& outputShape,
+                      std::vector<InputT>& inputValues,
+                      std::vector<OutputT>& expectedOutputValues,
+                      float quantScale = 1.0f,
+                      int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateQuantizationTfLiteModel(quantizeOperatorCode,
+                                                                  inputTensorType,
+                                                                  outputTensorType,
+                                                                  inputShape,
+                                                                  outputShape,
+                                                                  quantScale,
+                                                                  quantOffset);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegateInterpreter) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter != nullptr);
+    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteInterpreter) == kTfLiteOk);
+    CHECK(tfLiteInterpreter != nullptr);
+    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                         armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data
+    auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0];
+    auto tfLiteDelegateInputData = tfLiteInterpreter->typed_tensor<InputT>(tfLiteDelegateInputId);
+    for (unsigned int i = 0; i < inputValues.size(); ++i)
+    {
+        tfLiteDelegateInputData[i] = inputValues[i];
+    }
+
+    auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0];
+    auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor<InputT>(armnnDelegateInputId);
+    for (unsigned int i = 0; i < inputValues.size(); ++i)
+    {
+        armnnDelegateInputData[i] = inputValues[i];
+    }
+
+    // Run EnqueueWorkload
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
+    auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<OutputT>(tfLiteDelegateOutputId);
+    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+    auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<OutputT>(armnnDelegateOutputId);
+
+    for (size_t i = 0; i < expectedOutputValues.size(); i++)
+    {
+        CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]);
+        CHECK(tfLiteDelegateOutputData[i] == expectedOutputValues[i]);
+        CHECK(tfLiteDelegateOutputData[i] == armnnDelegateOutputData[i]);
+    }
+}
+
+} // anonymous namespace
\ No newline at end of file
-- 
cgit v1.2.1
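
For reference, the affine quantization exercised by these operators and tests maps a float x to an integer q via q = round(x / scale) + zeroPoint, clamped to the target type's range, and dequantization maps back via x = scale * (q - zeroPoint). The standalone C++ sketch below illustrates that mapping only; the function names and the uint8 target range are assumptions made for illustration and are not part of the Arm NN delegate API or of this patch.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative affine quantization helpers (assumed names, uint8 target only).
// scale = 1.0f and zeroPoint = 0 match the defaults used by QuantizationTestHelper.hpp.
uint8_t AffineQuantize(float value, float scale, int32_t zeroPoint)
{
    // q = round(x / scale) + zeroPoint, clamped to [0, 255].
    int32_t q = static_cast<int32_t>(std::round(value / scale)) + zeroPoint;
    return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));
}

float AffineDequantize(uint8_t value, float scale, int32_t zeroPoint)
{
    // x = scale * (q - zeroPoint)
    return scale * static_cast<float>(static_cast<int32_t>(value) - zeroPoint);
}

int main()
{
    // With the helper's default scale and offset, quantization reduces to rounding
    // plus clamping, which is why QuantizeFloat32Uint8Test expects 256.f -> 255.
    std::cout << static_cast<int>(AffineQuantize(256.f, 1.0f, 0)) << '\n'; // prints 255
    std::cout << AffineDequantize(253, 1.0f, 0) << '\n';                   // prints 253
    return 0;
}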