author    Matthew Sloyan <matthew.sloyan@arm.com>  2020-11-09 12:25:05 +0000
committer Jim Flynn <jim.flynn@arm.com>            2020-11-10 16:50:49 +0000
commit    0d35a93d68e321e8c4b16baa8b9754b98cc9faf3 (patch)
tree      d4143415dbd8d7ea2b65b802fe9e18ead9a09e75
parent    8b9858d891439fd1b0710e5d245e2116a3b88d30 (diff)
download  armnn-0d35a93d68e321e8c4b16baa8b9754b98cc9faf3.tar.gz
IVGCVSW-5398 TfLiteDelegate: Implement the Quantization operators
* Enabled quantization operators DEQUANTIZE and QUANTIZE.
* Implemented unit tests for the quantization operators.
* Added a utility function for checking whether a tensor uses affine quantization.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I84b5c75bda629d9234f5ed198b04f527705a54aa
-rw-r--r--  delegate/CMakeLists.txt                       |   4
-rw-r--r--  delegate/src/DelegateUtils.hpp                |  10
-rw-r--r--  delegate/src/Quantization.hpp                 | 132
-rw-r--r--  delegate/src/test/QuantizationTest.cpp        | 429
-rw-r--r--  delegate/src/test/QuantizationTestHelper.hpp  | 197
5 files changed, 761 insertions(+), 11 deletions(-)
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 814976407b..0a1a3e43e4 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -94,7 +94,9 @@ list(APPEND armnnDelegate_unittest_sources
src/test/ElementwiseBinaryTest.cpp
src/test/ElementwiseBinaryTestHelper.hpp
src/test/ElementwiseUnaryTest.cpp
- src/test/ElementwiseUnaryTestHelper.hpp)
+ src/test/ElementwiseUnaryTestHelper.hpp
+ src/test/QuantizationTest.cpp
+ src/test/QuantizationTestHelper.hpp)
add_executable(DelegateUnitTests ${armnnDelegate_unittest_sources})
target_include_directories(DelegateUnitTests PRIVATE src)
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index 00279f630d..f12aee9d2b 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -104,6 +104,16 @@ bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor)
return false;
}
+bool IsAffineQuantization(const TfLiteTensor& tfLiteTensor)
+{
+ auto quantizationInfo = tfLiteTensor.quantization;
+ if (quantizationInfo.type == kTfLiteAffineQuantization)
+ {
+ return true;
+ }
+ return false;
+}
+
TfLiteStatus Connect(armnn::IConnectableLayer* layer,
TfLiteNode* tfLiteNode,
armnnDelegate::DelegateData& data)
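
The IsAffineQuantization helper added above mirrors the existing IsDynamicTensor predicate: it reads the tensor's quantization.type field and reports whether affine (scale/zero-point) parameters are attached. A minimal caller-side sketch, assuming a TfLiteTensor obtained from the runtime; the wrapper function below is illustrative and not part of this patch:

    #include <tensorflow/lite/c/common.h>

    // Illustrative wrapper: fetch the node's first output tensor the same way
    // VisitQuantizeOperator does, then apply the affine-quantization check.
    bool OutputIsAffineQuantized(const TfLiteContext* tfLiteContext,
                                 const TfLiteNode* tfLiteNode)
    {
        const TfLiteTensor& outputTensor =
            tfLiteContext->tensors[tfLiteNode->outputs->data[0]];
        // kTfLiteAffineQuantization indicates scale/zero-point parameters are
        // present; anything else (e.g. kTfLiteNoQuantization) is rejected.
        return outputTensor.quantization.type == kTfLiteAffineQuantization;
    }
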
diff --git a/delegate/src/Quantization.hpp b/delegate/src/Quantization.hpp
index 31196233f9..4adbd11616 100644
--- a/delegate/src/Quantization.hpp
+++ b/delegate/src/Quantization.hpp
@@ -13,22 +13,134 @@
namespace armnnDelegate
{
+TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
+ TfLiteContext* tfLiteContext,
+ TfLiteNode* tfLiteNode,
+ int nodeIndex,
+ int32_t tfLiteDequantizeOperatorCode)
+{
+ TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+ TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+ const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+ const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+ if (IsDynamicTensor(tfLiteInputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+ tfLiteDequantizeOperatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+ if (IsDynamicTensor(tfLiteOutputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+ tfLiteDequantizeOperatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+ const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+ bool isSupported = false;
+ auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ tfLiteContext,
+ IsDequantizeSupported,
+ delegateData.m_Backends,
+ isSupported,
+ inputTensorInfo,
+ outputTensorInfo);
+ };
+
+ if (!delegateData.m_Network)
+ {
+ validateFunc(outputTensorInfo, isSupported);
+ return isSupported ? kTfLiteOk : kTfLiteError;
+ }
+
+ armnn::IConnectableLayer* dequantizeLayer = delegateData.m_Network->AddDequantizeLayer();
+ ARMNN_ASSERT(dequantizeLayer != nullptr);
+
+ armnn::IOutputSlot& outputSlot = dequantizeLayer->GetOutputSlot(0);
+ outputSlot.SetTensorInfo(outputTensorInfo);
+
+ return Connect(dequantizeLayer, tfLiteNode, delegateData);
+}
+
TfLiteStatus VisitQuantizeOperator(DelegateData& delegateData,
TfLiteContext* tfLiteContext,
TfLiteNode* tfLiteNode,
int nodeIndex,
- int32_t operatorCode)
+ int32_t tfLiteQuantizeOperatorCode)
{
- return kTfLiteError;
-}
+ TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+ TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
-TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
- TfLiteContext* tfLiteContext,
- TfLiteNode* tfLiteNode,
- int nodeIndex,
- int32_t operatorCode)
-{
- return kTfLiteError;
+ const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+ const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+ if (IsDynamicTensor(tfLiteInputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+ tfLiteQuantizeOperatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+ if (IsDynamicTensor(tfLiteOutputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+ tfLiteQuantizeOperatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ // Only affine per-layer quantization is supported.
+ if (!IsAffineQuantization(tfLiteOutputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Only affine per-layer quantization is supported in operator #%d node #%d: ",
+ tfLiteQuantizeOperatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+ const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+ bool isSupported = false;
+ auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ tfLiteContext,
+ IsQuantizeSupported,
+ delegateData.m_Backends,
+ isSupported,
+ inputTensorInfo,
+ outputTensorInfo);
+ };
+
+ if (!delegateData.m_Network)
+ {
+ validateFunc(outputTensorInfo, isSupported);
+ return isSupported ? kTfLiteOk : kTfLiteError;
+ }
+
+ armnn::IConnectableLayer* quantizeLayer = delegateData.m_Network->AddQuantizeLayer();
+ ARMNN_ASSERT(quantizeLayer != nullptr);
+
+ armnn::IOutputSlot& outputSlot = quantizeLayer->GetOutputSlot(0);
+ outputSlot.SetTensorInfo(outputTensorInfo);
+
+ return Connect(quantizeLayer, tfLiteNode, delegateData);
}
} // namespace armnnDelegate
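
Both visit functions above follow the delegate's two-phase contract: when delegateData.m_Network is null the call is a validation-only pass and only the FORWARD_LAYER_SUPPORT_FUNC backend query runs; otherwise the layer is added, its output slot is given the tensor info, and Connect wires it to the TfLite node. A framework-free sketch of that control flow, using stand-in types for armnn::INetwork and armnn::IConnectableLayer (all names below are illustrative):

    #include <functional>

    struct Layer {};                      // stand-in for armnn::IConnectableLayer
    struct Network                        // stand-in for armnn::INetwork
    {
        Layer* AddQuantizeLayer() { static Layer layer; return &layer; }
    };

    enum class Status { Ok, Error };

    Status VisitSketch(Network* network, const std::function<bool()>& isSupported)
    {
        if (network == nullptr)
        {
            // Validation pass: report backend support, build nothing.
            return isSupported() ? Status::Ok : Status::Error;
        }
        // Creation pass: add the layer; Connect() would then attach its
        // input and output slots to the surrounding graph.
        Layer* layer = network->AddQuantizeLayer();
        return layer != nullptr ? Status::Ok : Status::Error;
    }
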
diff --git a/delegate/src/test/QuantizationTest.cpp b/delegate/src/test/QuantizationTest.cpp
new file mode 100644
index 0000000000..5466d47f48
--- /dev/null
+++ b/delegate/src/test/QuantizationTest.cpp
@@ -0,0 +1,429 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "QuantizationTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// Dequantize operator test functions.
+void DequantizeUint8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ // Set input and output data
+ std::vector<uint8_t> inputValues
+ {
+ 0, 1, 2, 3, // Lower bounds
+ 252, 253, 254, 255 // Upper bounds
+ };
+ std::vector<float> expectedOutputValues
+ {
+ 0.f, 1.f, 2.f, 3.f,
+ 252.f, 253.f, 254.f, 255.f
+ };
+
+ QuantizationTest<uint8_t, float>(tflite::BuiltinOperator_DEQUANTIZE,
+ ::tflite::TensorType_UINT8,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void DequantizeInt8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<int8_t> inputValues
+ {
+ -1, 0, 1, 2,
+ -128, -127, 126, 127
+ };
+ std::vector<float> expectedOutputValues
+ {
+ -1.f, 0.f, 1.f, 2.f,
+ -128.f, -127.f, 126.f, 127.f
+ };
+
+ QuantizationTest<int8_t, float>(tflite::BuiltinOperator_DEQUANTIZE,
+ ::tflite::TensorType_INT8,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void DequantizeInt16Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<int16_t> inputValues
+ {
+ -1, 0, 1, 2,
+ -32768, -16384, 16384, 32767
+ };
+ std::vector<float> expectedOutputValues
+ {
+ -1.f, 0.f, 1.f, 2.f,
+ -32768.f, -16384.f, 16384.f, 32767.f
+ };
+
+ QuantizationTest<int16_t, float>(tflite::BuiltinOperator_DEQUANTIZE,
+ ::tflite::TensorType_INT16,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+// Quantize operator test functions.
+void QuantizeFloat32Uint8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ // Set input and output data
+ std::vector<float> inputValues
+ {
+ -1.f, 0.f, 1.f, 2.f, // Lower bounds
+ 252.f, 253.f, 255.f, 256.f // Upper bounds
+ };
+ std::vector<uint8_t> expectedOutputValues
+ {
+ 0, 0, 1, 2,
+ 252, 253, 255, 255
+ };
+
+ QuantizationTest<float, uint8_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_FLOAT32,
+ ::tflite::TensorType_UINT8,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void QuantizeFloat32Int8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<float> inputValues
+ {
+ -1.f, 0.f, 1.f, 2.f,
+ -128.5f, -127.f, 126.f, 127.5f
+ };
+ std::vector<int8_t> expectedOutputValues
+ {
+ -1, 0, 1, 2,
+ -128, -127, 126, 127
+ };
+
+ QuantizationTest<float, int8_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_FLOAT32,
+ ::tflite::TensorType_INT8,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void QuantizeFloat32Int16Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<float> inputValues
+ {
+ -1.f, 0.f, 1.f, 2.f,
+ -32768.5f, -16384.f, 16384.f, 32767.5f
+ };
+ std::vector<int16_t> expectedOutputValues
+ {
+ -1, 0, 1, 2,
+ -32768, -16384, 16384, 32767
+ };
+
+ QuantizationTest<float, int16_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_FLOAT32,
+ ::tflite::TensorType_INT16,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void QuantizeInt16Int16Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<int16_t> inputValues
+ {
+ -1, 0, 1, 2,
+ -32768, -16384, 16384, 32767
+ };
+ std::vector<int16_t> expectedOutputValues
+ {
+ -1, 0, 1, 2,
+ -32768, -16384, 16384, 32767
+ };
+
+ QuantizationTest<int16_t, int16_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_INT16,
+ ::tflite::TensorType_INT16,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void QuantizeInt16Int8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<int16_t> inputValues
+ {
+ -1, 0, 1, 2,
+ -32768, -16384, 16384, 32767
+ };
+ std::vector<int8_t> expectedOutputValues
+ {
+ -1, 0, 1, 2,
+ -128, -128, 127, 127
+ };
+
+ QuantizationTest<int16_t, int8_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_INT16,
+ ::tflite::TensorType_INT8,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void QuantizeInt8Uint8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<int8_t> inputValues
+ {
+ -1, 0, 1, 2,
+ -128, -127, 126, 127
+ };
+ std::vector<uint8_t> expectedOutputValues
+ {
+ 0, 0, 1, 2,
+ 0, 0, 126, 127
+ };
+
+ QuantizationTest<int8_t, uint8_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_INT8,
+ ::tflite::TensorType_UINT8,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+void QuantizeUint8Int8Test(std::vector<armnn::BackendId>& backends)
+{
+ std::vector<int32_t> inputShape { 2, 4 };
+ std::vector<int32_t> outputShape { 2, 4 };
+
+ std::vector<uint8_t> inputValues
+ {
+ 0, 1, 2, 3,
+ 126, 127, 254, 255
+ };
+ std::vector<int8_t> expectedOutputValues
+ {
+ 0, 1, 2, 3,
+ 126, 127, 127, 127
+ };
+
+ QuantizationTest<uint8_t, int8_t>(tflite::BuiltinOperator_QUANTIZE,
+ ::tflite::TensorType_UINT8,
+ ::tflite::TensorType_INT8,
+ backends,
+ inputShape,
+ outputShape,
+ inputValues,
+ expectedOutputValues);
+}
+
+TEST_SUITE("QuantizationTests")
+{
+
+// Dequantize Operator Tests
+TEST_CASE ("DEQUANTIZE_UINT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ DequantizeUint8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_UINT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ DequantizeUint8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ DequantizeInt8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ DequantizeInt8Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT16_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ DequantizeInt16Test(backends);
+}
+
+TEST_CASE ("DEQUANTIZE_INT16_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ DequantizeInt16Test(backends);
+}
+
+// Quantize Operator Tests
+TEST_CASE ("QUANTIZE_FLOAT32_UINT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeFloat32Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_UINT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeFloat32Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeFloat32Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeFloat32Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT16_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeFloat32Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_FLOAT32_INT16_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeFloat32Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT16_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeInt16Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT16_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeInt16Int16Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeInt16Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT16_INT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeInt16Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT8_UINT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeInt8Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_INT8_UINT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeInt8Uint8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_UINT8_INT8_GpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeUint8Int8Test(backends);
+}
+
+TEST_CASE ("QUANTIZE_UINT8_INT8_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::CpuRef };
+ QuantizeUint8Int8Test(backends);
+}
+
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
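
Every TEST_CASE above pairs the accelerating backend (GpuAcc or CpuAcc) with armnn::Compute::CpuRef, so a node the accelerator cannot handle still falls back to the reference backend. A hypothetical reference-only case, not part of this patch, would drop the accelerator entirely:

    TEST_CASE ("DEQUANTIZE_UINT8_CpuRef_Test")
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
        DequantizeUint8Test(backends);
    }
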
diff --git a/delegate/src/test/QuantizationTestHelper.hpp b/delegate/src/test/QuantizationTestHelper.hpp
new file mode 100644
index 0000000000..2843e43233
--- /dev/null
+++ b/delegate/src/test/QuantizationTestHelper.hpp
@@ -0,0 +1,197 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+std::vector<char> CreateQuantizationTfLiteModel(tflite::BuiltinOperator quantizationOperatorCode,
+ tflite::TensorType inputTensorType,
+ tflite::TensorType outputTensorType,
+ const std::vector<int32_t>& inputTensorShape,
+ const std::vector<int32_t>& outputTensorShape,
+ float quantScale = 1.0f,
+ int quantOffset = 0)
+{
+ using namespace tflite;
+ flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+ std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+ buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+ auto quantizationParameters =
+ CreateQuantizationParameters(flatBufferBuilder,
+ 0,
+ 0,
+ flatBufferBuilder.CreateVector<float>({ quantScale }),
+ flatBufferBuilder.CreateVector<int64_t>({ quantOffset }),
+ QuantizationDetails_CustomQuantization);
+
+ std::array<flatbuffers::Offset<Tensor>, 2> tensors;
+ tensors[0] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+ inputTensorShape.size()),
+ inputTensorType,
+ 0,
+ flatBufferBuilder.CreateString("input"),
+ quantizationParameters);
+ tensors[1] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+ outputTensorShape.size()),
+ outputTensorType,
+ 0,
+ flatBufferBuilder.CreateString("output"),
+ quantizationParameters);
+
+ // Create the operator
+ tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE;
+ flatbuffers::Offset<void> operatorBuiltinOptions = 0;
+ switch (quantizationOperatorCode)
+ {
+ case BuiltinOperator_QUANTIZE:
+ {
+ operatorBuiltinOptionsType = BuiltinOptions_QuantizeOptions;
+ operatorBuiltinOptions = CreateQuantizeOptions(flatBufferBuilder).Union();
+ break;
+ }
+ case BuiltinOperator_DEQUANTIZE:
+ {
+ operatorBuiltinOptionsType = BuiltinOptions_DequantizeOptions;
+ operatorBuiltinOptions = CreateDequantizeOptions(flatBufferBuilder).Union();
+ break;
+ }
+ default:
+ break;
+ }
+
+ const std::vector<int32_t> operatorInputs { 0 };
+ const std::vector<int32_t> operatorOutputs { 1 };
+ flatbuffers::Offset<Operator> quantizationOperator =
+ CreateOperator(flatBufferBuilder,
+ 0,
+ flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+ flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+ operatorBuiltinOptionsType,
+ operatorBuiltinOptions);
+
+ const std::vector<int> subgraphInputs { 0 };
+ const std::vector<int> subgraphOutputs { 1 };
+ flatbuffers::Offset<SubGraph> subgraph =
+ CreateSubGraph(flatBufferBuilder,
+ flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+ flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+ flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+ flatBufferBuilder.CreateVector(&quantizationOperator, 1));
+
+ flatbuffers::Offset<flatbuffers::String> modelDescription =
+ flatBufferBuilder.CreateString("ArmnnDelegate: Quantization Operator Model");
+ flatbuffers::Offset<OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, quantizationOperatorCode);
+
+ flatbuffers::Offset<Model> flatbufferModel =
+ CreateModel(flatBufferBuilder,
+ TFLITE_SCHEMA_VERSION,
+ flatBufferBuilder.CreateVector(&operatorCode, 1),
+ flatBufferBuilder.CreateVector(&subgraph, 1),
+ modelDescription,
+ flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+ flatBufferBuilder.Finish(flatbufferModel);
+
+ return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+ flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template <typename InputT, typename OutputT>
+void QuantizationTest(tflite::BuiltinOperator quantizeOperatorCode,
+ tflite::TensorType inputTensorType,
+ tflite::TensorType outputTensorType,
+ std::vector<armnn::BackendId>& backends,
+ std::vector<int32_t>& inputShape,
+ std::vector<int32_t>& outputShape,
+ std::vector<InputT>& inputValues,
+ std::vector<OutputT>& expectedOutputValues,
+ float quantScale = 1.0f,
+ int quantOffset = 0)
+{
+ using namespace tflite;
+ std::vector<char> modelBuffer = CreateQuantizationTfLiteModel(quantizeOperatorCode,
+ inputTensorType,
+ outputTensorType,
+ inputShape,
+ outputShape,
+ quantScale,
+ quantOffset);
+
+ const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+ // Create TfLite Interpreters
+ std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+ (&armnnDelegateInterpreter) == kTfLiteOk);
+ CHECK(armnnDelegateInterpreter != nullptr);
+ CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+ std::unique_ptr<Interpreter> tfLiteInterpreter;
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+ (&tfLiteInterpreter) == kTfLiteOk);
+ CHECK(tfLiteInterpreter != nullptr);
+ CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+ // Create the ArmNN Delegate
+ armnnDelegate::DelegateOptions delegateOptions(backends);
+ std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+ theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+ armnnDelegate::TfLiteArmnnDelegateDelete);
+ CHECK(theArmnnDelegate != nullptr);
+
+ // Modify armnnDelegateInterpreter to use armnnDelegate
+ CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+ // Set input data
+ auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0];
+ auto tfLiteDelegateInputData = tfLiteInterpreter->typed_tensor<InputT>(tfLiteDelegateInputId);
+ for (unsigned int i = 0; i < inputValues.size(); ++i)
+ {
+ tfLiteDelegateInputData[i] = inputValues[i];
+ }
+
+ auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0];
+ auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor<InputT>(armnnDelegateInputId);
+ for (unsigned int i = 0; i < inputValues.size(); ++i)
+ {
+ armnnDelegateInputData[i] = inputValues[i];
+ }
+
+ // Run inference
+ CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+ CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+ // Compare output data
+ auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
+ auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<OutputT>(tfLiteDelegateOutputId);
+ auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+ auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<OutputT>(armnnDelegateOutputId);
+
+ for (size_t i = 0; i < expectedOutputValues.size(); i++)
+ {
+ CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]);
+ CHECK(tfLiteDelegateOutputData[i] == expectedOutputValues[i]);
+ CHECK(tfLiteDelegateOutputData[i] == armnnDelegateOutputData[i]);
+ }
+ }
+}
+
+} // anonymous namespace
\ No newline at end of file
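
QuantizationTest defaults quantScale to 1.0f and quantOffset to 0, and every case above relies on those defaults. A hypothetical extra case, not part of this patch, could exercise a non-trivial scale; the expected values below assume the usual affine mapping q = round(x / quantScale) + quantOffset:

    // Hypothetical test with quantScale = 2.0f and quantOffset = 1.
    void QuantizeFloat32Uint8ScaledTest(std::vector<armnn::BackendId>& backends)
    {
        std::vector<int32_t> inputShape  { 2, 2 };
        std::vector<int32_t> outputShape { 2, 2 };

        std::vector<float>   inputValues          { 0.f, 2.f, 4.f, 6.f };
        std::vector<uint8_t> expectedOutputValues { 1,   2,   3,   4 };

        QuantizationTest<float, uint8_t>(tflite::BuiltinOperator_QUANTIZE,
                                         ::tflite::TensorType_FLOAT32,
                                         ::tflite::TensorType_UINT8,
                                         backends,
                                         inputShape,
                                         outputShape,
                                         inputValues,
                                         expectedOutputValues,
                                         2.0f, // quantScale
                                         1);   // quantOffset
    }
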