From 9c3cae8683e4b24932446b88d3ecbc02f9f9fa08 Mon Sep 17 00:00:00 2001
From: James Conroy
Date: Thu, 1 Aug 2019 16:01:48 +0100
Subject: IVGCVSW-3470 Add Quantized_LSTM tests

* Added Layer and Create Workload tests for the new Quantized LSTM layer.
* Tests to be enabled on NEON and CL in their respective patches.

Signed-off-by: James Conroy
Change-Id: I7e9e9768dd63010ab58367c45fffcff452377cfb
---
 src/armnn/test/CreateWorkload.hpp                 | 140 ++++++++++++++++
 src/backends/backendsCommon/WorkloadData.cpp      | 171 +++++++++++++++++++
 src/backends/backendsCommon/test/LayerTests.cpp   |  18 +-
 src/backends/backendsCommon/test/LayerTests.hpp   |   5 +
 src/backends/backendsCommon/test/LstmTestImpl.hpp | 191 +++++++++++++++++++++-
 5 files changed, 523 insertions(+), 2 deletions(-)

diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 98cdfaff0e..3ec7e8e673 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -347,6 +347,146 @@ std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& fa
     return workload;
 }
 
+template <typename QuantizedLstmWorkload>
+std::unique_ptr<QuantizedLstmWorkload> CreateQuantizedLstmWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                       armnn::Graph& graph)
+{
+
+    auto layer = graph.AddLayer<QuantizedLstmLayer>("quantizedLstmlayer");
+    unsigned int numBatches = 2;
+    unsigned int inputSize = 2;
+    unsigned int outputSize = 4;
+
+    // Scale/Offset for input/output, cellState In/Out, weights, bias
+    float inputOutputScale = 0.0078125f;
+    int32_t inputOutputOffset = 128;
+
+    float cellStateScale = 0.00048828125f;
+    int32_t cellStateOffset = 0;
+
+    float weightsScale = 0.00408021f;
+    int32_t weightsOffset = 100;
+
+    float biasScale = 3.1876640625e-05f;
+    int32_t biasOffset = 0;
+
+    // Weights and bias tensor and quantization info
+    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
+                                       armnn::DataType::QuantisedAsymm8,
+                                       weightsScale,
+                                       weightsOffset);
+
+    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
+                                           armnn::DataType::QuantisedAsymm8,
+                                           weightsScale,
+                                           weightsOffset);
+
+    armnn::TensorInfo biasInfo({outputSize},
+                               armnn::DataType::Signed32,
+                               biasScale,
+                               biasOffset);
+
+    // Weights and bias
+    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(inputWeightsInfo);
+    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
+            std::make_unique<ScopedCpuTensorHandle>(inputWeightsInfo);
+    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
+            std::make_unique<ScopedCpuTensorHandle>(inputWeightsInfo);
+    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(inputWeightsInfo);
+
+    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(recurrentWeightsInfo);
+    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
+            std::make_unique<ScopedCpuTensorHandle>(recurrentWeightsInfo);
+    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
+            std::make_unique<ScopedCpuTensorHandle>(recurrentWeightsInfo);
+    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(recurrentWeightsInfo);
+
+    layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedCpuTensorHandle>(biasInfo);
+    layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedCpuTensorHandle>(biasInfo);
+    layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedCpuTensorHandle>(biasInfo);
+    layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedCpuTensorHandle>(biasInfo);
+
+    // Allocate weights and bias
+    layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
+    layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
+    layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
+    layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();
+
+    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
+    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
+    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
+    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();
+
+    layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
+    layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
+    layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
+    layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();
+
+    // Create input and output layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const cellStateIn = graph.AddLayer<InputLayer>(1, "cellStateIn");
+    Layer* const outputStateIn = graph.AddLayer<InputLayer>(2, "outputStateIn");
+
+    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(0, "cellStateOut");
+    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
+
+    // Input/output tensor info and quantization info
+    armnn::TensorInfo inputInfo({numBatches , inputSize},
+                                armnn::DataType::QuantisedAsymm8,
+                                inputOutputScale,
+                                inputOutputOffset);
+
+    armnn::TensorInfo cellStateInfo({numBatches , outputSize},
+                                    armnn::DataType::QuantisedSymm16,
+                                    cellStateScale,
+                                    cellStateOffset);
+
+    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
+                                      armnn::DataType::QuantisedAsymm8,
+                                      inputOutputScale,
+                                      inputOutputOffset);
+
+    // Connect input/output slots
+    Connect(input, layer, inputInfo, 0, 0);
+    Connect(cellStateIn, layer, cellStateInfo, 0, 1);
+    Connect(outputStateIn, layer, outputStateInfo, 0, 2);
+
+    Connect(layer, cellStateOut, cellStateInfo, 0, 0);
+    Connect(layer, outputStateOut, outputStateInfo, 1, 0);
+
+    CreateTensorHandles(graph, factory);
+
+    // Create workload and check layer support
+    auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, graph, factory);
+    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
+
+    // Validate input/output sizes
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 3);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 2);
+
+    // Validate weight tensor info
+    BOOST_TEST((queueDescriptor.m_InputToInputWeights->GetTensorInfo() == inputWeightsInfo));
+    BOOST_TEST((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
+    BOOST_TEST((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
+    BOOST_TEST((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));
+
+    BOOST_TEST((queueDescriptor.m_RecurrentToInputWeights->GetTensorInfo() == recurrentWeightsInfo));
+    BOOST_TEST((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
+    BOOST_TEST((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
+    BOOST_TEST((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));
+
+    BOOST_TEST((queueDescriptor.m_InputGateBias->GetTensorInfo() == biasInfo));
+    BOOST_TEST((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
+    BOOST_TEST((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
+    BOOST_TEST((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));
+
+    return workload;
+}
+
 template <typename Convolution2dWorkload, armnn::DataType DataType>
 std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                              armnn::Graph& graph)
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 88cd6a69d6..a4d35827fa 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -2266,4 +2266,175 @@ void TransposeConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
     }
 }
 
+void QuantizedLstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    const std::string descriptorName{"QuantizedLstmQueueDescriptor"};
+
+    // Validate number of inputs/outputs
+    ValidateNumInputs(workloadInfo, descriptorName, 3);
+    ValidateNumOutputs(workloadInfo, descriptorName, 2);
+
+    // Input/output tensor infos
+    auto inputInfo = workloadInfo.m_InputTensorInfos[0];
+    auto cellStateInInfo = workloadInfo.m_InputTensorInfos[1];
+    auto outputStateInInfo = workloadInfo.m_InputTensorInfos[2];
+
+    auto cellStateOutInfo = workloadInfo.m_OutputTensorInfos[0];
+    auto outputStateOutInfo = workloadInfo.m_OutputTensorInfos[1];
+
+    std::vector<DataType> inputOutputSupportedTypes =
+    {
+        DataType::QuantisedAsymm8
+    };
+
+    std::vector<DataType> cellStateSupportedTypes =
+    {
+        DataType::QuantisedSymm16
+    };
+
+    std::vector<DataType> weightsSupportedTypes =
+    {
+        DataType::QuantisedAsymm8
+    };
+
+    std::vector<DataType> biasSupportedTypes =
+    {
+        DataType::Signed32
+    };
+
+    // Validate types of input/output tensors
+    ValidateDataTypes(inputInfo, inputOutputSupportedTypes, descriptorName);
+    ValidateDataTypes(cellStateInInfo, cellStateSupportedTypes, descriptorName);
+    ValidateDataTypes(outputStateInInfo, inputOutputSupportedTypes, descriptorName);
+
+    ValidateDataTypes(cellStateOutInfo, cellStateSupportedTypes, descriptorName);
+    ValidateDataTypes(outputStateOutInfo, inputOutputSupportedTypes, descriptorName);
+
+    // Validate matching types of input/output tensors
+    ValidateTensorDataTypesMatch(inputInfo, outputStateInInfo, descriptorName, "input", "outputStateIn");
+    ValidateTensorDataTypesMatch(outputStateInInfo, outputStateOutInfo, descriptorName,
+                                 "outputStateIn", "outputStateOut");
+    ValidateTensorDataTypesMatch(cellStateInInfo, cellStateOutInfo, descriptorName, "cellStateIn", "cellStateOut");
+
+    // Validate matching quantization info for input/output tensors
+    ValidateTensorQuantizationSpace(inputInfo, outputStateInInfo, descriptorName, "input", "outputStateIn");
+    ValidateTensorQuantizationSpace(inputInfo, outputStateOutInfo, descriptorName, "input", "outputStateOut");
+    ValidateTensorQuantizationSpace(cellStateInInfo, cellStateOutInfo, descriptorName, "cellStateIn", "cellStateOut");
+
+    // Infer number of batches, input size and output size from tensor dimensions
+    const uint32_t numBatches = inputInfo.GetShape()[0];
+    const uint32_t inputSize  = inputInfo.GetShape()[1];
+    const uint32_t outputSize = cellStateInInfo.GetShape()[1];
+
+    // Validate number of dimensions and number of elements for input/output tensors
+    ValidateTensorNumDimNumElem(inputInfo, 2, (numBatches * inputSize), descriptorName + " input");
+    ValidateTensorNumDimNumElem(cellStateInInfo, 2, (numBatches * outputSize), descriptorName + " cellStateIn");
+    ValidateTensorNumDimNumElem(outputStateInInfo, 2, (numBatches * outputSize), descriptorName + " outputStateIn");
+    ValidateTensorNumDimNumElem(cellStateOutInfo, 2, (numBatches * outputSize), descriptorName + " cellStateOut");
+    ValidateTensorNumDimNumElem(outputStateOutInfo, 2, (numBatches * outputSize), descriptorName + " outputStateOut");
+
+    // Validate number of dimensions and number of elements for weights tensors
+    ValidatePointer(m_InputToInputWeights, descriptorName, "InputToInputWeights");
+    auto inputToInputWeightsInfo = m_InputToInputWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(inputToInputWeightsInfo, 2, (outputSize * inputSize), " InputToInputWeights");
+
+    ValidatePointer(m_InputToForgetWeights, descriptorName, "InputToForgetWeights");
+    auto inputToForgetWeightsInfo = m_InputToForgetWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(inputToForgetWeightsInfo, 2, (outputSize * inputSize), " InputToForgetWeights");
+
+    ValidatePointer(m_InputToCellWeights, descriptorName, "InputToCellWeights");
+    auto inputToCellWeightsInfo = m_InputToCellWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(inputToCellWeightsInfo, 2, (outputSize * inputSize), " InputToCellWeights");
+
+    ValidatePointer(m_InputToOutputWeights, descriptorName, "InputToOutputWeights");
+    auto inputToOutputWeightsInfo = m_InputToOutputWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(inputToOutputWeightsInfo, 2, (outputSize * inputSize), " InputToOutputWeights");
+
+    ValidatePointer(m_RecurrentToInputWeights, descriptorName, "RecurrentToInputWeights");
+    auto recurrentToInputWeightsInfo = m_RecurrentToInputWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(recurrentToInputWeightsInfo, 2, (outputSize * outputSize), " RecurrentToInputWeights");
+
+    ValidatePointer(m_RecurrentToForgetWeights, descriptorName, "RecurrentToForgetWeights");
+    auto recurrentToForgetWeightsInfo = m_RecurrentToForgetWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(recurrentToForgetWeightsInfo, 2, (outputSize * outputSize),
+                                " RecurrentToForgetWeights");
+
+    ValidatePointer(m_RecurrentToCellWeights, descriptorName, "RecurrentToCellWeights");
+    auto recurrentToCellWeightsInfo = m_RecurrentToCellWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(recurrentToCellWeightsInfo, 2, (outputSize * outputSize), " RecurrentToCellWeights");
+
+    ValidatePointer(m_RecurrentToOutputWeights, descriptorName, "RecurrentToOutputWeights");
+    auto recurrentToOutputWeightsInfo = m_RecurrentToOutputWeights->GetTensorInfo();
+    ValidateTensorNumDimNumElem(recurrentToOutputWeightsInfo, 2, (outputSize * outputSize),
+                                " RecurrentToOutputWeights");
+
+    // Validate data types for weights tensors (all should match each other)
+    ValidateDataTypes(inputToInputWeightsInfo, weightsSupportedTypes, descriptorName);
+
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, inputToForgetWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "inputToForgetWeights");
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, inputToCellWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "inputToCellWeights");
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, inputToOutputWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "inputToOutputWeights");
+
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, recurrentToInputWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "recurrentToInputWeights");
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, recurrentToForgetWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "recurrentToForgetWeights");
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, recurrentToCellWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "recurrentToCellWeights");
+    ValidateTensorDataTypesMatch(inputToInputWeightsInfo, recurrentToOutputWeightsInfo, descriptorName,
+                                 "inputToInputWeights", "recurrentToOutputWeights");
+
+    // Validate matching quantization info for weight tensors (all should match each other)
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, inputToForgetWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "inputToForgetWeights");
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, inputToCellWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "inputToCellWeights");
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, inputToOutputWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "inputToOutputWeights");
+
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, recurrentToInputWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "recurrentToInputWeights");
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, recurrentToForgetWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "recurrentToForgetWeights");
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, recurrentToCellWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "recurrentToCellWeights");
+    ValidateTensorQuantizationSpace(inputToInputWeightsInfo, recurrentToOutputWeightsInfo,
+                                    descriptorName, "inputToInputWeights", "recurrentToOutputWeights");
+
+    // Validate number of dimensions and number of elements in bias tensors
+    ValidatePointer(m_InputGateBias, descriptorName, "InputGateBias");
+    auto inputGateBiasInfo = m_InputGateBias->GetTensorInfo();
+    ValidateTensorNumDimNumElem(inputGateBiasInfo, 1, outputSize, " InputGateBias");
+
+    ValidatePointer(m_ForgetGateBias, descriptorName, "ForgetGateBias");
+    auto forgetGateBiasInfo = m_ForgetGateBias->GetTensorInfo();
+    ValidateTensorNumDimNumElem(forgetGateBiasInfo, 1, outputSize, " ForgetGateBias");
+
+    ValidatePointer(m_CellBias, descriptorName, "CellBias");
+    auto cellBiasInfo = m_CellBias->GetTensorInfo();
+    ValidateTensorNumDimNumElem(cellBiasInfo, 1, outputSize, " CellBias");
+
+    ValidatePointer(m_OutputGateBias, descriptorName, "OutputGateBias");
+    auto outputGateBiasInfo = m_OutputGateBias->GetTensorInfo();
+    ValidateTensorNumDimNumElem(outputGateBiasInfo, 1, outputSize, " OutputGateBias");
+
+    // Validate data types for bias tensors (all should match each other)
+    ValidateDataTypes(inputGateBiasInfo, biasSupportedTypes, descriptorName);
+
+    ValidateTensorDataTypesMatch(inputGateBiasInfo, forgetGateBiasInfo, descriptorName,
+                                 "inputGateBias", "forgetGateBias");
+    ValidateTensorDataTypesMatch(inputGateBiasInfo, cellBiasInfo, descriptorName,
+                                 "inputGateBias", "cellBias");
+    ValidateTensorDataTypesMatch(inputGateBiasInfo, outputGateBiasInfo, descriptorName,
+                                 "inputGateBias", "outputGateBias");
+
+    // Validate bias tensor quantization info
+    ValidateBiasTensorQuantization(inputGateBiasInfo, inputInfo, inputToInputWeightsInfo, descriptorName);
+    ValidateBiasTensorQuantization(forgetGateBiasInfo, inputInfo, inputToInputWeightsInfo, descriptorName);
+    ValidateBiasTensorQuantization(cellBiasInfo, inputInfo, inputToInputWeightsInfo, descriptorName);
+    ValidateBiasTensorQuantization(outputGateBiasInfo, inputInfo, inputToInputWeightsInfo, descriptorName);
+}
+
 } // namespace armnn
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index 46063803f0..f431216969 100644
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -2410,6 +2410,22 @@ LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16Const
         workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, datatype);
 }
 
+// QuantizedLstm
+LayerTestResult<uint8_t, 2> QuantizedLstmTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> input = MakeTensor<uint8_t, 2>(inputDesc, std::vector<uint8_t>(
+        {166, 179,  50, 150}));
+
+    armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> expectedOutput = MakeTensor<uint8_t, 2>(outputDesc, std::vector<uint8_t>(
+        {140, 151, 146, 112, 136, 156, 142, 112 }));
+
+    return QuantizedLstmTestImpl(workloadFactory, memoryManager, input, expectedOutput);
+}
+
 LayerTestResult<float, 3> ConcatTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
@@ -10710,4 +10726,4 @@ LayerTestResult<int16_t, 4> UnbiasedStridedTransposeConvolution2dInt16NhwcTest(
         memoryManager,
         false,
         armnn::DataLayout::NHWC);
-}
+}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index fb7ce92702..1121ae71e0 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1536,6 +1536,11 @@ LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16Const
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+// QuantizedLstm
+LayerTestResult<uint8_t, 2> QuantizedLstmTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<float, 4> SimpleConvertFp16ToFp32Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/LstmTestImpl.hpp b/src/backends/backendsCommon/test/LstmTestImpl.hpp
index 2ed0a974fc..cd1f524879 100644
--- a/src/backends/backendsCommon/test/LstmTestImpl.hpp
+++ b/src/backends/backendsCommon/test/LstmTestImpl.hpp
@@ -128,7 +128,7 @@ void LstmUtilsVectorBatchVectorCwiseProductTestImpl(
 }
 
 // Lstm Layer tests:
-
+// *********************************** //
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 2>
 LstmNoCifgNoPeepholeNoProjectionTestImpl(
@@ -1540,4 +1540,193 @@ LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadF
 
     return ret;
+}
+
+// QuantizedLstm tests:
+
+LayerTestResult<uint8_t, 2>
+QuantizedLstmTestImpl(armnn::IWorkloadFactory& workloadFactory,
+                      const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+                      const boost::multi_array<uint8_t, 2>& input,
+                      const boost::multi_array<uint8_t, 2>& outputExpected)
+{
+
+    auto numBatches = boost::numeric_cast<unsigned int>(input.shape()[0]);
+    auto inputSize = boost::numeric_cast<unsigned int>(input.shape()[1]);
+    auto outputSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
+
+    // Scale/Offset for input/output, cellState In/Out, weights, bias
+    float inputOutputScale = 0.0078125f;
+    int32_t inputOutputOffset = 128;
+
+    float cellStateScale = 0.00048828125f;
+    int32_t cellStateOffset = 0;
+
+    float weightsScale = 0.00408021f;
+    int32_t weightsOffset = 100;
+
+    float biasScale = 3.1876640625e-05f;
+    int32_t biasOffset = 0;
+
+    // Input/Output tensor info
+    armnn::TensorInfo inputInfo({numBatches , inputSize},
+                                armnn::DataType::QuantisedAsymm8,
+                                inputOutputScale,
+                                inputOutputOffset);
+
+    armnn::TensorInfo cellStateInfo({numBatches , outputSize},
+                                    armnn::DataType::QuantisedSymm16,
+                                    cellStateScale,
+                                    cellStateOffset);
+
+    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
+                                      armnn::DataType::QuantisedAsymm8,
+                                      inputOutputScale,
+                                      inputOutputOffset);
+
+    LayerTestResult<uint8_t, 2> ret(outputStateInfo);
+
+    // Input0
+    std::vector<uint8_t> inputVector;
+    inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
+    auto inputTensor = MakeTensor<uint8_t, 2>(inputInfo, inputVector);
+
+    // Input1
+    std::vector<int16_t> cellStateInVector = {876, 1034, 955, -909, 761, 1029, 796, -1036}; // 13
+    auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);
+
+    // Input2
+    std::vector<uint8_t> outputStateInVector = {136, 150, 140, 115, 135, 152, 138, 112}; // 14
+    auto outputStateInTensor = MakeTensor<uint8_t, 2>(outputStateInfo, outputStateInVector);
+
+    // Output0
+    std::vector<int16_t> cellStateOutVector = {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235}; // 0
+    auto cellStateOutTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);
+
+    // Output1
+    std::vector<uint8_t> outputVector; // 1
+    outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
+    ret.outputExpected = MakeTensor<uint8_t, 2>(outputStateInfo, outputVector);
+
+    // Create tensor handles
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
+            workloadFactory.CreateTensorHandle(cellStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
+            workloadFactory.CreateTensorHandle(outputStateInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
+            workloadFactory.CreateTensorHandle(cellStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+
+    armnn::QuantizedLstmQueueDescriptor data;
+    armnn::WorkloadInfo info;
+
+    // Add inputs and outputs to workload
+    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
+    AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());
+    AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
+
+    AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
+    AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());
+
+    // Weights and bias tensor and quantization info
+    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
+                                       armnn::DataType::QuantisedAsymm8,
+                                       weightsScale,
+                                       weightsOffset);
+
+    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
+                                           armnn::DataType::QuantisedAsymm8,
+                                           weightsScale,
+                                           weightsOffset);
+
+    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);
+
+    // Weights and bias tensor data
+    auto inputToInputWeights = MakeTensor<uint8_t, 2>(inputWeightsInfo, {146, 250, 235, 171, 10, 218, 171, 108});
+    auto inputToForgetWeights = MakeTensor<uint8_t, 2>(inputWeightsInfo, {24, 50, 132, 179, 158, 110, 3, 169});
+    auto inputToCellWeights = MakeTensor<uint8_t, 2>(inputWeightsInfo, {133, 34, 29, 49, 206, 109, 54, 183});
+    auto inputToOutputWeights = MakeTensor<uint8_t, 2>(inputWeightsInfo, {195, 187, 11, 99, 109, 10, 218, 48});
+
+    auto recurrentToInputWeights = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
+        {254, 206, 77, 168, 71, 20, 215, 6, 223, 7, 118, 225, 59, 130, 174, 26});
+    auto recurrentToForgetWeights = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
+        {137, 240, 103, 52, 68, 51, 237, 112, 0, 220, 89, 23, 69, 4, 207, 253});
+    auto recurrentToCellWeights = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
+        {172, 60, 205, 65, 14, 0, 140, 168, 240, 223, 133, 56, 142, 64, 246, 216});
+    auto recurrentToOutputWeights = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
+        {106, 214, 67, 23, 59, 158, 45, 3, 119, 132, 49, 205, 129, 218, 11, 98});
+
+    auto inputGateBias = MakeTensor<int32_t, 1>(biasInfo, {-7876, 13488, -726, 32839});
+    auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {9206, -46884, -11693, -38724});
+    auto cellBias = MakeTensor<int32_t, 1>(biasInfo, {39481, 48624, 48976, -21419});
+    auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {-58999, -17050, -41852, -40538});
+
+    // ScopedCpuTensorHandles
+    armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle inputGateBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);
+
+    // Allocate and copy data
+    AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
+
+    AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
+
+    AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
+    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
+    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
+    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
+
+    // Setup queue descriptor
+    data.m_InputToInputWeights = &inputToInputWeightsTensor;
+    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+    data.m_InputToCellWeights = &inputToCellWeightsTensor;
+    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+
+    data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+
+    data.m_InputGateBias = &inputGateBiasTensor;
+    data.m_ForgetGateBias = &forgetGateBiasTensor;
+    data.m_CellBias = &cellBiasTensor;
+    data.m_OutputGateBias = &outputGateBiasTensor;
+
+    // Create workload and allocate tensor handles
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQuantizedLstm(data, info);
+    inputHandle->Allocate();
+    outputStateInHandle->Allocate();
+    cellStateInHandle->Allocate();
+
+    cellStateOutHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
+    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
+    return ret;
+}
\ No newline at end of file
--
cgit v1.2.1