2 files changed, 880 insertions, 0 deletions
diff --git a/delegate/src/test/LstmTest.cpp b/delegate/src/test/LstmTest.cpp
new file mode 100644
index 0000000000..1fa9f0c8bf
--- /dev/null
+++ b/delegate/src/test/LstmTest.cpp
@@ -0,0 +1,189 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LstmTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// Builds one hand-written float32 LSTM configuration and passes it to LstmTestImpl.
+//
+// The configuration uses a batch size of 2, an input size of 2 and 4 units/outputs. The
+// input-gate weights and bias are supplied, while the cell-to-gate, projection and layer
+// normalisation tensors are flagged as absent and passed as empty vectors. The activation
+// is TANH and both clipping thresholds are 0. Input and expected output values are
+// hard-coded alongside the weights.
+//
+// backends: forwarded unchanged to LstmTestImpl.
+void LstmTest(std::vector<armnn::BackendId>& backends)
+{
+    int32_t batchSize = 2;
+    int32_t inputSize = 2;
+    int32_t outputSize = 4;
+    // cellSize and outputSize have the same size when there is no projection.
+    int32_t numUnits = outputSize;
+
+    // Only the sizes above and the flat value vectors below are forwarded to LstmTestImpl;
+    // no tensor shape vectors are needed in this function.
+
+    // Input-to-gate weights: numUnits * inputSize (8) values each.
+    bool hasInputToInputWeights = true;
+    std::vector<float> inputToInputWeights {-0.45018822f, -0.02338299f, -0.0870589f,
+                                            -0.34550029f, 0.04266912f, -0.15680569f,
+                                            -0.34856534f, 0.43890524f};
+
+    std::vector<float> inputToForgetWeights {0.09701663f, 0.20334584f, -0.50592935f,
+                                             -0.31343272f, -0.40032279f, 0.44781327f,
+                                             0.01387155f, -0.35593212f};
+
+    std::vector<float> inputToCellWeights {-0.50013041f, 0.1370284f, 0.11810488f, 0.2013163f,
+                                           -0.20583314f, 0.44344562f, 0.22077113f,
+                                           -0.29909778f};
+
+    std::vector<float> inputToOutputWeights {-0.25065863f, -0.28290087f, 0.04613829f,
+                                             0.40525138f, 0.44272184f, 0.03897077f,
+                                             -0.1556896f, 0.19487578f};
+
+    // Recurrent-to-gate weights: numUnits * outputSize (16) values each.
+    bool hasRecurrentToInputWeights = true;
+    std::vector<float> recurrentToInputWeights {-0.0063535f, -0.2042388f, 0.31454784f,
+                                                -0.35746509f, 0.28902304f, 0.08183324f,
+                                                -0.16555229f, 0.02286911f, -0.13566875f,
+                                                0.03034258f, 0.48091322f, -0.12528998f,
+                                                0.24077177f, -0.51332325f, -0.33502164f,
+                                                0.10629296f};
+
+    std::vector<float> recurrentToForgetWeights {-0.48684245f, -0.06655136f, 0.42224967f,
+                                                 0.2112639f, 0.27654213f, 0.20864892f,
+                                                 -0.07646349f, 0.45877004f, 0.00141793f,
+                                                 -0.14609534f, 0.36447752f, 0.09196436f,
+                                                 0.28053468f, 0.01560611f, -0.20127171f,
+                                                 -0.01140004f};
+
+    std::vector<float> recurrentToCellWeights {-0.3407414f, 0.24443203f, -0.2078532f,
+                                               0.26320225f, 0.05695659f, -0.00123841f,
+                                               -0.4744786f, -0.35869038f, -0.06418842f,
+                                               -0.13502428f, -0.501764f, 0.22830659f,
+                                               -0.46367589f, 0.26016325f, -0.03894562f,
+                                               -0.16368064f};
+
+    std::vector<float> recurrentToOutputWeights {0.43385774f, -0.17194885f, 0.2718237f,
+                                                 0.09215671f, 0.24107647f, -0.39835793f,
+                                                 0.18212086f, 0.01301402f, 0.48572797f,
+                                                 -0.50656658f, 0.20047462f, -0.20607421f,
+                                                 -0.51818722f, -0.15390486f, 0.0468148f,
+                                                 0.39922136f};
+    // Cell-to-gate weights: all three are flagged absent, so the vectors stay empty.
+    bool hasCellToInputWeights = false;
+    std::vector<float> cellToInputWeights {};
+    bool hasCellToForgetWeights = false;
+    std::vector<float> cellToForgetWeights {};
+    bool hasCellToOutputWeights = false;
+    std::vector<float> cellToOutputWeights {};
+
+    // Gate biases: numUnits (4) values each.
+    bool hasInputGateBias = true;
+    std::vector<float> inputGateBias {0., 0., 0., 0.};
+    std::vector<float> forgetGateBias {1., 1., 1., 1.};
+    std::vector<float> cellBias {0., 0., 0., 0.};
+    std::vector<float> outputGateBias {0., 0., 0., 0.};
+
+    bool hasProjectionWeights = false;
+    std::vector<float> projectionWeights;
+    bool hasProjectionBias = false;
+    std::vector<float> projectionBias;
+
+    bool hasInputLayerNormWeights = false;
+    std::vector<float> inputLayerNormWeights;
+    bool hasForgetLayerNormWeights = false;
+    std::vector<float> forgetLayerNormWeights;
+    bool hasCellLayerNormWeights = false;
+    std::vector<float> cellLayerNormWeights;
+    bool hasOutputLayerNormWeights = false;
+    std::vector<float> outputLayerNormWeights;
+
+    std::vector<float> inputValues {2., 3., 3., 4.};
+    std::vector<float> expectedOutputValues {-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
+                                             -0.0185422f,   0.11281417f,  0.24466537f, -0.1826292f};
+
+    tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH;
+    float clippingThresCell = 0.f;
+    float clippingThresProj = 0.f;
+
+    LstmTestImpl<float>(backends,
+                        ::tflite::TensorType_FLOAT32,
+                        batchSize,
+                        inputSize,
+                        outputSize,
+                        numUnits,
+                        hasInputToInputWeights,
+                        inputToInputWeights,
+                        inputToForgetWeights,
+                        inputToCellWeights,
+                        inputToOutputWeights,
+                        hasRecurrentToInputWeights,
+                        recurrentToInputWeights,
+                        recurrentToForgetWeights,
+                        recurrentToCellWeights,
+                        recurrentToOutputWeights,
+                        hasCellToInputWeights,
+                        cellToInputWeights,
+                        hasCellToForgetWeights,
+                        cellToForgetWeights,
+                        hasCellToOutputWeights,
+                        cellToOutputWeights,
+                        hasInputGateBias,
+                        inputGateBias,
+                        forgetGateBias,
+                        cellBias,
+                        outputGateBias,
+                        hasProjectionWeights,
+                        projectionWeights,
+                        hasProjectionBias,
+                        projectionBias,
+                        hasInputLayerNormWeights,
+                        inputLayerNormWeights,
+                        hasForgetLayerNormWeights,
+                        forgetLayerNormWeights,
+                        hasCellLayerNormWeights,
+                        cellLayerNormWeights,
+                        hasOutputLayerNormWeights,
+                        outputLayerNormWeights,
+                        inputValues,
+                        expectedOutputValues,
+                        activationFunction,
+                        clippingThresCell,
+                        clippingThresProj);
+}
+
+// Runs LstmTest with armnn::Compute::CpuRef as the only backend.
+TEST_SUITE("LstmTest_CpuRefTests")
+{
+TEST_CASE("LstmTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> cpuRefBackends{ armnn::Compute::CpuRef };
+    LstmTest(cpuRefBackends);
+}
+
+} // End of TEST_SUITE("LstmTest_CpuRefTests")
+
+// Runs LstmTest with armnn::Compute::CpuAcc as the only backend.
+TEST_SUITE("LstmTest_CpuAccTests")
+{
+TEST_CASE("LstmTest_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> cpuAccBackends{ armnn::Compute::CpuAcc };
+    LstmTest(cpuAccBackends);
+}
+
+} // End of TEST_SUITE("LstmTest_CpuAccTests")
+
+} // namespace armnnDelegate
+\ No newline at end of file
diff --git a/delegate/src/test/LstmTestHelper.hpp b/delegate/src/test/LstmTestHelper.hpp
new file mode 100644
index 0000000000..36a606119a
--- /dev/null
+++ b/delegate/src/test/LstmTestHelper.hpp
@@ -0,0 +1,691 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+#include <tensorflow/lite/c/common.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+template <typename T>
+std::vector<char> CreateLstmTfLiteModel(tflite::TensorType tensorType,
+                                        int32_t batchSize,
+                                        int32_t inputSize,
+                                        int32_t outputSize,
+                                        int32_t numUnits,
+                                        bool hasInputToInputWeights,
+                                        const std::vector<T>& inputToInputWeights,
+                                        const std::vector<T>& inputToForgetWeights,
+                                        const std::vector<T>& inputToCellWeights,
+                                        const std::vector<T>& inputToOutputWeights,
+                                        bool hasRecurrentToInputWeights,
+                                        const std::vector<T>& recurrentToInputWeights,
+                                        const std::vector<T>& recurrentToForgetWeights,
+                                        const std::vector<T>& recurrentToCellWeights,
+                                        const std::vector<T>& recurrentToOutputWeights,
+                                        bool hasCellToInputWeights,
+                                        const std::vector<T>& cellToInputWeights,
+                                        bool hasCellToForgetWeights,
+                                        const std::vector<T>& cellToForgetWeights,
+                                        bool hasCellToOutputWeights,
+                                        const std::vector<T>& cellToOutputWeights,
+                                        bool hasInputGateBias,
+                                        const std::vector<T>& inputGateBias,
+                                        const std::vector<T>& forgetGateBias,
+                                        const std::vector<T>& cellBias,
+                                        const std::vector<T>& outputGateBias,
+                                        bool hasProjectionWeights,
+                                        const std::vector<T>& projectionWeights,
+                                        bool hasProjectionBias,
+                                        const std::vector<T>& projectionBias,
+                                        bool hasInputLayerNormWeights,
+                                        const std::vector<T>& inputLayerNormWeights,
+                                        bool hasForgetLayerNormWeights,
+                                        const std::vector<T>& forgetLayerNormWeights,
+                                        bool hasCellLayerNormWeights,
+                                        const std::vector<T>& cellLayerNormWeights,
+                                        bool hasOutputLayerNormWeights,
+                                        const std::vector<T>& outputLayerNormWeights,
+                                        tflite::ActivationFunctionType activationFunction,
+                                        float clippingThresCell,
+                                        float clippingThresProj,
+                                        float quantScale = 1.0f,
+                                        int quantOffset  = 0,
+                                        float outputQuantScale = 2.0f,
+                                        int outputQuantOffset  = 0)
+{
+
+    std::vector <int32_t> tensorInfo0 {};
+    std::vector <int32_t> tensorInfo4 {numUnits};
+    std::vector <int32_t> tensorInfo8 {numUnits, static_cast<int32_t>(2)};
+    std::vector <int32_t> tensorInfo16 {numUnits, static_cast<int32_t>(4)};
+
+    std::vector<int32_t> inputShape {batchSize , inputSize};
+    std::vector<int32_t> outputShape {batchSize , outputSize};
+
+    std::vector<int32_t> outputStateInDimensions{batchSize, outputSize};
+    std::vector<int32_t> cellStateInDimensions{batchSize, numUnits};
+
+    std::vector<int> operatorInputs;
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    std::vector<flatbuffers::Offset<Tensor>> tensors;
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    auto outputQuantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ outputQuantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ outputQuantOffset }));
+
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(inputShape.data(),
+                                                                           inputShape.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("input_0"),
+                                   quantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    if (hasInputToInputWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(inputToInputWeights.data()),
+                                                        sizeof(T) * inputToInputWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
+                                                                               tensorInfo8.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("inputToInputWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(inputToForgetWeights.data()),
+                                                    sizeof(T) * inputToForgetWeights.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
+                                                                           tensorInfo8.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("inputToForgetWeights"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(inputToCellWeights.data()),
+                                                    sizeof(T) * inputToCellWeights.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
+                                                                           tensorInfo8.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("inputToCellWeights"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(inputToOutputWeights.data()),
+                                                    sizeof(T) * inputToOutputWeights.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
+                                                                           tensorInfo8.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("inputToOutputWeights"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    if (hasRecurrentToInputWeights)
+    {
+        buffers.push_back(CreateBuffer(
+            flatBufferBuilder,
+            flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(recurrentToInputWeights.data()),
+                                           sizeof(T) * recurrentToInputWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
+                                                                               tensorInfo16.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("recurrentToInputWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(recurrentToForgetWeights.data()),
+                                                    sizeof(T) * recurrentToForgetWeights.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
+                                                                           tensorInfo16.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("recurrentToForgetWeights"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(recurrentToCellWeights.data()),
+                                                    sizeof(T) * recurrentToCellWeights.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
+                                                                           tensorInfo16.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("recurrentToCellWeights"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(recurrentToOutputWeights.data()),
+                                                    sizeof(T) * recurrentToOutputWeights.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
+                                                                           tensorInfo16.size()),
+                                   tensorType,
+                                   buffers.size() - 1 ,
+                                   flatBufferBuilder.CreateString("recurrentToOutputWeights"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    if (hasCellToInputWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(cellToInputWeights.data()),
+                                                        sizeof(T) * cellToInputWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("cellToInputWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasCellToForgetWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(cellToForgetWeights.data()),
+                                                        sizeof(T) * cellToForgetWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("cellToForgetWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasCellToOutputWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(cellToOutputWeights.data()),
+                                                        sizeof(T) * cellToOutputWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("cellToOutputWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasInputGateBias)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(inputGateBias.data()),
+                                                        sizeof(T) * inputGateBias.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("inputGateBias"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(forgetGateBias.data()),
+                                                    sizeof(T) * forgetGateBias.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                           tensorInfo4.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("forgetGateBias"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(cellBias.data()),
+                                                    sizeof(T) * cellBias.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                           tensorInfo4.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("cellBias"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(
+        CreateBuffer(flatBufferBuilder,
+                     flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(outputGateBias.data()),
+                                                    sizeof(T) * outputGateBias.size())));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                           tensorInfo4.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("outputGateBias"),
+                                   outputQuantizationParameters));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    if (hasProjectionWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(projectionWeights.data()),
+                                                        sizeof(T) * projectionWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("outputGateBias"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasProjectionBias)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(projectionBias.data()),
+                                                        sizeof(T) * projectionBias.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("projectionBias"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(outputStateInDimensions.data(),
+                                                                           outputStateInDimensions.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("outputStateInInfo"),
+                                   outputQuantizationParameters,
+                                   true));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(cellStateInDimensions.data(),
+                                                                           cellStateInDimensions.size()),
+                                   tensorType,
+                                   buffers.size() - 1,
+                                   flatBufferBuilder.CreateString("cellStateInInfo"),
+                                   outputQuantizationParameters,
+                                   true));
+    operatorInputs.push_back(buffers.size() - 1);
+
+    if (hasInputLayerNormWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(
+                                              reinterpret_cast<const uint8_t *>(inputLayerNormWeights.data()),
+                                              sizeof(T) * inputLayerNormWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("inputLayerNormWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasForgetLayerNormWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(
+                                              reinterpret_cast<const uint8_t *>(forgetLayerNormWeights.data()),
+                                              sizeof(T) * forgetLayerNormWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("forgetLayerNormWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasCellLayerNormWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t *>(cellLayerNormWeights.data()),
+                                                        sizeof(T) * cellLayerNormWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("cellLayerNormWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+
+    if (hasOutputLayerNormWeights)
+    {
+        buffers.push_back(
+            CreateBuffer(flatBufferBuilder,
+                         flatBufferBuilder.CreateVector(
+                             reinterpret_cast<const uint8_t *>(outputLayerNormWeights.data()),
+                             sizeof(T) * outputLayerNormWeights.size())));
+        tensors.push_back(CreateTensor(flatBufferBuilder,
+                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
+                                                                               tensorInfo4.size()),
+                                       tensorType,
+                                       buffers.size() - 1,
+                                       flatBufferBuilder.CreateString("outputLayerNormWeights"),
+                                       outputQuantizationParameters));
+        operatorInputs.push_back(buffers.size() - 1);
+    }
+    else
+    {
+        operatorInputs.push_back(kTfLiteOptionalTensor);
+    }
+    int outputBufferId = buffers.size();
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+    tensors.push_back(CreateTensor(flatBufferBuilder,
+                                   flatBufferBuilder.CreateVector<int32_t>(outputShape.data(),
+                                                                           outputShape.size()),
+                                   tensorType,
+                                   outputBufferId,
+                                   flatBufferBuilder.CreateString("output"),
+                                   outputQuantizationParameters));
+    std::vector<int> operatorOutputs;
+    operatorOutputs.push_back(buffers.size() - 1);
+
+    // create operator
+    tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_LSTMOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions =
+        CreateLSTMOptions(flatBufferBuilder,
+                          activationFunction,
+                          clippingThresCell,
+                          clippingThresProj).Union();
+
+    flatbuffers::Offset <Operator> lstmOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType, operatorBuiltinOptions);
+
+    flatbuffers::Offset <SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       flatBufferBuilder.CreateVector(&lstmOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: LSTM Operator Model");
+    flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder,
+                                                                         tflite::BuiltinOperator_LSTM);
+
+    flatbuffers::Offset <Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+// Runs one LSTM operator through TfLite with and without the ArmNN delegate and compares the outputs.
+//
+// What the function does, in order:
+//   1. CreateLstmTfLiteModel serializes a model from the forwarded arguments (listed below).
+//   2. Two interpreters are built from that model and their tensors are allocated.
+//   3. An ArmNN delegate is created from backends and applied to one of the interpreters only.
+//   4. inputValues is copied into input 0 of both interpreters, then both are invoked.
+//   5. Output 0 of each interpreter is compared with expectedOutputValues, and the two outputs are
+//      compared with each other, through armnnDelegate::CompareData.
+// Interpreter construction, tensor allocation, delegate creation and application, and both Invoke
+// calls are asserted with CHECK.
+//
+// Template parameter:
+//   T                     element type of all data vectors; also used for typed_tensor<T> access.
+// Parameters:
+//   backends              used to construct the armnnDelegate::DelegateOptions.
+//   tensorType, batchSize, inputSize, outputSize, numUnits,
+//   every has* flag, every weight / bias / layer norm vector,
+//   activationFunction, clippingThresCell, clippingThresProj
+//                         forwarded unchanged, in declaration order, to CreateLstmTfLiteModel.
+//   inputValues           data written to the first input tensor; all of its elements are written.
+//   expectedOutputValues  reference output; its size is the element count of all three comparisons.
+//
+// NOTE(review): neither vector's size is checked against the actual tensor sizes before the
+// tensors are written or compared - callers presumably have to supply vectors that match the
+// shapes the model was created with.
+template <typename T>
+void LstmTestImpl(std::vector<armnn::BackendId>& backends,
+                  tflite::TensorType tensorType,
+                  int32_t batchSize,
+                  int32_t inputSize,
+                  int32_t outputSize,
+                  int32_t numUnits,
+                  bool hasInputToInputWeights,
+                  const std::vector<T>& inputToInputWeights,
+                  const std::vector<T>& inputToForgetWeights,
+                  const std::vector<T>& inputToCellWeights,
+                  const std::vector<T>& inputToOutputWeights,
+                  bool hasRecurrentToInputWeights,
+                  const std::vector<T>& recurrentToInputWeights,
+                  const std::vector<T>& recurrentToForgetWeights,
+                  const std::vector<T>& recurrentToCellWeights,
+                  const std::vector<T>& recurrentToOutputWeights,
+                  bool hasCellToInputWeights,
+                  const std::vector<T>& cellToInputWeights,
+                  bool hasCellToForgetWeights,
+                  const std::vector<T>& cellToForgetWeights,
+                  bool hasCellToOutputWeights,
+                  const std::vector<T>& cellToOutputWeights,
+                  bool hasInputGateBias,
+                  const std::vector<T>& inputGateBias,
+                  const std::vector<T>& forgetGateBias,
+                  const std::vector<T>& cellBias,
+                  const std::vector<T>& outputGateBias,
+                  bool hasProjectionWeights,
+                  const std::vector<T>& projectionWeights,
+                  bool hasProjectionBias,
+                  const std::vector<T>& projectionBias,
+                  bool hasInputLayerNormWeights,
+                  const std::vector<T>& inputLayerNormWeights,
+                  bool hasForgetLayerNormWeights,
+                  const std::vector<T>& forgetLayerNormWeights,
+                  bool hasCellLayerNormWeights,
+                  const std::vector<T>& cellLayerNormWeights,
+                  bool hasOutputLayerNormWeights,
+                  const std::vector<T>& outputLayerNormWeights,
+                  std::vector<T>& inputValues,
+                  std::vector<T>& expectedOutputValues,
+                  tflite::ActivationFunctionType activationFunction,
+                  float clippingThresCell,
+                  float clippingThresProj)
+{
+    using namespace tflite;
+
+    // Serialized model holding the LSTM operator; arguments are grouped per gate / feature.
+    std::vector<char> serializedModel =
+        CreateLstmTfLiteModel(tensorType, batchSize, inputSize, outputSize, numUnits,
+                              hasInputToInputWeights, inputToInputWeights,
+                              inputToForgetWeights, inputToCellWeights, inputToOutputWeights,
+                              hasRecurrentToInputWeights, recurrentToInputWeights,
+                              recurrentToForgetWeights, recurrentToCellWeights, recurrentToOutputWeights,
+                              hasCellToInputWeights, cellToInputWeights,
+                              hasCellToForgetWeights, cellToForgetWeights,
+                              hasCellToOutputWeights, cellToOutputWeights,
+                              hasInputGateBias, inputGateBias, forgetGateBias, cellBias, outputGateBias,
+                              hasProjectionWeights, projectionWeights,
+                              hasProjectionBias, projectionBias,
+                              hasInputLayerNormWeights, inputLayerNormWeights,
+                              hasForgetLayerNormWeights, forgetLayerNormWeights,
+                              hasCellLayerNormWeights, cellLayerNormWeights,
+                              hasOutputLayerNormWeights, outputLayerNormWeights,
+                              activationFunction, clippingThresCell, clippingThresProj);
+    const Model* lstmModel = GetModel(serializedModel.data());
+
+    // Interpreter that gets modified with the ArmNN delegate further down.
+    std::unique_ptr<Interpreter> delegatedInterpreter;
+    CHECK(InterpreterBuilder(lstmModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&delegatedInterpreter) == kTfLiteOk);
+    CHECK(delegatedInterpreter != nullptr);
+    CHECK(delegatedInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Second interpreter built from the same model; it is left without a delegate.
+    std::unique_ptr<Interpreter> referenceInterpreter;
+    CHECK(InterpreterBuilder(lstmModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&referenceInterpreter) == kTfLiteOk);
+    CHECK(referenceInterpreter != nullptr);
+    CHECK(referenceInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // The delegate is held by a unique_ptr whose deleter is TfLiteArmnnDelegateDelete.
+    using DelegateHandle = std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>;
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    DelegateHandle armnnDelegateHandle(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                                       armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(armnnDelegateHandle != nullptr);
+    CHECK(delegatedInterpreter->ModifyGraphWithDelegate(armnnDelegateHandle.get()) == kTfLiteOk);
+
+    // Copies inputValues, element by element, into input tensor 0 of the given interpreter.
+    auto writeInput = [&inputValues](Interpreter& interpreter)
+    {
+        auto* cursor = interpreter.typed_tensor<T>(interpreter.inputs()[0]);
+        for (const T& value : inputValues)
+        {
+            *cursor++ = value;
+        }
+    };
+    writeInput(*referenceInterpreter);
+    writeInput(*delegatedInterpreter);
+
+    // Run inference: the interpreter without delegate first, then the delegated one.
+    CHECK(referenceInterpreter->Invoke() == kTfLiteOk);
+    CHECK(delegatedInterpreter->Invoke() == kTfLiteOk);
+
+    // Output tensor 0 of each interpreter, accessed as T.
+    auto* referenceOutput = referenceInterpreter->typed_tensor<T>(referenceInterpreter->outputs()[0]);
+    auto* delegatedOutput = delegatedInterpreter->typed_tensor<T>(delegatedInterpreter->outputs()[0]);
+    const auto elementCount = expectedOutputValues.size();
+
+    // Expected vs. delegated, expected vs. reference, and finally reference vs. delegated.
+    armnnDelegate::CompareData(expectedOutputValues.data(), delegatedOutput, elementCount);
+    armnnDelegate::CompareData(expectedOutputValues.data(), referenceOutput, elementCount);
+    armnnDelegate::CompareData(referenceOutput, delegatedOutput, elementCount);
+}
+
+} // anonymous namespace
+\ No newline at end of file