From 6e36a64e26520e3f169bb2a92972a24e1be915a7 Mon Sep 17 00:00:00 2001 From: Sadik Armagan Date: Tue, 10 Nov 2020 21:18:41 +0000 Subject: IVGCVSW-5389 'TfLiteDelegate: Implement the FullyConnected operator' * Added FullyConnected operator support to delegate Signed-off-by: Sadik Armagan Change-Id: Iae9c0980a4bfd6aa4d90f107f329dfa782baeefe --- delegate/CMakeLists.txt | 4 +- delegate/src/DelegateUtils.hpp | 132 ++++++++++++-- delegate/src/FullyConnected.hpp | 212 +++++++++++++++++++++- delegate/src/armnn_delegate.cpp | 20 +-- delegate/src/test/FullyConnectedTest.cpp | 128 ++++++++++++++ delegate/src/test/FullyConnectedTestHelper.hpp | 232 +++++++++++++++++++++++++ 6 files changed, 696 insertions(+), 32 deletions(-) create mode 100644 delegate/src/test/FullyConnectedTest.cpp create mode 100644 delegate/src/test/FullyConnectedTestHelper.hpp diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt index e05a0baff4..05ec851bf2 100644 --- a/delegate/CMakeLists.txt +++ b/delegate/CMakeLists.txt @@ -95,16 +95,18 @@ list(APPEND armnnDelegate_unittest_sources src/test/ElementwiseBinaryTestHelper.hpp src/test/ElementwiseUnaryTest.cpp src/test/ElementwiseUnaryTestHelper.hpp + src/test/FullyConnectedTest.cpp + src/test/FullyConnectedTestHelper.hpp src/test/Pooling2dTest.cpp src/test/Pooling2dTestHelper.hpp src/test/QuantizationTest.cpp src/test/QuantizationTestHelper.hpp) add_executable(DelegateUnitTests ${armnnDelegate_unittest_sources}) -target_include_directories(DelegateUnitTests PRIVATE src) target_include_directories(DelegateUnitTests PRIVATE third-party) target_link_libraries(DelegateUnitTests armnnDelegate) +target_link_libraries(DelegateUnitTests Armnn::armnnUtils) target_include_directories(DelegateUnitTests PRIVATE diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp index 729a8b4e98..fb3f998283 100644 --- a/delegate/src/DelegateUtils.hpp +++ b/delegate/src/DelegateUtils.hpp @@ -10,6 +10,8 @@ #include #include +#include + #include #include #include @@ -94,6 +96,11 @@ TfLiteStatus ValidateNumOutputs(TfLiteContext* tfLiteContext, return kTfLiteOk; } +bool IsValid(const TfLiteTensor* tfLiteTensor) +{ + return tfLiteTensor == nullptr ? 
false : true; +} + bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor) { auto tensorAllocationType = tfLiteTensor.allocation_type; @@ -118,13 +125,15 @@ TfLiteStatus Connect(armnn::IConnectableLayer* layer, TfLiteNode* tfLiteNode, armnnDelegate::DelegateData& data) { - ARMNN_ASSERT(tfLiteNode->inputs->size == layer->GetNumInputSlots()); ARMNN_ASSERT(tfLiteNode->outputs->size == layer->GetNumOutputSlots()); // Connect the input slots for (unsigned int inputIndex = 0; inputIndex < layer->GetNumInputSlots(); ++inputIndex) { - data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]]->Connect(layer->GetInputSlot(inputIndex)); + if (data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]] != nullptr) + { + data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]]->Connect(layer->GetInputSlot(inputIndex)); + } } // Prepare output slots @@ -133,6 +142,7 @@ TfLiteStatus Connect(armnn::IConnectableLayer* layer, armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex); data.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]] = &outputSlot; } + return kTfLiteOk; } @@ -299,43 +309,39 @@ TfLiteStatus FusedActivation(TfLiteContext* tfLiteContext, return kTfLiteOk; } -armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor) +armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor) { - armnn::DataType type; switch (tfLiteTensor.type) { case kTfLiteBool: - type = armnn::DataType::Boolean; - break; + return armnn::DataType::Boolean; case kTfLiteFloat32: - type = armnn::DataType::Float32; - break; + return armnn::DataType::Float32; case kTfLiteFloat16: - type = armnn::DataType::Float16; - break; + return armnn::DataType::Float16; case kTfLiteUInt8: - type = armnn::DataType::QAsymmU8; - break; + return armnn::DataType::QAsymmU8; case kTfLiteInt8: if (tfLiteTensor.params.zero_point == 0) { - type = armnn::DataType::QSymmS8; + return armnn::DataType::QSymmS8; } else { - type = armnn::DataType::QAsymmS8; + return armnn::DataType::QAsymmS8; } - break; case kTfLiteInt16: - type = armnn::DataType::QSymmS16; - break; + return armnn::DataType::QSymmS16; case kTfLiteInt32: - type = armnn::DataType::Signed32; - break; + return armnn::DataType::Signed32; default: throw armnn::Exception("TfLiteArmnnDelegate: Unsupported data type: " + tfLiteTensor.type); } +} +armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor) +{ + armnn::DataType type = GetDataType(tfLiteTensor); armnn::TensorInfo ret; auto tensorDimensionSize = tfLiteTensor.dims->size; if (tensorDimensionSize == 0) @@ -391,4 +397,92 @@ armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor) return ret; } +struct DataHolder +{ +public: + DataHolder() + : m_Fp32Data(nullptr), m_Uint8Data(nullptr), + m_Int8Data(nullptr), m_Int16Data(nullptr), m_Int32Data(nullptr) {} + + DataHolder(std::unique_ptr&& data) + : m_Fp32Data(std::move(data)), m_Uint8Data(nullptr), + m_Int8Data(nullptr), m_Int16Data(nullptr), m_Int32Data(nullptr) {} + + DataHolder(std::unique_ptr&& data) + : m_Fp32Data(nullptr), m_Uint8Data(std::move(data)), + m_Int8Data(nullptr), m_Int16Data(nullptr), m_Int32Data(nullptr) {} + + DataHolder(std::unique_ptr&& data) + : m_Fp32Data(nullptr), m_Uint8Data(nullptr), + m_Int8Data(std::move(data)), m_Int16Data(nullptr), m_Int32Data(nullptr) {} + + DataHolder(std::unique_ptr&& data) + : m_Fp32Data(nullptr), m_Uint8Data(nullptr), + m_Int8Data(nullptr), m_Int16Data(std::move(data)), m_Int32Data(nullptr) {} + + DataHolder(std::unique_ptr&& data) + : 
m_Fp32Data(nullptr), m_Uint8Data(nullptr), + m_Int8Data(nullptr), m_Int16Data(nullptr), m_Int32Data(std::move(data)) {} + +private: + std::unique_ptr m_Fp32Data; + std::unique_ptr m_Uint8Data; + std::unique_ptr m_Int8Data; + std::unique_ptr m_Int16Data; + std::unique_ptr m_Int32Data; +}; + +template +std::pair CreateConstTensorImpl( + const TfLiteTensor* tensor, + armnn::TensorInfo& tensorInfo, + armnn::Optional permutationVector) +{ + std::unique_ptr data(new T[tensorInfo.GetNumElements()]); + if (permutationVector.has_value() && permutationVector.value().GetSize() > 0) + { + tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector.value()); + armnnUtils::Permute(tensorInfo.GetShape(), + permutationVector.value(), + reinterpret_cast(tensor->data.raw), data.get(), sizeof(T)); + } + else + { + ::memcpy(data.get(), tensor->data.raw, tensorInfo.GetNumBytes()); + } + + auto constData = std::make_pair(armnn::ConstTensor(tensorInfo, data.get()), std::move(data)); + + DataHolder storedData(std::move(constData.second)); + return std::make_pair(constData.first, std::move(storedData)); +} + +std::pair CreateConstTensor( + const TfLiteTensor* tfLiteTensor, + armnn::TensorInfo& tensorInfo, + armnn::Optional permutationVector) +{ + switch (tensorInfo.GetDataType()) + { + case armnn::DataType::Float32: + return CreateConstTensorImpl(tfLiteTensor, tensorInfo, permutationVector); + case armnn::DataType::QAsymmU8: + return CreateConstTensorImpl(tfLiteTensor, tensorInfo, permutationVector); + case armnn::DataType::QSymmS8: + return CreateConstTensorImpl(tfLiteTensor, tensorInfo, permutationVector); + case armnn::DataType::QAsymmS8: + return CreateConstTensorImpl(tfLiteTensor, tensorInfo, permutationVector); + case armnn::DataType::QSymmS16: + return CreateConstTensorImpl(tfLiteTensor, tensorInfo, permutationVector); + case armnn::DataType::Signed32: + return CreateConstTensorImpl(tfLiteTensor, tensorInfo, permutationVector); + default: + { + throw armnn::Exception( + "TfLiteArmnnDelegate: Unsupported data type when creating const tensor: " + + std::string(armnn::GetDataTypeName(tensorInfo.GetDataType()))); + } + } +} + } // namespace anonymous diff --git a/delegate/src/FullyConnected.hpp b/delegate/src/FullyConnected.hpp index ad981cd63b..f35f4c92b0 100644 --- a/delegate/src/FullyConnected.hpp +++ b/delegate/src/FullyConnected.hpp @@ -5,6 +5,8 @@ #pragma once +#include "DelegateUtils.hpp" + #include #include #include @@ -19,7 +21,213 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData, int nodeIndex, int32_t operatorCode) { - return kTfLiteError; + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + bool biasEnabled = (numInputs == 3); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteTensors[tfLiteNode->inputs->data[0]])) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in node #%d: ", + nodeIndex); + return 
kTfLiteError; + } + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteWeightsTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteWeightsTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid weights tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteWeightsTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic weight tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::TensorInfo weightsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteWeightsTensor); + // Fully Connected Layer accepts two dimensional weights input + int32_t weightsDimension = static_cast(weightsTensorInfo.GetNumDimensions()); + if (weightsDimension != 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dimension #$d for Fully Connected weights is not supported by Armnn" + " in operator #%d node #%d: ", weightsDimension, operatorCode, nodeIndex); + return kTfLiteError; + } + + armnn::TensorInfo biasTensorInfo; + if (biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid bias tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic bias tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + armnn::FullyConnectedDescriptor descriptor; + descriptor.m_TransposeWeightMatrix = true; + descriptor.m_BiasEnabled = biasEnabled; + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsFullyConnectedSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + weightsTensorInfo, + biasTensorInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + auto weightsTensor = CreateConstTensor(&tfLiteWeightsTensor, + weightsTensorInfo, + armnn::Optional()); + + armnn::IConnectableLayer* layer = nullptr; + if (biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + auto biasTensor = CreateConstTensor(&tfLiteBiasTensor, + biasTensorInfo, + armnn::Optional()); + layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor, + weightsTensor.first, + armnn::Optional(biasTensor.first)); + } + else + { + layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor, + weightsTensor.first, + armnn::EmptyOptional()); + } + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + armnn::IConnectableLayer* reshapeLayer = nullptr; + if (inputTensorInfo.GetNumDimensions() > 2) + { + // Add reshape to flatten to 2D [batch_size, input_size] + std::vector reshapedDimensions(2); + reshapedDimensions[1] = weightsTensorInfo.GetShape()[1]; + reshapedDimensions[0] = inputTensorInfo.GetNumElements() / reshapedDimensions[1]; + + if (inputTensorInfo.GetNumElements() % reshapedDimensions[1] != 0) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Failed to deduce input tensor shape from filter size #%d #%d node #%d: ", + reshapedDimensions[1], operatorCode, nodeIndex); + return kTfLiteError; + } + + armnn::TensorInfo reshapedTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + reshapedTensorInfo.SetShape(armnn::TensorShape{ 2, reshapedDimensions.data() }); + + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = reshapedTensorInfo.GetShape(); + reshapeLayer = delegateData.m_Network->AddReshapeLayer(reshapeDescriptor); + ARMNN_ASSERT(reshapeLayer != nullptr); + + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedTensorInfo); + + // Connect + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[0]] = &outputSlot; + } + + if (reshapeLayer == nullptr) + { + Connect(layer, tfLiteNode, delegateData); + } + + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + + // Check Activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); } -} // namespace armnnDelegate +} // namespace armnnDelegate \ No newline at end of file diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp index 82cf5732df..69bd4f7350 100644 --- a/delegate/src/armnn_delegate.cpp +++ b/delegate/src/armnn_delegate.cpp @@ -85,7 +85,6 @@ TfLiteStatus DoPrepare(TfLiteContext* tfLiteContext, TfLiteDelegate* tfLiteDeleg { return kTfLiteError; } - return static_cast(tfLiteNode->user_data)->Prepare(tfLiteContext); }, // ArmnnSubgraph Invoke @@ -209,6 +208,11 @@ TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData, { const int32_t tensorId = inputs->data[i]; const TfLiteTensor tensor = tfLiteContext->tensors[tensorId]; + // Do not create bindings for constant inputs + if (tensor.allocation_type == kTfLiteMmapRo) + { + continue; + } auto bindingId = static_cast((tensorId)); 
armnn::IConnectableLayer* layer = delegateData.m_Network->AddInputLayer(bindingId); @@ -220,12 +224,9 @@ TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData, // Store for creating connections delegateData.m_OutputSlotForNode[tensorId] = &outputSlot; - // Do not create bindings for constant inputs - if (tensor.allocation_type != kTfLiteMmapRo) - { - inputBindings.push_back(std::make_pair(bindingId, tensorInfo)); - } + inputBindings.push_back(std::make_pair(bindingId, tensorInfo)); } + return kTfLiteOk; } @@ -244,7 +245,6 @@ TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData, armnn::IConnectableLayer* layer = delegateData.m_Network->AddOutputLayer(bindingId); auto tensorInfo = GetTensorInfoForTfLiteTensor(tensor); - ARMNN_ASSERT(delegateData.m_OutputSlotForNode[tensorId] != nullptr); delegateData.m_OutputSlotForNode[tensorId]->Connect(layer->GetInputSlot(0)); outputBindings.push_back(std::make_pair(bindingId, tensorInfo)); @@ -272,7 +272,8 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext, armnn::NetworkId networkId; delegateData.m_Network = armnn::INetwork::Create(networkOptions); - delegateData.m_OutputSlotForNode = std::vector(parameters->nodes_to_replace->size, nullptr); + delegateData.m_OutputSlotForNode = std::vector(tfLiteContext->tensors_size, nullptr); + std::vector inputBindings; std::vector outputBindings; @@ -314,8 +315,7 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext, armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); try { - - optNet = armnn::Optimize(*(delegateData.m_Network), + optNet = armnn::Optimize(*(delegateData.m_Network.get()), delegate->m_Options.GetBackends(), delegate->m_Runtime->GetDeviceSpec()); } diff --git a/delegate/src/test/FullyConnectedTest.cpp b/delegate/src/test/FullyConnectedTest.cpp new file mode 100644 index 0000000000..1d33381d6e --- /dev/null +++ b/delegate/src/test/FullyConnectedTest.cpp @@ -0,0 +1,128 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "FullyConnectedTestHelper.hpp" + +namespace +{ + +TEST_SUITE("FullyConnectedTest") +{ + +void FullyConnectedFp32Test(std::vector& backends) +{ + std::vector inputTensorShape { 1, 4, 1, 1 }; + std::vector weightsTensorShape { 1, 4 }; + std::vector biasTensorShape { 1 }; + std::vector outputTensorShape { 1, 1 }; + + std::vector inputValues = { 10, 20, 30, 40 }; + std::vector weightsData = { 2, 3, 4, 5 }; + + std::vector expectedOutputValues = { (400 + 10) }; + + // bias is set std::vector biasData = { 10 } in the model + FullyConnectedTest(backends, + ::tflite::TensorType_FLOAT32, + tflite::ActivationFunctionType_NONE, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + inputValues, + expectedOutputValues, + weightsData); +} + +void FullyConnectedActicationTest(std::vector& backends) +{ + std::vector inputTensorShape { 1, 4, 1, 1 }; + std::vector weightsTensorShape { 1, 4 }; + std::vector biasTensorShape { 1 }; + std::vector outputTensorShape { 1, 1 }; + + std::vector inputValues = { -10, 20, 30, 40 }; + std::vector weightsData = { 2, 3, 4, -5 }; + + std::vector expectedOutputValues = { 0 }; + + // bias is set std::vector biasData = { 10 } in the model + FullyConnectedTest(backends, + ::tflite::TensorType_FLOAT32, + tflite::ActivationFunctionType_RELU, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + inputValues, + expectedOutputValues, + weightsData); +} + +void FullyConnectedUint8Test(std::vector& backends) +{ + std::vector inputTensorShape { 1, 4, 2, 1 }; + std::vector weightsTensorShape { 1, 4 }; + std::vector biasTensorShape { 1 }; + std::vector outputTensorShape { 2, 1 }; + + std::vector inputValues = { 1, 2, 3, 4, 10, 20, 30, 40 }; + std::vector weightsData = { 2, 3, 4, 5 }; + + std::vector expectedOutputValues = { (40 + 10) / 2, (400 + 10) / 2 }; + + // bias is set std::vector biasData = { 10 } in the model + // input and weights quantization scale 1.0f and offset 0 in the model + // output quantization scale 2.0f and offset 0 in the model + FullyConnectedTest(backends, + ::tflite::TensorType_UINT8, + tflite::ActivationFunctionType_NONE, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + inputValues, + expectedOutputValues, + weightsData); +} + +TEST_CASE ("FULLY_CONNECTED_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + FullyConnectedFp32Test(backends); +} + +TEST_CASE ("FULLY_CONNECTED_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc, + armnn::Compute::CpuRef }; + FullyConnectedFp32Test(backends); +} + +TEST_CASE ("FULLY_CONNECTED_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + FullyConnectedUint8Test(backends); +} + +TEST_CASE ("FULLY_CONNECTED_UINT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + FullyConnectedUint8Test(backends); +} + +TEST_CASE ("FULLY_CONNECTED_Activation_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + FullyConnectedActicationTest(backends); +} + +} // End of TEST_SUITE("FullyConnectedTest") + +} // anonymous namespace \ No newline at end of file diff --git a/delegate/src/test/FullyConnectedTestHelper.hpp b/delegate/src/test/FullyConnectedTestHelper.hpp new file mode 100644 index 0000000000..4eed9580f1 --- /dev/null +++ b/delegate/src/test/FullyConnectedTestHelper.hpp @@ -0,0 +1,232 @@ 
+// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateFullyConnectedTfLiteModel(tflite::TensorType tensorType, + tflite::ActivationFunctionType activationType, + const std::vector & inputTensorShape, + const std::vector & weightsTensorShape, + const std::vector & biasTensorShape, + const std::vector & outputTensorShape, + const std::vector & weightsData, + float quantScale = 1.0f, + int quantOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(weightsData.data()), + sizeof(T) * weightsData.size())); + + auto biasTensorType = ::tflite::TensorType_FLOAT32; + if (tensorType == ::tflite::TensorType_UINT8) + { + biasTensorType = ::tflite::TensorType_INT32; + std::vector biasData = { 10 }; + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(int32_t) * biasData.size())); + + } + else + { + std::vector biasData = { 10 }; + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(float) * biasData.size())); + } + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_0"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(weightsTensorShape.data(), + weightsTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("weights"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(biasTensorShape.data(), + biasTensorShape.size()), + biasTensorType, + 2, + flatBufferBuilder.CreateString("bias"), + quantizationParameters); + + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); + + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_FullyConnectedOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateFullyConnectedOptions(flatBufferBuilder, + activationType, + FullyConnectedOptionsWeightsFormat_DEFAULT, false).Union(); + + const std::vector operatorInputs{ {0, 1, 2} }; + const std::vector operatorOutputs{ {3} }; + flatbuffers::Offset fullyConnectedOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + 
operatorBuiltinOptionsType, operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1, 2} }; + const std::vector subgraphOutputs{ {3} }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&fullyConnectedOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: FullyConnected Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_FULLY_CONNECTED); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void FullyConnectedTest(std::vector& backends, + tflite::TensorType tensorType, + tflite::ActivationFunctionType activationType, + const std::vector & inputTensorShape, + const std::vector & weightsTensorShape, + const std::vector & biasTensorShape, + const std::vector & outputTensorShape, + const std::vector & inputValues, + const std::vector & expectedOutputValues, + const std::vector & weightsData, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + + std::vector modelBuffer = CreateFullyConnectedTfLiteModel(tensorType, + activationType, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + weightsData, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = 
inputValues[i]; + } + + // Run inference + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(expectedOutputValues[i] == tfLiteDelegateOutputData[i]); + CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]); + CHECK(tfLiteDelegateOutputData[i] == armnnDelegateOutputData[i]); + } +} + +} // anonymous namespace \ No newline at end of file -- cgit v1.2.1
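
For readers who want to exercise the new FullyConnected support outside the unit tests, below is a minimal sketch of how the delegate built by this patch is attached to a TfLite interpreter. It mirrors the flow in FullyConnectedTestHelper.hpp; the model path "fully_connected.tflite" and the CpuRef backend choice are illustrative placeholders, not part of the change.

    #include <armnn_delegate.hpp>
    #include <DelegateOptions.hpp>

    #include <tensorflow/lite/interpreter.h>
    #include <tensorflow/lite/kernels/register.h>
    #include <tensorflow/lite/model.h>

    #include <memory>
    #include <vector>

    int main()
    {
        // Load a model containing a FULLY_CONNECTED operator (placeholder path).
        auto model = tflite::FlatBufferModel::BuildFromFile("fully_connected.tflite");

        std::unique_ptr<tflite::Interpreter> interpreter;
        tflite::InterpreterBuilder(*model, tflite::ops::builtin::BuiltinOpResolver())(&interpreter);

        // Hand supported subgraphs to Arm NN; CpuRef is used here only as an example backend.
        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
        armnnDelegate::DelegateOptions delegateOptions(backends);
        std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            armnnTfLiteDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                                armnnDelegate::TfLiteArmnnDelegateDelete);
        interpreter->ModifyGraphWithDelegate(armnnTfLiteDelegate.get());

        // Allocate tensors, fill interpreter->typed_input_tensor<float>(0), then run.
        interpreter->AllocateTensors();
        interpreter->Invoke();
        return 0;
    }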