author     Matthew Sloyan <matthew.sloyan@arm.com>   2021-07-13 19:46:11 +0100
committer  Matthew Sloyan <matthew.sloyan@arm.com>   2021-08-06 09:25:26 +0000
commit     81beae3a870004795275e9266bc43d845b9f78db (patch)
tree       70af86f3c36c8e330c72770e6f1419ca7b2a4bb8
parent     95e9efc28ce70a8cda93e722f5ce90ebc96bdd95 (diff)
download   armnn-81beae3a870004795275e9266bc43d845b9f78db.tar.gz
IVGCVSW-6119 ConstTensorsAsInput: FullyConnected
* Constant weights and biases are now stored as Constant layers.
* Updated Serializer, Deserializer and unit tests to reflect this.
* Updated TfLiteDelegate, TfLiteParser and OnnxParser.
* Updated Schema with IsConstant and ConstantTensorsAsInputs.
* Updated Ref backend to handle constant weights and bias as inputs rather than reading from member variables.
* Added dynamic or constant input EndToEnd tests.

!android-nn-driver:5959

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ibf3cf437df1100e4b322b0d303c575c6339f9696
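A minimal sketch of the graph-construction pattern this change enables, using only calls introduced or retained by this patch; `network` is assumed to be an INetworkPtr and `weights`/`bias` are placeholder ConstTensors, not part of the patch itself:

    armnn::FullyConnectedDescriptor fcDesc;
    fcDesc.m_BiasEnabled     = true;
    fcDesc.m_ConstantWeights = true;

    // The layer is now created without tensors; weights and biases arrive as
    // ConstantLayers wired to input slots 1 and 2.
    armnn::IConnectableLayer* fc = network->AddFullyConnectedLayer(fcDesc, "fc");

    armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "weights");
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
    weightsLayer->GetOutputSlot(0).Connect(fc->GetInputSlot(1));

    armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(bias, "bias");
    biasLayer->GetOutputSlot(0).SetTensorInfo(bias.GetInfo());
    biasLayer->GetOutputSlot(0).Connect(fc->GetInputSlot(2));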
-rw-r--r--delegate/src/DelegateUtils.hpp5
-rw-r--r--delegate/src/FullyConnected.hpp43
-rw-r--r--delegate/src/test/FullyConnectedTest.cpp10
-rw-r--r--include/armnn/Descriptors.hpp2
-rw-r--r--include/armnn/ILayerVisitor.hpp11
-rw-r--r--include/armnn/INetwork.hpp20
-rw-r--r--include/armnn/LayerVisitorBase.hpp4
-rw-r--r--src/armnn/BackendHelper.cpp47
-rw-r--r--src/armnn/Descriptors.cpp16
-rw-r--r--src/armnn/Network.cpp128
-rw-r--r--src/armnn/Network.hpp16
-rw-r--r--src/armnn/layers/FullyConnectedLayer.cpp79
-rw-r--r--src/armnn/layers/FullyConnectedLayer.hpp2
-rw-r--r--src/armnn/test/ConstTensorLayerVisitor.cpp52
-rw-r--r--src/armnn/test/ConstTensorLayerVisitor.hpp10
-rw-r--r--src/armnn/test/CreateWorkload.hpp85
-rw-r--r--src/armnn/test/GraphTests.cpp8
-rw-r--r--src/armnn/test/NetworkTests.cpp36
-rw-r--r--src/armnn/test/ShapeInferenceTests.cpp18
-rw-r--r--src/armnn/test/optimizations/FuseActivationTests.cpp185
-rw-r--r--src/armnnDeserializer/Deserializer.cpp75
-rw-r--r--src/armnnDeserializer/Deserializer.hpp12
-rw-r--r--src/armnnDeserializer/test/DeserializeFullyConnected.cpp233
-rw-r--r--src/armnnOnnxParser/OnnxParser.cpp51
-rw-r--r--src/armnnSerializer/ArmnnSchema.fbs4
-rw-r--r--src/armnnSerializer/ArmnnSchema_generated.h34
-rw-r--r--src/armnnSerializer/Serializer.cpp26
-rw-r--r--src/armnnSerializer/Serializer.hpp1
-rw-r--r--src/armnnSerializer/test/SerializerTests.cpp91
-rw-r--r--src/armnnTfLiteParser/TfLiteParser.cpp80
-rw-r--r--src/backends/backendsCommon/WorkloadData.cpp41
-rw-r--r--src/backends/backendsCommon/WorkloadFactory.cpp30
-rw-r--r--src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp170
-rw-r--r--src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp112
-rw-r--r--src/backends/reference/RefBackend.hpp3
-rw-r--r--src/backends/reference/test/RefCreateWorkloadTests.cpp18
-rw-r--r--src/backends/reference/test/RefEndToEndTests.cpp12
-rw-r--r--src/backends/reference/workloads/RefFullyConnectedWorkload.cpp42
38 files changed, 1232 insertions, 580 deletions
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index b04baac36e..2d1651842a 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -482,6 +482,11 @@ armnn::ConstTensor CreateConstTensor(const TfLiteTensor* tfLiteTensor,
"TfLiteArmnnDelegate: Not constant allocation type: " + std::to_string(tfLiteTensor->allocation_type));
}
+ if(tflite::IsConstantTensor(tfLiteTensor))
+ {
+ tensorInfo.SetConstant();
+ }
+
if (permutationVector.has_value() && permutationVector.value().GetSize() > 0 && permutationData != nullptr)
{
// Permute tensor info
diff --git a/delegate/src/FullyConnected.hpp b/delegate/src/FullyConnected.hpp
index e94304fb21..49686d6eaf 100644
--- a/delegate/src/FullyConnected.hpp
+++ b/delegate/src/FullyConnected.hpp
@@ -130,30 +130,39 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
return isSupported ? kTfLiteOk : kTfLiteError;
}
- armnn::Optional<armnn::ConstTensor> optionalWeights = armnn::EmptyOptional();
- armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
- if(descriptor.m_ConstantWeights)
+ armnn::IConnectableLayer* layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor);
+ ARMNN_ASSERT(layer != nullptr);
+
+ // Add a constant layer for weights and biases if inputs are constant.
+ if (isConstantWeights)
{
auto weightsTensor = CreateConstTensor(&tfLiteWeightsTensor,
weightsTensorInfo,
armnn::Optional<armnn::PermutationVector&>());
- optionalWeights = armnn::Optional<armnn::ConstTensor>(weightsTensor);
- if (biasEnabled)
+ armnn::IConnectableLayer* weightsLayer = delegateData.m_Network->AddConstantLayer(weightsTensor);
+
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensorInfo);
+ }
+
+ if (biasEnabled)
+ {
+ const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+ if(tflite::IsConstantTensor(&tfLiteBiasTensor))
{
- const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
auto biasTensor = CreateConstTensor(&tfLiteBiasTensor,
biasTensorInfo,
armnn::Optional<armnn::PermutationVector&>());
- optionalBiases = armnn::Optional<armnn::ConstTensor>(biasTensor);
+
+ armnn::IConnectableLayer* biasLayer = delegateData.m_Network->AddConstantLayer(biasTensor);
+ ARMNN_ASSERT(biasLayer != nullptr);
+
+ biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
+ biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensorInfo);
}
}
- armnn::IConnectableLayer* layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor,
- optionalWeights,
- optionalBiases);
- ARMNN_ASSERT(layer != nullptr);
-
armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
outputSlot.SetTensorInfo(outputTensorInfo);
@@ -171,13 +180,15 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
// Connect
delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(reshapeLayer->GetInputSlot(0));
reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
if (!descriptor.m_ConstantWeights)
{
delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(layer->GetInputSlot(1));
- if (biasEnabled)
- {
- delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(2));
- }
+ }
+
+ if (biasEnabled && !tflite::IsConstantTensor(&tfLiteTensors[tfLiteNode->inputs->data[2]]))
+ {
+ delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(2));
}
delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[0]] = &outputSlot;
}
diff --git a/delegate/src/test/FullyConnectedTest.cpp b/delegate/src/test/FullyConnectedTest.cpp
index 3bea250988..c300bc72bf 100644
--- a/delegate/src/test/FullyConnectedTest.cpp
+++ b/delegate/src/test/FullyConnectedTest.cpp
@@ -34,7 +34,7 @@ void FullyConnectedFp32Test(std::vector<armnn::BackendId>& backends, bool consta
constantWeights);
}
-void FullyConnectedActicationTest(std::vector<armnn::BackendId>& backends, bool constantWeights = true)
+void FullyConnectedActivationTest(std::vector<armnn::BackendId>& backends, bool constantWeights = true)
{
std::vector<int32_t> inputTensorShape { 1, 4, 1, 1 };
std::vector<int32_t> weightsTensorShape { 1, 4 };
@@ -106,7 +106,7 @@ TEST_CASE ("FullyConnected_Int8_GpuAcc_Test")
TEST_CASE ("FullyConnected_Activation_GpuAcc_Test")
{
std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
- FullyConnectedActicationTest(backends);
+ FullyConnectedActivationTest(backends);
}
} // End of TEST_SUITE("FullyConnected_GpuAccTests")
@@ -129,7 +129,7 @@ TEST_CASE ("FullyConnected_Int8_CpuAcc_Test")
TEST_CASE ("FullyConnected_Activation_CpuAcc_Test")
{
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
- FullyConnectedActicationTest(backends);
+ FullyConnectedActivationTest(backends);
}
} // End of TEST_SUITE("FullyConnected_CpuAccTests")
@@ -152,7 +152,7 @@ TEST_CASE ("FullyConnected_Int8_CpuRef_Test")
TEST_CASE ("FullyConnected_Activation_CpuRef_Test")
{
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
- FullyConnectedActicationTest(backends);
+ FullyConnectedActivationTest(backends);
}
TEST_CASE ("FullyConnected_Weights_As_Inputs_FP32_CpuRef_Test")
@@ -170,7 +170,7 @@ TEST_CASE ("FullyConnected_Weights_As_Inputs_Int8_CpuRef_Test")
TEST_CASE ("FullyConnected_Weights_As_Inputs_Activation_CpuRef_Test")
{
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
- FullyConnectedActicationTest(backends, false);
+ FullyConnectedActivationTest(backends, false);
}
} // End of TEST_SUITE("FullyConnected_CpuRefTests")
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index bcee902d75..7188a7bd3a 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -402,7 +402,7 @@ struct FullyConnectedDescriptor : BaseDescriptor
}
/// Get the number of views/inputs.
- uint32_t GetNumViews() const;
+ uint32_t GetNumInputs() const;
/// Enable/disable bias.
bool m_BiasEnabled;
diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
index b5112a8f0b..f7c769fbe3 100644
--- a/include/armnn/ILayerVisitor.hpp
+++ b/include/armnn/ILayerVisitor.hpp
@@ -198,6 +198,16 @@ public:
virtual void VisitFloorLayer(const IConnectableLayer* layer,
const char* name = nullptr) = 0;
+
+ /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&)
+ /// function is invoked.
+ /// @param layer - pointer to the layer which is calling back to this visit function.
+ /// @param fullyConnectedDescriptor - Description of the fully connected layer.
+ /// @param name - Optional name for the layer.
+ virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer,
+ const FullyConnectedDescriptor& fullyConnectedDescriptor,
+ const char* name = nullptr) = 0;
+
/// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&)
/// function is invoked.
/// @param layer - pointer to the layer which is calling back to this visit function.
@@ -205,6 +215,7 @@ public:
/// @param weights - Tensor for the weights data.
/// @param biases - Optional tensor for the bias data.
/// @param name - Optional name for the layer.
+ ARMNN_DEPRECATED_MSG("Use VisitFullyConnectedLayer without ConstTensors")
virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer,
const FullyConnectedDescriptor& fullyConnectedDescriptor,
const ConstTensor& weights,
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 865d1291a9..48f407f2f9 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -301,37 +301,23 @@ public:
IConnectableLayer* AddFillLayer(const FillDescriptor& fillDescriptor,
const char* name = nullptr);
- /// Adds a fully connected layer to the network.
- /// @param fullyConnectedDescriptor - Description of the fully connected layer.
- /// @param weights -Optional Tensor for the weights data.
- /// @param biases - Optional tensor for the bias data.
- /// @param name - Optional name for the layer.
- /// @return - Interface for configuring the layer.
- IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const Optional<ConstTensor>& weights,
- const Optional<ConstTensor>& biases,
- const char* name = nullptr);
/// Adds a fully connected layer to the network.
/// @param fullyConnectedDescriptor - Description of the fully connected layer.
- /// @param weights - Tensor for the weights data.
- /// @param biases - Optional tensor for the bias data.
- /// @param name - Optional name for the layer.
/// @return - Interface for configuring the layer.
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
const char* name = nullptr);
ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated")
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
+ const Optional<ConstTensor>& weights,
+ const Optional<ConstTensor>& biases,
const char* name = nullptr);
ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated")
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
const ConstTensor& weights,
- const ConstTensor& biases,
+ const Optional<ConstTensor>& biases,
const char* name = nullptr);
/// Adds a permute layer to the network.
diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp
index fb88d559bb..d3378dccf5 100644
--- a/include/armnn/LayerVisitorBase.hpp
+++ b/include/armnn/LayerVisitorBase.hpp
@@ -110,6 +110,10 @@ public:
void VisitFullyConnectedLayer(const IConnectableLayer*,
const FullyConnectedDescriptor&,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+
+ void VisitFullyConnectedLayer(const IConnectableLayer*,
+ const FullyConnectedDescriptor&,
const ConstTensor&,
const Optional<ConstTensor>&,
const char*) override { DefaultPolicy::Apply(__func__); }
diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp
index 13bde0aafa..9ab30f8fb2 100644
--- a/src/armnn/BackendHelper.cpp
+++ b/src/armnn/BackendHelper.cpp
@@ -5,6 +5,7 @@
#include <armnn/BackendHelper.hpp>
#include <armnn/BackendRegistry.hpp>
+#include <armnn/Logging.hpp>
#include <armnn/backends/IBackendInternal.hpp>
@@ -399,22 +400,48 @@ bool LayerSupportHandle::IsFullyConnectedSupported(const TensorInfo& input,
const FullyConnectedDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported)
{
- if(!descriptor.m_ConstantWeights && !m_BackendId.IsUndefined())
+ if(!m_BackendId.IsUndefined())
{
- auto capability = GetCapability("NonConstWeights", m_BackendId);
- if (capability.has_value() && capability.value().GetValue().AsBool() == true)
+ auto capability = GetCapability("ConstantTensorsAsInputs", m_BackendId);
+ if(!capability.has_value() || capability.value().GetValue().AsBool() == false)
{
- return true;
+ if(!weights.IsConstant())
+ {
+ return false;
+ }
+ if(descriptor.m_BiasEnabled)
+ {
+ if(!biases.IsConstant())
+ {
+ return false;
+ }
+ }
+
+ // At the first stage we will only print a warning. This is to give
+ // backend developers a chance to adopt and read weights from input slots.
+ ARMNN_LOG(warning) << "The backend makes use of a deprecated interface to read constant tensors. "
+ "If you are a backend developer please find more information in our "
+ "doxygen documentation on github https://github.com/ARM-software/armnn "
+ "under the keyword 'ConstTensorsAsInputs'.";
+ }
+
+ if(!descriptor.m_ConstantWeights)
+ {
+ auto capability = GetCapability("NonConstWeights", m_BackendId);
+ if (capability.has_value() && capability.value().GetValue().AsBool() == true)
+ {
+ return true;
+ }
+ return false;
}
- return false;
}
return m_LayerSupport->IsFullyConnectedSupported(input,
- output,
- weights,
- biases,
- descriptor,
- reasonIfUnsupported.value());
+ output,
+ weights,
+ biases,
+ descriptor,
+ reasonIfUnsupported.value());
}
bool LayerSupportHandle::IsGatherSupported(const TensorInfo& input0,
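For context, the handle above is reachable from user code via GetILayerSupportByBackendId. A hedged sketch of driving this check; the tensor infos and descriptor are illustrative placeholders, and weightsInfo would need SetConstant() for backends that do not report the ConstantTensorsAsInputs capability:

    std::string reason;
    armnn::LayerSupportHandle handle =
        armnn::GetILayerSupportByBackendId(armnn::Compute::CpuRef);
    // Falls back to the deprecated constant-tensor path (with the warning above)
    // when the backend lacks the ConstantTensorsAsInputs capability.
    bool supported = handle.IsFullyConnectedSupported(inputInfo,
                                                      outputInfo,
                                                      weightsInfo,
                                                      biasInfo,
                                                      fcDesc,
                                                      armnn::Optional<std::string&>(reason));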
diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp
index 706992ccb0..4521894c28 100644
--- a/src/armnn/Descriptors.cpp
+++ b/src/armnn/Descriptors.cpp
@@ -425,19 +425,13 @@ int StridedSliceDescriptor::GetStopForAxis(const TensorShape& inputShape,
}
-uint32_t FullyConnectedDescriptor::GetNumViews() const
+uint32_t FullyConnectedDescriptor::GetNumInputs() const
{
- // Return 1 with constant weights, otherwise check if bias is enabled
- uint32_t numInputs = 1;
- if (!m_ConstantWeights)
+ // Return 2 (input, weights), or 3 if bias is enabled
+ unsigned int numInputs = 2;
+ if (m_BiasEnabled)
{
- // non-const weights
- numInputs = 2;
- if (m_BiasEnabled)
- {
- // non-const bias
- numInputs = 3;
- }
+ numInputs = 3;
}
return numInputs;
}
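In short, the weights slot is now always counted; the new contract in brief (a comment sketch, not part of the patch):

    // m_BiasEnabled == false -> GetNumInputs() == 2 : {input, weights}
    // m_BiasEnabled == true  -> GetNumInputs() == 3 : {input, weights, bias}

FullyConnectedLayer's constructor (further below) uses this value to size the layer's input slots.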
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 83eafe7993..a29ce83c5a 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -30,6 +30,8 @@
#include <common/include/ProfilingGuid.hpp>
+#include <fmt/format.h>
+
#include <fcntl.h>
#include <algorithm>
#include <fstream>
@@ -178,21 +180,16 @@ IConnectableLayer* INetwork::AddFillLayer(const FillDescriptor& fillDescriptor,
}
IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
const char* name)
{
- return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor,
- armnn::Optional<ConstTensor>(weights),
- biases,
- name);
+ return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, name);
}
IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
const char* name)
{
- armnn::Optional<ConstTensor> biases;
return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor,
armnn::Optional<ConstTensor>(weights),
biases,
@@ -200,17 +197,6 @@ IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescript
}
IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const ConstTensor& biases,
- const char* name)
-{
- return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor,
- armnn::Optional<ConstTensor>(weights),
- armnn::Optional<ConstTensor>(biases),
- name);
-}
-
-IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
const Optional<ConstTensor>& weights,
const Optional<ConstTensor>& biases,
const char* name)
@@ -1799,69 +1785,87 @@ IConnectableLayer* NetworkImpl::AddFillLayer(const FillDescriptor& fillDescripto
return m_Graph->AddLayer<FillLayer>(fillDescriptor, name);
}
-IConnectableLayer* NetworkImpl::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const Optional<ConstTensor>& weights,
- const Optional<ConstTensor>& biases,
- const char* name)
+IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
+ const char* name)
+{
+ return m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);
+}
+
+IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
+ const Optional<ConstTensor>& weights,
+ const Optional<ConstTensor>& biases,
+ const char* name)
{
- if (fullyConnectedDescriptor.m_ConstantWeights && !weights.has_value())
+ ConstantLayer* weightsLayer = nullptr;
+ ConstantLayer* biasLayer = nullptr;
+ unsigned int numInputs = fullyConnectedDescriptor.GetNumInputs();
+
+ // Add a constant layer for weights
+ if (weights.has_value())
{
- throw InvalidArgumentException("AddFullyConnectedLayer: weights cannot be empty");
+ weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights");
+ weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights.value());
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());
+ }
+ else if (fullyConnectedDescriptor.m_ConstantWeights)
+ {
+ throw InvalidArgumentException("AddFullyConnectedLayer: Constant weights tensor is empty.");
+ }
- if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
- {
- throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
- }
+ // Add a constant layer for biases
+ if (biases.has_value() && fullyConnectedDescriptor.m_BiasEnabled)
+ {
+ biasLayer = m_Graph->AddLayer<ConstantLayer>("Biases");
+ biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value());
+ biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());
}
- const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);
+ if (numInputs < 2)
+ {
+ throw InvalidArgumentException("AddFullyConnectedLayer: Requires at least 2 input tensors: Input, Weights");
+ }
+
+ auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);
+
+ if (weightsLayer)
+ {
+ // Connect weights layer
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ }
- if (fullyConnectedDescriptor.m_ConstantWeights)
+ if ( fullyConnectedDescriptor.m_BiasEnabled && numInputs == 3 )
{
- layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights.value());
- if (fullyConnectedDescriptor.m_BiasEnabled)
+ if (biasLayer)
{
- layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
+ // Connect bias layer
+ biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
}
}
+ else if ( !fullyConnectedDescriptor.m_BiasEnabled && numInputs == 2 )
+ {
+ // Bias is disabled
+ layer->m_Bias = nullptr;
+ }
+ else
+ {
+ throw InvalidArgumentException(fmt::format(
+ "AddFullyConnectedLayer: Value mismatch. When bias is enabled in the "
+ "descriptor the number of inputs is expected to be 3 otherwise 2. "
+ "BiasEnabled={}, numInputs={}",
+ fullyConnectedDescriptor.m_BiasEnabled,
+ numInputs));
+ }
return layer;
}
IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const Optional<ConstTensor>& weights,
+ const ConstTensor& weights,
const Optional<ConstTensor>& biases,
const char* name)
{
- return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
-}
-
-IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
- const char* name)
-{
Optional<ConstTensor> optionalWeights(weights);
- return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, optionalWeights, biases, name);
-}
-
-IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const char* name)
-{
- Optional<ConstTensor> optionalWeights(weights);
- Optional<ConstTensor> biases;
- return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, optionalWeights, biases, name);
-}
-
-IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const ConstTensor& biases,
- const char* name)
-{
- Optional<ConstTensor> optionalWeights(weights);
- Optional<ConstTensor> optionalBiases(biases);
- return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, optionalWeights, optionalBiases, name);
+ return AddFullyConnectedLayer(fullyConnectedDescriptor, optionalWeights, biases, name);
}
IConnectableLayer* NetworkImpl::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
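Callers that still hold tensors directly can use the surviving Optional<ConstTensor> overload above, which now performs the ConstantLayer wrapping itself. A minimal sketch of that compatibility path, reusing the placeholder fcDesc/weights/bias from the earlier example:

    // Internally adds "Weights"/"Biases" ConstantLayers and connects them to
    // input slots 1 and 2, so pre-existing call sites keep working.
    armnn::IConnectableLayer* fc =
        network->AddFullyConnectedLayer(fcDesc,
                                        armnn::Optional<armnn::ConstTensor>(weights),
                                        armnn::Optional<armnn::ConstTensor>(bias),
                                        "fc");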
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index 54c3497c90..c22c865e3b 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -133,24 +133,17 @@ public:
IConnectableLayer* AddFloorLayer(const char* name = nullptr);
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const Optional<ConstTensor>& weights,
- const Optional<ConstTensor>& biases,
const char* name = nullptr);
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
+ const Optional<ConstTensor>& weights,
const Optional<ConstTensor>& biases,
const char* name = nullptr);
ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated")
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
const ConstTensor& weights,
- const char* name = nullptr);
-
- ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated")
- IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const ConstTensor& biases,
+ const Optional<ConstTensor>& biases,
const char* name = nullptr);
ARMNN_DEPRECATED_MSG("This AddGatherLayer overload is deprecated")
@@ -288,11 +281,6 @@ private:
const Optional<ConstTensor>& biases,
const char* name);
- IConnectableLayer* AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const Optional<ConstTensor>& weights,
- const Optional<ConstTensor>& biases,
- const char* name);
-
bool GetShapeInferenceMethod();
NetworkOptions m_NetworkOptions;
diff --git a/src/armnn/layers/FullyConnectedLayer.cpp b/src/armnn/layers/FullyConnectedLayer.cpp
index 9d4f57d260..8dfb011730 100644
--- a/src/armnn/layers/FullyConnectedLayer.cpp
+++ b/src/armnn/layers/FullyConnectedLayer.cpp
@@ -15,24 +15,20 @@ namespace armnn
{
FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name)
- : LayerWithParameters(param.GetNumViews(), 1, LayerType::FullyConnected, param, name)
+ : LayerWithParameters(param.GetNumInputs(), 1, LayerType::FullyConnected, param, name)
{
}
std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const IWorkloadFactory& factory) const
{
- // on this level constant data should not be released..
FullyConnectedQueueDescriptor descriptor;
- if (m_Param.m_ConstantWeights)
+ if (m_Weight)
{
- ARMNN_ASSERT_MSG(m_Weight != nullptr, "FullyConnectedLayer: Weights data should not be null.");
descriptor.m_Weight = m_Weight.get();
-
- if (m_Param.m_BiasEnabled)
- {
- ARMNN_ASSERT_MSG(m_Bias != nullptr, "FullyConnectedLayer: Bias data should not be null.");
- descriptor.m_Bias = m_Bias.get();
- }
+ }
+ if (m_Param.m_BiasEnabled && m_Bias)
+ {
+ descriptor.m_Bias = m_Bias.get();
}
SetAdditionalInfo(descriptor);
@@ -42,15 +38,6 @@ std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const IWorkloadFa
FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const
{
auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName());
- if (m_Param.m_ConstantWeights)
- {
- layer->m_Weight = m_Weight ? m_Weight : nullptr;
-
- if (layer->m_Param.m_BiasEnabled)
- {
- layer->m_Bias = m_Bias ? m_Bias : nullptr;
- }
- }
return std::move(layer);
}
@@ -73,20 +60,9 @@ void FullyConnectedLayer::ValidateTensorShapesFromInputs()
VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod);
- std::vector<TensorShape> inferredShapes;
- if (m_Param.m_ConstantWeights)
- {
- // check if m_Weight data is not nullptr
- ARMNN_ASSERT_MSG(m_Weight != nullptr, "FullyConnectedLayer: Weights data should not be null.");
-
- inferredShapes = InferOutputShapes({GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(),
- m_Weight->GetTensorInfo().GetShape()});
- }
- else
- {
- inferredShapes = InferOutputShapes({GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(),
- GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()});
- }
+ std::vector<TensorShape> inferredShapes = InferOutputShapes(
+ {GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(),
+ GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()});
ARMNN_ASSERT(inferredShapes.size() == 1);
ARMNN_ASSERT(inferredShapes[0].GetDimensionality() == Dimensionality::Specified);
@@ -101,45 +77,12 @@ Layer::ConstantTensors FullyConnectedLayer::GetConstantTensorsByRef()
void FullyConnectedLayer::Accept(ILayerVisitor& visitor) const
{
- Optional<ConstTensor> optionalWeightsTensor = EmptyOptional();
- Optional<ConstTensor> optionalBiasTensor = EmptyOptional();
-
- ManagedConstTensorHandle managedWeight(m_Weight);
- ManagedConstTensorHandle managedBias(m_Bias);
- if (GetParameters().m_ConstantWeights)
- {
- ConstTensor weightsTensor(managedWeight.GetTensorInfo(), managedWeight.Map());
- optionalWeightsTensor = Optional<ConstTensor>(weightsTensor);
-
- if (GetParameters().m_BiasEnabled)
- {
- ConstTensor biasTensor(managedBias.GetTensorInfo(), managedBias.Map());
- optionalBiasTensor = Optional<ConstTensor>(biasTensor);
- }
- }
-
- visitor.VisitFullyConnectedLayer(this,
- GetParameters(),
- optionalWeightsTensor.value(),
- optionalBiasTensor,
- GetName());
+ visitor.VisitFullyConnectedLayer(this, GetParameters(), GetName());
}
void FullyConnectedLayer::ExecuteStrategy(IStrategy& strategy) const
{
- std::vector <armnn::ConstTensor> constTensors;
- ManagedConstTensorHandle managedWeight(m_Weight);
- ManagedConstTensorHandle managedBias(m_Bias);
-
- if(GetParameters().m_ConstantWeights)
- {
- constTensors.emplace_back(ConstTensor(managedWeight.GetTensorInfo(), managedWeight.Map()));
- if (GetParameters().m_BiasEnabled)
- {
- constTensors.emplace_back(ConstTensor(managedBias.GetTensorInfo(), managedBias.Map()));
- }
- }
- strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName());
+ strategy.ExecuteStrategy(this, GetParameters(), {}, GetName());
}
} // namespace armnn
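As a concrete check on the unified shape inference above: with m_TransposeWeightMatrix = true, an input of shape {3, 1, 4, 5} (batch 3, 20 elements per sample) on slot 0 and a {7, 20} weights tensor on slot 1 yield an inferred output shape of {3, 7} — exactly the shapes exercised by the workload tests later in this patch.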
diff --git a/src/armnn/layers/FullyConnectedLayer.hpp b/src/armnn/layers/FullyConnectedLayer.hpp
index 7fc7b0d596..5639bf27b4 100644
--- a/src/armnn/layers/FullyConnectedLayer.hpp
+++ b/src/armnn/layers/FullyConnectedLayer.hpp
@@ -16,8 +16,10 @@ class FullyConnectedLayer : public LayerWithParameters<FullyConnectedDescriptor>
{
public:
/// A unique pointer to store Weight values.
+ /// @Note: Deprecated. Weights are stored in ConstantLayers now.
std::shared_ptr<ConstTensorHandle> m_Weight;
/// A unique pointer to store Bias values.
+ /// @Note: Deprecated. Biases are stored in ConstantLayers now.
std::shared_ptr<ConstTensorHandle> m_Bias;
/// Makes a workload for the FullyConnected type.
diff --git a/src/armnn/test/ConstTensorLayerVisitor.cpp b/src/armnn/test/ConstTensorLayerVisitor.cpp
index baafcf41ef..d3d8698972 100644
--- a/src/armnn/test/ConstTensorLayerVisitor.cpp
+++ b/src/armnn/test/ConstTensorLayerVisitor.cpp
@@ -484,16 +484,23 @@ TEST_CASE("CheckFullyConnectedLayer")
{
FullyConnectedDescriptor descriptor;
descriptor.m_TransposeWeightMatrix = true;
+ descriptor.m_ConstantWeights = true;
+ descriptor.m_BiasEnabled = false;
std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
std::vector<unsigned int> dimensions = {1, 1, 3, 3};
ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32), data);
- TestFullyConnectedLayerVistor visitor(descriptor, weights, EmptyOptional());
+ TestConstantLayerVisitor weightsVisitor(weights);
+ TestFullyConnectedLayerVistor visitor(descriptor);
NetworkImpl net;
- IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, EmptyOptional());
+ IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+ IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor);
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+ weightsLayer->Accept(weightsVisitor);
layer->Accept(visitor);
}
@@ -502,16 +509,23 @@ TEST_CASE("CheckNamedFullyConnectedLayer")
const char* layerName = "FullyConnectedLayer";
FullyConnectedDescriptor descriptor;
descriptor.m_TransposeWeightMatrix = true;
+ descriptor.m_ConstantWeights = true;
+ descriptor.m_BiasEnabled = false;
std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
std::vector<unsigned int> dimensions = {1, 1, 3, 3};
ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32), data);
- TestFullyConnectedLayerVistor visitor(descriptor, weights, EmptyOptional(), layerName);
+ TestConstantLayerVisitor weightsVisitor(weights);
+ TestFullyConnectedLayerVistor visitor(descriptor, layerName);
NetworkImpl net;
- IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, EmptyOptional(), layerName);
+ IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+ IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName);
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+ weightsLayer->Accept(weightsVisitor);
layer->Accept(visitor);
}
@@ -519,6 +533,7 @@ TEST_CASE("CheckFullyConnectedLayerWithBiases")
{
FullyConnectedDescriptor descriptor;
descriptor.m_TransposeWeightMatrix = true;
+ descriptor.m_ConstantWeights = true;
descriptor.m_BiasEnabled = true;
std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
@@ -528,13 +543,21 @@ TEST_CASE("CheckFullyConnectedLayerWithBiases")
std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32), biasData);
- Optional<ConstTensor> optionalBiases(biases);
- TestFullyConnectedLayerVistor visitor(descriptor, weights, optionalBiases);
+ TestConstantLayerVisitor weightsVisitor(weights);
+ TestConstantLayerVisitor biasesVisitor(biases);
+ TestFullyConnectedLayerVistor visitor(descriptor);
NetworkImpl net;
- IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, optionalBiases);
+ IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+ IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
+ IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor);
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
+
+ weightsLayer->Accept(weightsVisitor);
+ biasesLayer->Accept(biasesVisitor);
layer->Accept(visitor);
}
@@ -543,6 +566,7 @@ TEST_CASE("CheckNamedFullyConnectedLayerWithBiases")
const char* layerName = "FullyConnectedLayer";
FullyConnectedDescriptor descriptor;
descriptor.m_TransposeWeightMatrix = true;
+ descriptor.m_ConstantWeights = true;
descriptor.m_BiasEnabled = true;
std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
@@ -552,13 +576,21 @@ TEST_CASE("CheckNamedFullyConnectedLayerWithBiases")
std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32), biasData);
- Optional<ConstTensor> optionalBiases(biases);
- TestFullyConnectedLayerVistor visitor(descriptor, weights, optionalBiases, layerName);
+ TestConstantLayerVisitor weightsVisitor(weights);
+ TestConstantLayerVisitor biasesVisitor(biases);
+ TestFullyConnectedLayerVistor visitor(descriptor, layerName);
NetworkImpl net;
- IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, optionalBiases, layerName);
+ IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+ IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
+ IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName);
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
+
+ weightsLayer->Accept(weightsVisitor);
+ biasesLayer->Accept(biasesVisitor);
layer->Accept(visitor);
}
diff --git a/src/armnn/test/ConstTensorLayerVisitor.hpp b/src/armnn/test/ConstTensorLayerVisitor.hpp
index e423e0f6e3..35e2e872f7 100644
--- a/src/armnn/test/ConstTensorLayerVisitor.hpp
+++ b/src/armnn/test/ConstTensorLayerVisitor.hpp
@@ -90,36 +90,26 @@ class TestFullyConnectedLayerVistor : public TestLayerVisitor
{
public:
explicit TestFullyConnectedLayerVistor(const FullyConnectedDescriptor& descriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor> biases,
const char* name = nullptr)
: TestLayerVisitor(name)
, m_Descriptor(descriptor)
- , m_Weights(weights)
- , m_Biases(biases)
{}
virtual ~TestFullyConnectedLayerVistor() {}
void VisitFullyConnectedLayer(const IConnectableLayer* layer,
const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
const char* name = nullptr) override
{
CheckLayerPointer(layer);
CheckLayerName(name);
CheckDescriptor(fullyConnectedDescriptor);
- CheckConstTensors(m_Weights, weights);
- CheckOptionalConstTensors(m_Biases, biases);
}
protected:
void CheckDescriptor(const FullyConnectedDescriptor& descriptor);
private:
FullyConnectedDescriptor m_Descriptor;
- ConstTensor m_Weights;
- Optional<ConstTensor> m_Biases;
};
class TestBatchNormalizationLayerVisitor : public TestLayerVisitor
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index b07e3b80a5..759ada97cd 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -1193,7 +1193,7 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::
{
// Creates the layer we're testing.
FullyConnectedDescriptor layerDesc;
- layerDesc.m_BiasEnabled = true;
+ layerDesc.m_BiasEnabled = false;
layerDesc.m_TransposeWeightMatrix = true;
FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
@@ -1201,17 +1201,24 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::
float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;
+ // As optimization isn't run, member variables need to be updated.
layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
- layer->m_Bias = std::make_unique<ScopedTensorHandle>(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale));
layer->m_Weight->Allocate();
- layer->m_Bias->Allocate();
+
+ armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
+ weightsTensorInfo.SetConstant();
// Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ auto const weights = graph.AddLayer<ConstantLayer>("weights");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+ weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
+ weights->m_LayerOutput->Allocate();
+
// Connects up.
- Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale));
+ Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
+ Connect(weights, layer, weightsTensorInfo, 0, 1);
Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
CreateTensorHandles(graph, factory);
@@ -1219,13 +1226,10 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::
auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
- CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
- CHECK(queueDescriptor.m_Inputs.size() == 1);
+ CHECK(queueDescriptor.m_Inputs.size() == 2);
CHECK(queueDescriptor.m_Outputs.size() == 1);
- CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({7, 20}, DataType, inputsQScale)));
- CHECK((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({7}, GetBiasDataType(DataType), inputsQScale)));
// Returns so we can do extra, backend-specific tests.
return workload;
@@ -1246,11 +1250,17 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;
+ // As optimization isn't run, member variables need to be updated.
layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
layer->m_Bias = std::make_unique<ScopedTensorHandle>(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale));
layer->m_Weight->Allocate();
layer->m_Bias->Allocate();
+ armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
+ armnn::TensorInfo biasesTensorInfo({7}, GetBiasDataType(DataType), inputsQScale);
+ weightsTensorInfo.SetConstant();
+ biasesTensorInfo.SetConstant();
+
auto activationDesc = std::make_shared<ActivationDescriptor>();
activationDesc->m_A = 10.0f;
activationDesc->m_B = 5.0f;
@@ -1267,10 +1277,19 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
// Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ auto const weights = graph.AddLayer<ConstantLayer>("weights");
+ auto const biases = graph.AddLayer<ConstantLayer>("biases");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+ weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
+ weights->m_LayerOutput->Allocate();
+ biases->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
+ biases->m_LayerOutput->Allocate();
+
// Connects up.
- Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale));
+ Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
+ Connect(weights, layer, weightsTensorInfo, 0, 1);
+ Connect(biases, layer, biasesTensorInfo, 0, 2);
Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
CreateTensorHandles(graph, factory);
@@ -1290,10 +1309,52 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
- CHECK(queueDescriptor.m_Inputs.size() == 1);
+ CHECK(queueDescriptor.m_Inputs.size() == 3);
+ CHECK(queueDescriptor.m_Outputs.size() == 1);
+
+ // Returns so we can do extra, backend-specific tests.
+ return workload;
+}
+
+template <typename FullyConnectedWorkload, armnn::DataType DataType>
+std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
+ (armnn::IWorkloadFactory& factory,
+ armnn::Graph& graph)
+{
+ // Creates the layer we're testing.
+ FullyConnectedDescriptor layerDesc;
+ layerDesc.m_BiasEnabled = true;
+ layerDesc.m_TransposeWeightMatrix = true;
+ layerDesc.m_ConstantWeights = false;
+
+ FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
+
+ float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
+ float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;
+
+ // Creates extra layers with weights and biases as input layers.
+ Layer* const input = graph.AddLayer<InputLayer>(1, "input");
+ Layer* const weights = graph.AddLayer<InputLayer>(2, "weights");
+ Layer* const biases = graph.AddLayer<InputLayer>(3, "biases");
+ Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connects up.
+ Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
+ Connect(weights, layer, TensorInfo({7, 20}, DataType, inputsQScale), 0, 1);
+ Connect(biases, layer, TensorInfo({7}, GetBiasDataType(DataType), inputsQScale), 0, 2);
+ Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
+ CreateTensorHandles(graph, factory);
+
+ // Makes the workload and checks it.
+ auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
+
+ FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
+
+ CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+ CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
+ CHECK(queueDescriptor.m_Parameters.m_ConstantWeights == false);
+ CHECK(queueDescriptor.m_Inputs.size() == 3);
CHECK(queueDescriptor.m_Outputs.size() == 1);
- CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({7, 20}, DataType, inputsQScale)));
- CHECK((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({7}, GetBiasDataType(DataType), inputsQScale)));
// Returns so we can do extra, backend-specific tests.
return workload;
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 0dc2619e51..b697f6dbe6 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -598,14 +598,14 @@ TEST_CASE("CheckGraphConstTensorSharing")
{
armnn::Graph graph1;
- armnn::FullyConnectedLayer* const fcLayer =
- graph1.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc");
+ armnn::ConstantLayer* const constantLayer = graph1.AddLayer<armnn::ConstantLayer>("ConstantLayer");
float weight = 1.0f;
armnn::ConstTensor constTensor({{ 1, 1 }, armnn::DataType::Float32}, &weight);
- fcLayer->m_Weight = std::make_shared<armnn::ScopedTensorHandle>(constTensor);;
+ constantLayer->m_LayerOutput = std::make_shared<armnn::ScopedTensorHandle>(constTensor);;
+
// point sharedWeightPtr to graph1's const tensor
- sharedWeightPtr = fcLayer->m_Weight->GetConstTensor<float>();
+ sharedWeightPtr = constantLayer->m_LayerOutput->GetConstTensor<float>();
graph0 = armnn::Graph(graph1);
// graph1 goes out of scope
diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp
index d763a85100..9acb60df4a 100644
--- a/src/armnn/test/NetworkTests.cpp
+++ b/src/armnn/test/NetworkTests.cpp
@@ -86,12 +86,15 @@ TEST_CASE("NetworkModification")
inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
armnn::FullyConnectedDescriptor fullyConnectedDesc;
+
+ // Constant layer that now holds weights data for FullyConnected
+ armnn::IConnectableLayer* const constantWeightsLayer = net.AddConstantLayer(weights, "const weights");
armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc,
- weights,
- armnn::EmptyOptional(),
"fully connected");
+ CHECK(constantWeightsLayer);
CHECK(fullyConnectedLayer);
+ constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
armnn::Pooling2dDescriptor pooling2dDesc;
@@ -152,11 +155,12 @@ TEST_CASE("NetworkModification")
multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
//Tests that all layers are present in the graph.
- CHECK(net.GetGraph().GetNumLayers() == 11);
+ CHECK(net.GetGraph().GetNumLayers() == 12);
//Tests that the vertices exist and have correct names.
CHECK(GraphHasNamedLayer(net.GetGraph(), "input layer"));
CHECK(GraphHasNamedLayer(net.GetGraph(), "conv layer"));
+ CHECK(GraphHasNamedLayer(net.GetGraph(), "const weights"));
CHECK(GraphHasNamedLayer(net.GetGraph(), "fully connected"));
CHECK(GraphHasNamedLayer(net.GetGraph(), "pooling2d"));
CHECK(GraphHasNamedLayer(net.GetGraph(), "activation"));
@@ -200,6 +204,28 @@ TEST_CASE("NetworkModification")
CHECK(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection());
}
};
+ auto checkOneOutputToTwoInputConnectionForTwoDifferentLayers = []
+ (const armnn::IConnectableLayer* const srcLayer1,
+ const armnn::IConnectableLayer* const srcLayer2,
+ const armnn::IConnectableLayer* const tgtLayer,
+ int expectedSrcNumInputs1 = 1,
+ int expectedSrcNumInputs2 = 1,
+ int expectedDstNumOutputs = 1)
+ {
+ CHECK(srcLayer1->GetNumInputSlots() == expectedSrcNumInputs1);
+ CHECK(srcLayer1->GetNumOutputSlots() == 1);
+ CHECK(srcLayer2->GetNumInputSlots() == expectedSrcNumInputs2);
+ CHECK(srcLayer2->GetNumOutputSlots() == 1);
+ CHECK(tgtLayer->GetNumInputSlots() == 2);
+ CHECK(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
+
+ CHECK(srcLayer1->GetOutputSlot(0).GetNumConnections() == 1);
+ CHECK(srcLayer2->GetOutputSlot(0).GetNumConnections() == 1);
+ CHECK(srcLayer1->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0));
+ CHECK(srcLayer2->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(1));
+ CHECK(&srcLayer1->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection());
+ CHECK(&srcLayer2->GetOutputSlot(0) == tgtLayer->GetInputSlot(1).GetConnection());
+ };
CHECK(AreAllLayerInputSlotsConnected(*convLayer));
CHECK(AreAllLayerInputSlotsConnected(*fullyConnectedLayer));
@@ -214,8 +240,8 @@ TEST_CASE("NetworkModification")
// Checks connectivity.
checkOneOutputToOneInputConnection(inputLayer, convLayer, 0);
- checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer);
- checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer);
+ checkOneOutputToTwoInputConnectionForTwoDifferentLayers(convLayer, constantWeightsLayer, fullyConnectedLayer, 1, 0);
+ checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer, 2, 1);
checkOneOutputToOneInputConnection(poolingLayer, activationLayer);
checkOneOutputToOneInputConnection(activationLayer, normalizationLayer);
checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer);
diff --git a/src/armnn/test/ShapeInferenceTests.cpp b/src/armnn/test/ShapeInferenceTests.cpp
index 8abcfd7595..d3c928fec1 100644
--- a/src/armnn/test/ShapeInferenceTests.cpp
+++ b/src/armnn/test/ShapeInferenceTests.cpp
@@ -401,24 +401,16 @@ TEST_CASE("FloorTest")
TEST_CASE("FullyConnectedTest")
{
- Graph graph;
-
const unsigned int inputWidth = 3u;
const unsigned int inputHeight = 2u;
const unsigned int inputChannels = 1u;
const unsigned int outputChannels = 2u;
- auto layer = BuildGraph<FullyConnectedLayer>(&graph,
- {{1, inputChannels, inputHeight, inputWidth}},
- FullyConnectedDescriptor(),
- "fc");
-
-
- const float Datum = 0.0f;
- ConstTensor weights({{inputChannels, outputChannels}, DataType::Float32}, &Datum);
- layer->m_Weight = std::make_unique<ScopedTensorHandle>(weights);
-
- RunShapeInferenceTest<FullyConnectedLayer>(layer, {{ 1, outputChannels }});
+ CreateGraphAndRunTest<FullyConnectedLayer>({{ 1, inputChannels, inputHeight, inputWidth }, // input
+ { inputChannels, outputChannels }}, // weights
+ {{ 1, outputChannels }}, // output
+ FullyConnectedDescriptor(),
+ "fc");
}
TEST_CASE("GatherTest")
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index 24ea8f6680..2352a3c498 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -8,6 +8,7 @@
#include <Network.hpp>
#include <ResolveType.hpp>
#include <armnn/INetwork.hpp>
+#include "test/GraphUtils.hpp"
#include <test/TestUtils.hpp>
#include <doctest/doctest.h>
@@ -41,6 +42,7 @@ struct Convolution2dTest
{
using LayerType = Convolution2dLayer;
static const bool isElementWise = false;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout
@@ -70,6 +72,16 @@ struct Convolution2dTest
return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
@@ -78,6 +90,7 @@ struct DWConvolution2dTest
public:
using LayerType = DepthwiseConvolution2dLayer;
static const bool isElementWise = false;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin]
static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
@@ -108,6 +121,16 @@ public:
return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
@@ -116,6 +139,7 @@ struct FullyConnectedTest
public:
using LayerType = FullyConnectedLayer;
static const bool isElementWise = false;
+ static const bool isConstTensorAsInputSupported = true;
static TensorShape GetInputShape() { return TensorShape( {2, 5, 1, 1}); } // NCinHW
static TensorShape GetOutputShape() { return TensorShape( {2, 3}); } // NCout
@@ -129,18 +153,31 @@ public:
float scale = 1.f,
int32_t offset = 0)
{
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+
FullyConnectedDescriptor descriptor;
descriptor.m_BiasEnabled = false;
+ return network->AddFullyConnectedLayer(descriptor, name);
+ }
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
std::vector<float> weightsData = { 1, 2, 3, 4, 5,
6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15};
+ 11, 12, 13, 14, 15};
std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
- TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
+ TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
ConstTensor weights(weightsInfo, weightsVector);
- Optional<ConstTensor> optionalBias;
- return network->AddFullyConnectedLayer(descriptor, weights, optionalBias, name);
+ IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+ std::vector<IConnectableLayer*> layers = { weightsLayer };
+ return layers;
}
};
@@ -150,6 +187,7 @@ struct BatchNormTest
public:
using LayerType = BatchNormalizationLayer;
static const bool isElementWise = false;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
@@ -181,6 +219,16 @@ public:
return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
@@ -188,6 +236,7 @@ struct MultiplicationTest
{
using LayerType = MultiplicationLayer;
static const bool isElementWise = true;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
@@ -205,6 +254,16 @@ struct MultiplicationTest
return network->AddMultiplicationLayer(name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
@@ -212,6 +271,7 @@ struct AdditionTest
{
using LayerType = AdditionLayer;
static const bool isElementWise = true;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
@@ -229,6 +289,16 @@ struct AdditionTest
return network->AddAdditionLayer(name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
@@ -236,6 +306,7 @@ struct SubtractionTest
{
using LayerType = SubtractionLayer;
static const bool isElementWise = true;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
@@ -253,6 +324,16 @@ struct SubtractionTest
return network->AddSubtractionLayer(name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
@@ -260,6 +341,7 @@ struct DivisionTest
{
using LayerType = DivisionLayer;
static const bool isElementWise = true;
+ static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
@@ -277,11 +359,21 @@ struct DivisionTest
return network->AddDivisionLayer(name);
}
+
+ static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+ float scale = 1.f,
+ int32_t offset = 0)
+ {
+ IgnoreUnused(network);
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+ return {};
+ }
};
template<typename LayerTest,
DataType ArmnnType>
-INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
+INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
float scale, int32_t offset)
{
// Create a network
@@ -300,6 +392,20 @@ INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool prevent
IConnectableLayer* outputLayer = network->AddOutputLayer(0);
IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):nullptr;
+ // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
+ if(LayerTest::isConstTensorAsInputSupported)
+ {
+ std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
+ scale,
+ offset);
+
+ // Connect constant layers to receiverLayer.
+ for (unsigned int i = 0; i < constantLayers.size(); ++i)
+ {
+ constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1));
+ }
+ }
+
// Define layers information
TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
@@ -335,7 +441,7 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript
{
// FIRST NETWORK: Fused
// Construct ArmNN network
- INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
+ INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
// Create ArmNN runtime
IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -350,12 +456,31 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript
(layer->GetNameStr() == "fused-activation-into-receiverLayer");
};
- CHECK(3 == graphFused.GetNumLayers());
- CHECK(CheckSequence(graphFused.cbegin(),
- graphFused.cend(),
- &IsLayerOfType<InputLayer>,
- checkFusedConv2d,
- &IsLayerOfType<OutputLayer>));
+ // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
+ if(LayerTest::isConstTensorAsInputSupported)
+ {
+ CHECK(4 == graphFused.GetNumLayers());
+ CHECK(CheckSequence(graphFused.cbegin(),
+ graphFused.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ checkFusedConv2d,
+ &IsLayerOfType<OutputLayer>));
+
+ // Check if new constant layer is connected to fused receiver layer.
+ Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer");
+ CHECK(fusedReceiverLayer);
+ CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr);
+ }
+ else
+ {
+ CHECK(3 == graphFused.GetNumLayers());
+ CHECK(CheckSequence(graphFused.cbegin(),
+ graphFused.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkFusedConv2d,
+ &IsLayerOfType<OutputLayer>));
+ }
// Load network into runtime
NetworkId networkIdentifier;
@@ -376,7 +501,7 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript
// SECOND NETWORK: NotFused
// Construct ArmNN network
- INetworkPtr networkNotFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);
+ INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);
// Create ArmNN runtime
IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -386,14 +511,30 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript
Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());
- CHECK(5 == graphNotFused.GetNumLayers());
- CHECK(CheckSequence(graphNotFused.cbegin(),
- graphNotFused.cend(),
- &IsLayerOfType<InputLayer>,
- &IsLayerOfType<LayerType>,
- &IsLayerOfType<ActivationLayer>,
- &IsLayerOfType<OutputLayer>,
- &IsLayerOfType<OutputLayer>));
+ // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
+ if(LayerTest::isConstTensorAsInputSupported)
+ {
+ CHECK(6 == graphNotFused.GetNumLayers());
+ CHECK(CheckSequence(graphNotFused.cbegin(),
+ graphNotFused.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<ConstantLayer>,
+ &IsLayerOfType<LayerType>,
+ &IsLayerOfType<ActivationLayer>,
+ &IsLayerOfType<OutputLayer>,
+ &IsLayerOfType<OutputLayer>));
+ }
+ else
+ {
+ CHECK(5 == graphNotFused.GetNumLayers());
+ CHECK(CheckSequence(graphNotFused.cbegin(),
+ graphNotFused.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<LayerType>,
+ &IsLayerOfType<ActivationLayer>,
+ &IsLayerOfType<OutputLayer>,
+ &IsLayerOfType<OutputLayer>));
+ }
// Load network into runtime
NetworkId networkIdentifierNotFused;
@@ -433,7 +574,7 @@ bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute
try
{
// Construct ArmNN network
- INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
+ INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
// Create ArmNN runtime
IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
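For contrast with the empty AddConstantLayers stubs above, here is a minimal sketch of how a layer test that does support ConstTensorsAsInputs (e.g. a FullyConnected test) might implement the hook; the shape, values and names are illustrative assumptions, not part of this commit. CreateNetwork connects each returned layer to the receiver layer's input slots starting at 1:

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        // Hypothetical constant weights for the receiver layer.
        TensorInfo weightsInfo(TensorShape({32, 16}), ArmnnType, scale, offset);
        weightsInfo.SetConstant();
        std::vector<T> weightsData(weightsInfo.GetNumElements(), static_cast<T>(1));
        ConstTensor weights(weightsInfo, weightsData);

        IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
        return { weightsLayer }; // wired to receiverLayer->GetInputSlot(1) by CreateNetwork
    }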
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 2d9194a350..5c99496744 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -688,6 +688,7 @@ armnn::ConstTensor ToConstTensor(ConstTensorRawPtr constTensorPtr)
{
CHECK_CONST_TENSOR_PTR(constTensorPtr);
armnn::TensorInfo tensorInfo = ToTensorInfo(constTensorPtr->info());
+ tensorInfo.SetConstant();
switch (constTensorPtr->data_type())
{
@@ -938,6 +939,7 @@ IDeserializer::DeserializerImpl::FeatureVersions IDeserializer::DeserializerImpl
{
versions.m_BindingIdScheme = graph->featureVersions()->bindingIdsScheme();
versions.m_WeightsLayoutScheme = graph->featureVersions()->weightsLayoutScheme();
+ versions.m_ConstTensorsAsInputs = graph->featureVersions()->constantTensorsAsInputs();
}
return versions;
@@ -1052,13 +1054,15 @@ void IDeserializer::DeserializerImpl::RegisterOutputSlots(GraphPtr graph,
}
void IDeserializer::DeserializerImpl::RegisterInputSlots(GraphPtr graph,
- uint32_t layerIndex,
- armnn::IConnectableLayer* layer)
+ uint32_t layerIndex,
+ armnn::IConnectableLayer* layer,
+ std::vector<unsigned int> ignoreSlots)
{
CHECK_LAYERS(graph, 0, layerIndex);
ARMNN_ASSERT(layer != nullptr);
LayerBaseRawPtr baseLayer = GetBaseLayer(graph, layerIndex);
- if (baseLayer->inputSlots()->size() != layer->GetNumInputSlots())
+
+ if (baseLayer->inputSlots()->size() != (layer->GetNumInputSlots() - ignoreSlots.size()))
{
throw ParseException(fmt::format("The number of inputslots ({0}) does not match the number expected ({1})"
" for layer index:{2} {3}",
@@ -1070,10 +1074,14 @@ void IDeserializer::DeserializerImpl::RegisterInputSlots(GraphPtr graph,
for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
{
- auto fbInputSlot = baseLayer->inputSlots()->Get(i);
- auto fbConnection = fbInputSlot->connection();
- armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index()));
- RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot);
+ // Check if slot should be ignored.
+ if (std::find(ignoreSlots.begin(), ignoreSlots.end(), i) == ignoreSlots.end())
+ {
+ auto fbInputSlot = baseLayer->inputSlots()->Get(i);
+ auto fbConnection = fbInputSlot->connection();
+ armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index()));
+ RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot);
+ }
}
}
@@ -1924,40 +1932,47 @@ void IDeserializer::DeserializerImpl::ParseFullyConnected(GraphPtr graph, unsign
fullyConnectedDescriptor.m_BiasEnabled = flatBufferDescriptor->biasEnabled();
fullyConnectedDescriptor.m_TransposeWeightMatrix = flatBufferDescriptor->transposeWeightsMatrix();
fullyConnectedDescriptor.m_ConstantWeights = flatBufferDescriptor->constantWeights();
- uint32_t numInputs = 1;
- if (!fullyConnectedDescriptor.m_ConstantWeights)
+
+ armnn::IConnectableLayer* layer;
+ std::vector<unsigned int> ignoreSlots {};
+
+ // Weights and biases used to be always constant and were stored as members of the layer. This has changed and
+ // they are now passed as inputs. If they are constant then they will be stored in a ConstantLayer.
+ if (this->GetFeatureVersions(graph).m_ConstTensorsAsInputs <= 0)
{
- numInputs = 2;
+ // If the model stores weights and biases as members of the layer, we have to read them from there
+ // but add them to their own ConstantLayer for compatibility.
+ CHECK_VALID_SIZE(inputs.size(), 1);
+ layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor,
+ layerName.c_str());
+
+ armnn::ConstTensor weightsTensor = ToConstTensor(flatBufferLayer->weights());
+ auto weightsLayer = m_Network->AddConstantLayer(weightsTensor);
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensor.GetInfo());
+ ignoreSlots.emplace_back(1u);
+
if (fullyConnectedDescriptor.m_BiasEnabled)
{
- numInputs = 3;
+ armnn::ConstTensor biasTensor = ToConstTensor(flatBufferLayer->biases());
+ auto biasLayer = m_Network->AddConstantLayer(biasTensor);
+ biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
+ biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensor.GetInfo());
+ ignoreSlots.emplace_back(2u);
}
}
- CHECK_VALID_SIZE(inputs.size(), numInputs);
-
- armnn::Optional <armnn::ConstTensor> optionalWeights = armnn::EmptyOptional();
- armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
- if (fullyConnectedDescriptor.m_ConstantWeights)
+ else
{
- armnn::ConstTensor weightsTensorData = ToConstTensor(flatBufferLayer->weights());
- optionalWeights = armnn::Optional<armnn::ConstTensor>(weightsTensorData);
-
- if (flatBufferDescriptor->biasEnabled())
- {
- armnn::ConstTensor biasTensorData = ToConstTensor(flatBufferLayer->biases());
- optionalBiases = armnn::Optional<armnn::ConstTensor>(biasTensorData);
- }
+ layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor,
+ layerName.c_str());
+ uint32_t numInputs = fullyConnectedDescriptor.GetNumInputs();
+ CHECK_VALID_SIZE(inputs.size(), numInputs);
}
- armnn::IConnectableLayer* layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor,
- optionalWeights,
- optionalBiases,
- layerName.c_str());
-
armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
- RegisterInputSlots(graph, layerIndex, layer);
+ RegisterInputSlots(graph, layerIndex, layer, ignoreSlots);
RegisterOutputSlots(graph, layerIndex, layer);
}
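The ignoreSlots mechanism generalises beyond FullyConnected: any layer parser that wires some of its input slots itself before registration can list those slots so that RegisterInputSlots skips them. A condensed sketch, assuming the DeserializerImpl context and an already-created layer:

    std::vector<unsigned int> ignoreSlots;
    armnn::ConstTensor weights = ToConstTensor(flatBufferLayer->weights());
    armnn::IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(weights);
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
    ignoreSlots.emplace_back(1u);                              // slot 1 is wired above
    RegisterInputSlots(graph, layerIndex, layer, ignoreSlots); // only slot 0 comes from the flatbuffer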
diff --git a/src/armnnDeserializer/Deserializer.hpp b/src/armnnDeserializer/Deserializer.hpp
index b1362c44b6..b4dc68b72d 100644
--- a/src/armnnDeserializer/Deserializer.hpp
+++ b/src/armnnDeserializer/Deserializer.hpp
@@ -143,9 +143,12 @@ private:
void ParseTransposeConvolution2d(GraphPtr graph, unsigned int layerIndex);
void ParseUnidirectionalSequenceLstm(GraphPtr graph, unsigned int layerIndex);
- void RegisterInputSlots(GraphPtr graph, uint32_t layerIndex,
- armnn::IConnectableLayer* layer);
- void RegisterOutputSlots(GraphPtr graph, uint32_t layerIndex,
+ void RegisterInputSlots(GraphPtr graph,
+ uint32_t layerIndex,
+ armnn::IConnectableLayer* layer,
+ std::vector<unsigned int> ignoreSlots = {});
+ void RegisterOutputSlots(GraphPtr graph,
+ uint32_t layerIndex,
armnn::IConnectableLayer* layer);
// NOTE index here must be from flatbuffer object index property
@@ -171,6 +174,9 @@ private:
// Default values to zero for backward compatibility
unsigned int m_WeightsLayoutScheme = 0;
+
+ // Default values to zero for backward compatibility
+ unsigned int m_ConstTensorsAsInputs = 0;
};
FeatureVersions GetFeatureVersions(GraphPtr graph);
diff --git a/src/armnnDeserializer/test/DeserializeFullyConnected.cpp b/src/armnnDeserializer/test/DeserializeFullyConnected.cpp
index da2db08fd4..5e298d1525 100644
--- a/src/armnnDeserializer/test/DeserializeFullyConnected.cpp
+++ b/src/armnnDeserializer/test/DeserializeFullyConnected.cpp
@@ -117,22 +117,241 @@ struct FullyConnectedFixture : public ParserFlatbuffersSerializeFixture
}
};
+
+struct FullyConnectedFixtureConstantAsInput : public ParserFlatbuffersSerializeFixture
+{
+ explicit FullyConnectedFixtureConstantAsInput()
+ {
+ m_JsonString = R"(
+ {
+ "layers": [
+ {
+ "layer_type": "InputLayer",
+ "layer": {
+ "base": {
+ "base": {
+ "index": 0,
+ "layerName": "InputLayer",
+ "layerType": "Input",
+ "inputSlots": [
+
+ ],
+ "outputSlots": [
+ {
+ "index": 0,
+ "tensorInfo": {
+ "dimensions": [
+ 1,
+ 4,
+ 1,
+ 1
+ ],
+ "dataType": "QAsymmU8",
+ "quantizationScale": 1.0,
+ "quantizationOffset": 0,
+ "quantizationDim": 0,
+ "dimensionality": 1,
+ "dimensionSpecificity": [
+ true,
+ true,
+ true,
+ true
+ ]
+ }
+ }
+ ]
+ },
+ "layerBindingId": 0
+ }
+ }
+ },
+ {
+ "layer_type": "FullyConnectedLayer",
+ "layer": {
+ "base": {
+ "index": 1,
+ "layerName": "FullyConnectedLayer",
+ "layerType": "FullyConnected",
+ "inputSlots": [
+ {
+ "index": 0,
+ "connection": {
+ "sourceLayerIndex": 0,
+ "outputSlotIndex": 0
+ }
+ },
+ {
+ "index": 1,
+ "connection": {
+ "sourceLayerIndex": 2,
+ "outputSlotIndex": 0
+ }
+ }
+ ],
+ "outputSlots": [
+ {
+ "index": 0,
+ "tensorInfo": {
+ "dimensions": [
+ 1,
+ 1
+ ],
+ "dataType": "QAsymmU8",
+ "quantizationScale": 2.0,
+ "quantizationOffset": 0,
+ "quantizationDim": 0,
+ "dimensionality": 1,
+ "dimensionSpecificity": [
+ true,
+ true
+ ]
+ }
+ }
+ ]
+ },
+ "descriptor": {
+ "biasEnabled": false,
+ "transposeWeightsMatrix": true,
+ "constantWeights": true
+ }
+ }
+ },
+ {
+ "layer_type": "ConstantLayer",
+ "layer": {
+ "base": {
+ "index": 2,
+ "layerName": "",
+ "layerType": "Constant",
+ "inputSlots": [
+
+ ],
+ "outputSlots": [
+ {
+ "index": 0,
+ "tensorInfo": {
+ "dimensions": [
+ 1,
+ 4
+ ],
+ "dataType": "QAsymmU8",
+ "quantizationScale": 1.0,
+ "quantizationOffset": 0,
+ "quantizationDim": 0,
+ "dimensionality": 1,
+ "dimensionSpecificity": [
+ true,
+ true
+ ],
+ "isConstant": true,
+ }
+ }
+ ]
+ },
+ "input": {
+ "info": {
+ "dimensions": [
+ 1,
+ 4
+ ],
+ "dataType": "QAsymmU8",
+ "quantizationScale": 1.0,
+ "quantizationOffset": 0,
+ "quantizationDim": 0,
+ "dimensionality": 1,
+ "dimensionSpecificity": [
+ true,
+ true
+ ]
+ },
+ "data_type": "ByteData",
+ "data": {
+ "data": [
+ 2,
+ 3,
+ 4,
+ 5
+ ]
+ }
+ }
+ }
+ },
+ {
+ "layer_type": "OutputLayer",
+ "layer": {
+ "base": {
+ "base": {
+ "index": 3,
+ "layerName": "OutputLayer",
+ "layerType": "Output",
+ "inputSlots": [
+ {
+ "index": 0,
+ "connection": {
+ "sourceLayerIndex": 1,
+ "outputSlotIndex": 0
+ }
+ }
+ ],
+ "outputSlots": [
+
+ ]
+ },
+ "layerBindingId": 0
+ }
+ }
+ }
+ ],
+ "inputIds": [
+ 0
+ ],
+ "outputIds": [
+ 0
+ ],
+ "featureVersions": {
+ "bindingIdsScheme": 1,
+ "weightsLayoutScheme": 1,
+ "constantTensorsAsInputs": 1
+ }
+ }
+ )";
+ Setup();
+ }
+};
+
struct FullyConnectedWithNoBiasFixture : FullyConnectedFixture
{
FullyConnectedWithNoBiasFixture()
- : FullyConnectedFixture("[ 1, 4, 1, 1 ]", // inputShape
- "[ 1, 1 ]", // outputShape
- "[ 1, 4 ]", // filterShape
- "QuantisedAsymm8") // filterData
+ : FullyConnectedFixture("[ 1, 4, 1, 1 ]", // inputShape
+ "[ 1, 1 ]", // outputShape
+ "[ 1, 4 ]", // filterShape
+ "QuantisedAsymm8") // filterData
{}
};
TEST_CASE_FIXTURE(FullyConnectedWithNoBiasFixture, "FullyConnectedWithNoBias")
{
+ // Weights and biases used to be always constant and were stored as members of the layer. This has changed and
+ // they are now passed as inputs (ConstantLayer) but the old way can still be used for now.
+ RunTest<2, armnn::DataType::QAsymmU8>(
+ 0,
+ {{"InputLayer", { 10, 20, 30, 40 }}},
+ {{"OutputLayer", { 400/2 }}});
+}
+
+struct FullyConnectedWithNoBiasFixtureConstantAsInput : FullyConnectedFixtureConstantAsInput
+{
+ FullyConnectedWithNoBiasFixtureConstantAsInput()
+ : FullyConnectedFixtureConstantAsInput()
+ {}
+};
+
+TEST_CASE_FIXTURE(FullyConnectedWithNoBiasFixtureConstantAsInput, "FullyConnectedWithNoBiasConstantAsInput")
+{
RunTest<2, armnn::DataType::QAsymmU8>(
- 0,
- {{"InputLayer", { 10, 20, 30, 40 }}},
- {{"OutputLayer", { 400/2 }}});
+ 0,
+ {{"InputLayer", { 10, 20, 30, 40 }}},
+ {{"OutputLayer", { 400/2 }}});
}
}
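Both fixtures encode the same computation, so the expected value can be checked by hand: the dot product 10*2 + 20*3 + 30*4 + 40*5 = 400 is requantized with the output scale of 2.0, giving 400/2 = 200, which is exactly what the two tests assert.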
diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp
index 1fb5b96b8f..a7e6902fdd 100644
--- a/src/armnnOnnxParser/OnnxParser.cpp
+++ b/src/armnnOnnxParser/OnnxParser.cpp
@@ -532,6 +532,9 @@ OnnxParserImpl::CreateConstTensor(const std::string name,
TensorInfo tensorInfo = *m_TensorsInfo[name].m_info;
onnx::TensorProto onnxTensor = *m_TensorsInfo[name].m_tensor;
+ // Make sure IsConstant flag is set.
+ tensorInfo.SetConstant();
+
// Const tensors requires at least a list of values
if (tensorInfo.GetNumElements() == 0)
{
@@ -972,27 +975,41 @@ void OnnxParserImpl::AddFullyConnected(const onnx::NodeProto& matmulNode, const
m_TensorsInfo[biasName].m_dtype ),
CHECK_LOCATION().AsString()));
}
- layer = m_Network->AddFullyConnectedLayer(desc,
- CreateConstTensor(weightName).first,
- Optional<ConstTensor>(CreateConstTensor(biasName).first),
- matmulNode.name().c_str());
+
+ // Just add a FullyConnected layer; weights and biases are handled as inputs now.
+ layer = m_Network->AddFullyConnectedLayer(desc, matmulNode.name().c_str());
ARMNN_ASSERT(layer != nullptr);
auto outputInfo = ComputeOutputInfo({addNode->output(0)}, layer,
{m_TensorsInfo[inputName].m_info->GetShape(),
m_TensorsInfo[weightName].m_info->GetShape()});
-
layer->GetOutputSlot(0).SetTensorInfo(outputInfo[0]);
- RegisterInputSlots(layer, {inputName});
+ // Add a constant layer to store weights/biases and connect it to the FullyConnected layer.
+ if(m_TensorsInfo[weightName].isConstant())
+ {
+ IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(CreateConstTensor(weightName).first);
+
+ weightInfo.SetConstant();
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightInfo);
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+ }
+
+ if(m_TensorsInfo[biasName].isConstant())
+ {
+ IConnectableLayer* biasLayer = m_Network->AddConstantLayer(CreateConstTensor(biasName).first);
+
+ biasInfo.SetConstant();
+ biasLayer->GetOutputSlot(0).SetTensorInfo(biasInfo);
+ biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
+ }
+
+ RegisterInputSlots(layer, {inputName, weightName, biasName});
RegisterOutputSlots(layer, {addNode->output(0)});
}
else
{
- layer = m_Network->AddFullyConnectedLayer(desc,
- CreateConstTensor(weightName).first,
- EmptyOptional(),
- matmulNode.name().c_str());
+ layer = m_Network->AddFullyConnectedLayer(desc, matmulNode.name().c_str());
ARMNN_ASSERT(layer != nullptr);
auto outputInfo = ComputeOutputInfo({matmulNode.output(0)}, layer,
@@ -1000,7 +1017,18 @@ void OnnxParserImpl::AddFullyConnected(const onnx::NodeProto& matmulNode, const
m_TensorsInfo[weightName].m_info->GetShape()});
layer->GetOutputSlot(0).SetTensorInfo(outputInfo[0]);
- RegisterInputSlots(layer, {inputName});
+ // Add a constant layer to store the weights and connect it to the FullyConnected layer.
+ if(m_TensorsInfo[weightName].isConstant())
+ {
+ TensorInfo weightInfo = *m_TensorsInfo[weightName].m_info;
+ IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(CreateConstTensor(weightName).first);
+
+ weightInfo.SetConstant();
+ weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightInfo);
+ }
+
+ RegisterInputSlots(layer, {inputName, weightName});
RegisterOutputSlots(layer, {matmulNode.output(0)});
}
}
@@ -1755,6 +1783,7 @@ void OnnxParserImpl::RegisterInputSlots(IConnectableLayer* layer, const std::vec
layer->GetNumInputSlots(),
CHECK_LOCATION().AsString()));
}
+
for (unsigned int slotIndex = 0; slotIndex < layer->GetNumInputSlots(); ++slotIndex)
{
std::string tensorId = tensorIds[slotIndex];
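The delegate and all three parsers now funnel constant data through the same wiring pattern; a condensed sketch (the tensor shape, data, and the network/targetLayer names are illustrative assumptions):

    // Illustrative: a 1x4 Float32 constant becoming an input to an existing layer.
    std::vector<float> data = { 1.f, 2.f, 3.f, 4.f };
    armnn::TensorInfo info({ 1, 4 }, armnn::DataType::Float32);
    info.SetConstant();                                   // mark the backing data as immutable
    armnn::ConstTensor constant(info, data);
    armnn::IConnectableLayer* constLayer = network->AddConstantLayer(constant);
    constLayer->GetOutputSlot(0).SetTensorInfo(info);
    constLayer->GetOutputSlot(0).Connect(targetLayer->GetInputSlot(1u));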
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index a544161c53..85435a366f 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -69,6 +69,7 @@ table TensorInfo {
quantizationDim:uint;
dimensionality:uint = 1;
dimensionSpecificity:[bool];
+ isConstant:bool = false;
}
struct Connection {
@@ -324,7 +325,7 @@ table FloorLayer{
table FullyConnectedLayer {
base:LayerBase;
descriptor:FullyConnectedDescriptor;
- weights:ConstTensor;
+ weights:ConstTensor; // ConstTensors are now passed as inputs.
biases:ConstTensor;
}
@@ -1007,6 +1008,7 @@ table AnyLayer {
table FeatureCompatibilityVersions {
bindingIdsScheme:uint = 0;
weightsLayoutScheme:uint = 0;
+ constantTensorsAsInputs:uint = 0;
}
// Root type for serialized data is the graph of the network
diff --git a/src/armnnSerializer/ArmnnSchema_generated.h b/src/armnnSerializer/ArmnnSchema_generated.h
index 27550f0682..ca2bf0c003 100644
--- a/src/armnnSerializer/ArmnnSchema_generated.h
+++ b/src/armnnSerializer/ArmnnSchema_generated.h
@@ -1685,7 +1685,8 @@ struct TensorInfo FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
VT_QUANTIZATIONSCALES = 12,
VT_QUANTIZATIONDIM = 14,
VT_DIMENSIONALITY = 16,
- VT_DIMENSIONSPECIFICITY = 18
+ VT_DIMENSIONSPECIFICITY = 18,
+ VT_ISCONSTANT = 20
};
const flatbuffers::Vector<uint32_t> *dimensions() const {
return GetPointer<const flatbuffers::Vector<uint32_t> *>(VT_DIMENSIONS);
@@ -1711,6 +1712,9 @@ struct TensorInfo FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
const flatbuffers::Vector<uint8_t> *dimensionSpecificity() const {
return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DIMENSIONSPECIFICITY);
}
+ bool isConstant() const {
+ return GetField<uint8_t>(VT_ISCONSTANT, 0) != 0;
+ }
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyOffset(verifier, VT_DIMENSIONS) &&
@@ -1724,6 +1728,7 @@ struct TensorInfo FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
VerifyField<uint32_t>(verifier, VT_DIMENSIONALITY) &&
VerifyOffset(verifier, VT_DIMENSIONSPECIFICITY) &&
verifier.VerifyVector(dimensionSpecificity()) &&
+ VerifyField<uint8_t>(verifier, VT_ISCONSTANT) &&
verifier.EndTable();
}
};
@@ -1756,6 +1761,9 @@ struct TensorInfoBuilder {
void add_dimensionSpecificity(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> dimensionSpecificity) {
fbb_.AddOffset(TensorInfo::VT_DIMENSIONSPECIFICITY, dimensionSpecificity);
}
+ void add_isConstant(bool isConstant) {
+ fbb_.AddElement<uint8_t>(TensorInfo::VT_ISCONSTANT, static_cast<uint8_t>(isConstant), 0);
+ }
explicit TensorInfoBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
@@ -1777,7 +1785,8 @@ inline flatbuffers::Offset<TensorInfo> CreateTensorInfo(
flatbuffers::Offset<flatbuffers::Vector<float>> quantizationScales = 0,
uint32_t quantizationDim = 0,
uint32_t dimensionality = 1,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> dimensionSpecificity = 0) {
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> dimensionSpecificity = 0,
+ bool isConstant = false) {
TensorInfoBuilder builder_(_fbb);
builder_.add_dimensionSpecificity(dimensionSpecificity);
builder_.add_dimensionality(dimensionality);
@@ -1786,6 +1795,7 @@ inline flatbuffers::Offset<TensorInfo> CreateTensorInfo(
builder_.add_quantizationOffset(quantizationOffset);
builder_.add_quantizationScale(quantizationScale);
builder_.add_dimensions(dimensions);
+ builder_.add_isConstant(isConstant);
builder_.add_dataType(dataType);
return builder_.Finish();
}
@@ -1799,7 +1809,8 @@ inline flatbuffers::Offset<TensorInfo> CreateTensorInfoDirect(
const std::vector<float> *quantizationScales = nullptr,
uint32_t quantizationDim = 0,
uint32_t dimensionality = 1,
- const std::vector<uint8_t> *dimensionSpecificity = nullptr) {
+ const std::vector<uint8_t> *dimensionSpecificity = nullptr,
+ bool isConstant = false) {
auto dimensions__ = dimensions ? _fbb.CreateVector<uint32_t>(*dimensions) : 0;
auto quantizationScales__ = quantizationScales ? _fbb.CreateVector<float>(*quantizationScales) : 0;
auto dimensionSpecificity__ = dimensionSpecificity ? _fbb.CreateVector<uint8_t>(*dimensionSpecificity) : 0;
@@ -1812,7 +1823,8 @@ inline flatbuffers::Offset<TensorInfo> CreateTensorInfoDirect(
quantizationScales__,
quantizationDim,
dimensionality,
- dimensionSpecificity__);
+ dimensionSpecificity__,
+ isConstant);
}
struct ByteData FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -10124,7 +10136,8 @@ struct FeatureCompatibilityVersions FLATBUFFERS_FINAL_CLASS : private flatbuffer
typedef FeatureCompatibilityVersionsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_BINDINGIDSSCHEME = 4,
- VT_WEIGHTSLAYOUTSCHEME = 6
+ VT_WEIGHTSLAYOUTSCHEME = 6,
+ VT_CONSTANTTENSORSASINPUTS = 8
};
uint32_t bindingIdsScheme() const {
return GetField<uint32_t>(VT_BINDINGIDSSCHEME, 0);
@@ -10132,10 +10145,14 @@ struct FeatureCompatibilityVersions FLATBUFFERS_FINAL_CLASS : private flatbuffer
uint32_t weightsLayoutScheme() const {
return GetField<uint32_t>(VT_WEIGHTSLAYOUTSCHEME, 0);
}
+ uint32_t constantTensorsAsInputs() const {
+ return GetField<uint32_t>(VT_CONSTANTTENSORSASINPUTS, 0);
+ }
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<uint32_t>(verifier, VT_BINDINGIDSSCHEME) &&
VerifyField<uint32_t>(verifier, VT_WEIGHTSLAYOUTSCHEME) &&
+ VerifyField<uint32_t>(verifier, VT_CONSTANTTENSORSASINPUTS) &&
verifier.EndTable();
}
};
@@ -10150,6 +10167,9 @@ struct FeatureCompatibilityVersionsBuilder {
void add_weightsLayoutScheme(uint32_t weightsLayoutScheme) {
fbb_.AddElement<uint32_t>(FeatureCompatibilityVersions::VT_WEIGHTSLAYOUTSCHEME, weightsLayoutScheme, 0);
}
+ void add_constantTensorsAsInputs(uint32_t constantTensorsAsInputs) {
+ fbb_.AddElement<uint32_t>(FeatureCompatibilityVersions::VT_CONSTANTTENSORSASINPUTS, constantTensorsAsInputs, 0);
+ }
explicit FeatureCompatibilityVersionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
@@ -10165,8 +10185,10 @@ struct FeatureCompatibilityVersionsBuilder {
inline flatbuffers::Offset<FeatureCompatibilityVersions> CreateFeatureCompatibilityVersions(
flatbuffers::FlatBufferBuilder &_fbb,
uint32_t bindingIdsScheme = 0,
- uint32_t weightsLayoutScheme = 0) {
+ uint32_t weightsLayoutScheme = 0,
+ uint32_t constantTensorsAsInputs = 0) {
FeatureCompatibilityVersionsBuilder builder_(_fbb);
+ builder_.add_constantTensorsAsInputs(constantTensorsAsInputs);
builder_.add_weightsLayoutScheme(weightsLayoutScheme);
builder_.add_bindingIdsScheme(bindingIdsScheme);
return builder_.Finish();
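Both additions rely on flatbuffers' schema-evolution rules: fields absent from an old file read back as their declared defaults, so pre-existing models report constantTensorsAsInputs() == 0 and isConstant() == false and continue down the legacy path. A sketch of the reader-side gate, assuming a parsed graph:

    // Old files omit the field entirely, so the accessor returns the default of 0.
    auto fcv = graph->featureVersions();
    bool constTensorsAsInputs = (fcv != nullptr) && (fcv->constantTensorsAsInputs() >= 1);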
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index 44cd1800c4..195b41657a 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -1126,7 +1126,6 @@ void SerializerStrategy::SerializeQuantizeLayer(const armnn::IConnectableLayer *
// Build FlatBuffer for FullyConnected Layer
void SerializerStrategy::SerializeFullyConnectedLayer(const armnn::IConnectableLayer* layer,
const armnn::FullyConnectedDescriptor& fullyConnectedDescriptor,
- const std::vector<armnn::ConstTensor>& constants,
const char*)
{
// Create FlatBuffer BaseLayer
@@ -1139,28 +1138,10 @@ void SerializerStrategy::SerializeFullyConnectedLayer(const armnn::IConnectableL
fullyConnectedDescriptor.m_TransposeWeightMatrix,
fullyConnectedDescriptor.m_ConstantWeights);
- // Create FlatBuffer weights data
- flatbuffers::Offset<serializer::ConstTensor> flatBufferWeights;
- // Create FlatBuffer bias data
- flatbuffers::Offset<serializer::ConstTensor> flatBufferBiases;
- if (fullyConnectedDescriptor.m_ConstantWeights && !constants.empty())
- {
- armnn::ConstTensor weights = constants.at(0);
- flatBufferWeights = CreateConstTensorInfo(weights);
-
- if (fullyConnectedDescriptor.m_BiasEnabled)
- {
- armnn::ConstTensor biases = constants.at(1);
- flatBufferBiases = CreateConstTensorInfo(biases);
- }
- }
-
// Create FlatBuffer FullyConnectedLayer
auto flatBufferLayer = serializer::CreateFullyConnectedLayer(m_flatBufferBuilder,
flatBufferBaseLayer,
- flatBufferDescriptor,
- flatBufferWeights,
- flatBufferBiases);
+ flatBufferDescriptor);
// Add created FullyConnectedLayer to the FlatBufferLayers
CreateAnyLayer(flatBufferLayer.o, serializer::Layer::Layer_FullyConnectedLayer);
@@ -1916,7 +1897,8 @@ flatbuffers::Offset<armnnSerializer::FeatureCompatibilityVersions> SerializerStr
serializer::CreateFeatureCompatibilityVersions(
m_flatBufferBuilder,
1, // Binding ids scheme version
- 1 // Weights layout scheme version
+ 1, // Weights layout scheme version
+ 1 // Constant tensors as inputs version
);
return versionsTable;
}
@@ -2110,7 +2092,7 @@ void SerializerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
{
const armnn::FullyConnectedDescriptor& layerDescriptor =
static_cast<const armnn::FullyConnectedDescriptor&>(descriptor);
- SerializeFullyConnectedLayer(layer, layerDescriptor, constants, name);
+ SerializeFullyConnectedLayer(layer, layerDescriptor, name);
break;
}
case armnn::LayerType::Gather :
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index dead8739cc..18b2cc77ac 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -184,7 +184,6 @@ private:
void SerializeFullyConnectedLayer(const armnn::IConnectableLayer* layer,
const armnn::FullyConnectedDescriptor& fullyConnectedDescriptor,
- const std::vector<armnn::ConstTensor>& constants,
const char* name = nullptr);
void SerializeGatherLayer(const armnn::IConnectableLayer* layer,
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index 98532d0cec..9e9df0d1ea 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -789,6 +789,41 @@ TEST_CASE("SerializeFloor")
deserializedNetwork->ExecuteStrategy(verifier);
}
+using FullyConnectedDescriptor = armnn::FullyConnectedDescriptor;
+class FullyConnectedLayerVerifier : public LayerVerifierBaseWithDescriptor<FullyConnectedDescriptor>
+{
+public:
+ FullyConnectedLayerVerifier(const std::string& layerName,
+ const std::vector<armnn::TensorInfo>& inputInfos,
+ const std::vector<armnn::TensorInfo>& outputInfos,
+ const FullyConnectedDescriptor& descriptor)
+ : LayerVerifierBaseWithDescriptor<FullyConnectedDescriptor>(layerName, inputInfos, outputInfos, descriptor) {}
+
+ void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+ const armnn::BaseDescriptor& descriptor,
+ const std::vector<armnn::ConstTensor>& constants,
+ const char* name,
+ const armnn::LayerBindingId id = 0) override
+ {
+ armnn::IgnoreUnused(constants, id);
+ switch (layer->GetType())
+ {
+ case armnn::LayerType::Input: break;
+ case armnn::LayerType::Output: break;
+ case armnn::LayerType::Constant: break;
+ default:
+ {
+ VerifyNameAndConnections(layer, name);
+ const FullyConnectedDescriptor& layerDescriptor =
+ static_cast<const FullyConnectedDescriptor&>(descriptor);
+ CHECK(layerDescriptor.m_ConstantWeights == m_Descriptor.m_ConstantWeights);
+ CHECK(layerDescriptor.m_BiasEnabled == m_Descriptor.m_BiasEnabled);
+ CHECK(layerDescriptor.m_TransposeWeightMatrix == m_Descriptor.m_TransposeWeightMatrix);
+ }
+ }
+ }
+};
+
TEST_CASE("SerializeFullyConnected")
{
const std::string layerName("fullyConnected");
@@ -809,11 +844,16 @@ TEST_CASE("SerializeFullyConnected")
armnn::INetworkPtr network = armnn::INetwork::Create();
armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
+
+ // Old way of handling constant tensors.
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
armnn::IConnectableLayer* const fullyConnectedLayer =
network->AddFullyConnectedLayer(descriptor,
weights,
armnn::Optional<armnn::ConstTensor>(biases),
layerName.c_str());
+ ARMNN_NO_DEPRECATE_WARN_END
+
armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
@@ -825,13 +865,11 @@ TEST_CASE("SerializeFullyConnected")
armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
CHECK(deserializedNetwork);
- const std::vector<armnn::ConstTensor> constants {weights, biases};
- LayerVerifierBaseWithDescriptorAndConstants<armnn::FullyConnectedDescriptor> verifier(
- layerName, {inputInfo}, {outputInfo}, descriptor, constants);
+ FullyConnectedLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor);
deserializedNetwork->ExecuteStrategy(verifier);
}
-TEST_CASE("SerializeFullyConnectedWeightsAsInputs")
+TEST_CASE("SerializeFullyConnectedWeightsAndBiasesAsInputs")
{
const std::string layerName("fullyConnected_weights_as_inputs");
const armnn::TensorInfo inputInfo ({ 2, 5, 1, 1 }, armnn::DataType::Float32);
@@ -854,8 +892,6 @@ TEST_CASE("SerializeFullyConnectedWeightsAsInputs")
armnn::IConnectableLayer* const biasInputLayer = network->AddInputLayer(2);
armnn::IConnectableLayer* const fullyConnectedLayer =
network->AddFullyConnectedLayer(descriptor,
- weights,
- bias,
layerName.c_str());
armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
@@ -878,6 +914,49 @@ TEST_CASE("SerializeFullyConnectedWeightsAsInputs")
deserializedNetwork->ExecuteStrategy(verifier);
}
+TEST_CASE("SerializeFullyConnectedWeightsAndBiasesAsConstantLayers")
+{
+ const std::string layerName("fullyConnected_weights_and_biases_as_constant_layers");
+ const armnn::TensorInfo inputInfo ({ 2, 5, 1, 1 }, armnn::DataType::Float32);
+ const armnn::TensorInfo outputInfo({ 2, 3 }, armnn::DataType::Float32);
+
+ const armnn::TensorInfo weightsInfo({ 5, 3 }, armnn::DataType::Float32);
+ const armnn::TensorInfo biasesInfo ({ 3 }, armnn::DataType::Float32);
+
+ std::vector<float> weightsData = GenerateRandomData<float>(weightsInfo.GetNumElements());
+ std::vector<float> biasesData = GenerateRandomData<float>(biasesInfo.GetNumElements());
+ armnn::ConstTensor weights(weightsInfo, weightsData);
+ armnn::ConstTensor biases(biasesInfo, biasesData);
+
+ armnn::FullyConnectedDescriptor descriptor;
+ descriptor.m_BiasEnabled = true;
+ descriptor.m_TransposeWeightMatrix = false;
+ descriptor.m_ConstantWeights = true;
+
+ armnn::INetworkPtr network = armnn::INetwork::Create();
+ armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
+ armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights");
+ armnn::IConnectableLayer* const biasesLayer = network->AddConstantLayer(biases, "Biases");
+ armnn::IConnectableLayer* const fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, layerName.c_str());
+ armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+ inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
+ weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
+ biasesLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(2));
+ fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+ biasesLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+ fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+ CHECK(deserializedNetwork);
+
+ FullyConnectedLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor);
+ deserializedNetwork->ExecuteStrategy(verifier);
+}
+
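For reference, the round trip above is expected to reproduce the constant layers rather than inline tensors; a sketch of the deserialized graph (order indicative only):

    // InputLayer ----------------------> FullyConnectedLayer -> OutputLayer
    // ConstantLayer("Weights") -> slot 1 ----^
    // ConstantLayer("Biases")  -> slot 2 ----^

which is why FullyConnectedLayerVerifier skips Input, Output and Constant layers and only checks the descriptor on the FullyConnected layer itself.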
TEST_CASE("SerializeGather")
{
using GatherDescriptor = armnn::GatherDescriptor;
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index b669ae4efa..3e59244753 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -555,6 +555,9 @@ CreateConstTensorImpl(TfLiteParserImpl::BufferRawPtr bufferPtr,
::memcpy(data.get(), bufferPtr->data.data(), tensorInfo.GetNumBytes());
}
+ // Make sure isConstant flag is set.
+ tensorInfo.SetConstant();
+
return std::make_pair(ConstTensor(tensorInfo, data.get()), std::move(data));
}
@@ -2571,42 +2574,26 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator
armnn::IConnectableLayer* layer = nullptr;
auto layerName = fmt::format("FullyConnected:{}:{}", subgraphIndex, operatorIndex);
- Optional<ConstTensor> filterOptionalConstTensor;
+ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
+ // Add the first input tensor to the registration list
+ std::vector<unsigned int> tensorIndexesToRegister = {inputTensorIndexes[0]};
+ std::vector<unsigned int> ignoreInputWhenRegister = {};
desc.m_ConstantWeights = IsConstTensor(inputs[1]);
- auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
- std::vector<unsigned int> tensorIndexesToRegister = {inputTensorIndexes[0]};
- if (desc.m_ConstantWeights)
- {
- filterOptionalConstTensor = Optional<ConstTensor>(CreateConstTensorNonPermuted(inputs[1], filterTensorInfo));
- }
- else
- {
- // Non const weights will need to be registered as inputs
- tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]);
- }
+ // Add the weights input to the registration list; a constant layer will be added by SetupConstantLayers if it is constant.
+ tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]);
- Optional<ConstTensor> biasOptionalConstTensor;
if (inputs.size() == 3)
{
desc.m_BiasEnabled = true;
- if (desc.m_ConstantWeights)
- {
- TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
- biasOptionalConstTensor = Optional<ConstTensor>(CreateConstTensorNonPermuted(inputs[2], biasTensorInfo));
- }
- else
- {
- // Non const biases will need to be registered as inputs
- tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]);
- }
+
+ // Add the biases input to the registration list; a constant layer will be added by SetupConstantLayers.
+ tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]);
}
- layer = m_Network->AddFullyConnectedLayer(desc,
- filterOptionalConstTensor,
- biasOptionalConstTensor,
- layerName.c_str());
+ // Filters and biases are always passed to the FullyConnected layer as inputs.
+ layer = m_Network->AddFullyConnectedLayer(desc, layerName.c_str());
ARMNN_ASSERT(layer != nullptr);
armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]);
@@ -3732,6 +3719,7 @@ void TfLiteParserImpl::RegisterInputSlots(size_t subgraphIndex,
{
CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
ARMNN_ASSERT(layer != nullptr);
+
if (tensorIndexes.size() + startingSlotIndex != layer->GetNumInputSlots())
{
throw ParseException(
@@ -3831,19 +3819,27 @@ void TfLiteParserImpl::SetupConstantLayers(size_t subgraphIndex)
m_SubgraphConnections[subgraphIndex][tensorIndex].inputSlots.size() > 0)
{
TensorRawPtr tensorPtr = subgraphPtr->tensors[tensorIndex].get();
- armnn::TensorInfo tensorInfo = ToTensorInfo(tensorPtr);
- auto tensorAndData = CreateConstTensorNonPermuted(tensorPtr, tensorInfo);
- std::string layerName = fmt::format("Constant:{}", tensorPtr->name);
- IConnectableLayer *layer =
- m_Network->AddConstantLayer(tensorAndData, layerName.c_str());
+ if(IsConstTensor(tensorPtr))
+ {
+ armnn::TensorInfo tensorInfo = ToTensorInfo(tensorPtr);
+ auto tensorAndData = CreateConstTensorNonPermuted(tensorPtr, tensorInfo);
- layer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
- RegisterOutputSlots(subgraphIndex,
- VIRTUAL_OPERATOR_ID,
- layer,
- { tensorIndex });
+ std::string layerName = fmt::format("Constant:{}", tensorPtr->name);
+ IConnectableLayer *layer = m_Network->AddConstantLayer(tensorAndData, layerName.c_str());
+ layer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ RegisterOutputSlots(subgraphIndex,
+ VIRTUAL_OPERATOR_ID,
+ layer,
+ { tensorIndex });
+ }
+ else
+ {
+ throw ParseException(
+ fmt::format("Invalid Tensor: Tensor should be constant. {}",
+ CHECK_LOCATION().AsString()));
+ }
}
}
}
@@ -3863,6 +3859,9 @@ TfLiteParserImpl::CreateConstTensorAndStoreData(TfLiteParserImpl::BufferRawPtr b
armnn::TensorInfo& tensorInfo,
armnn::Optional<armnn::PermutationVector&> permutationVector)
{
+ // Make sure isConstant flag is set.
+ tensorInfo.SetConstant();
+
auto constData = CreateConstTensorImpl<T>(bufferPtr,
tensorPtr,
tensorInfo,
@@ -3885,7 +3884,6 @@ bool TfLiteParserImpl::IsConstTensor(TensorRawPtr tensorPtr)
return isConst;
}
-
std::pair<armnn::ConstTensor, TfLiteParserImpl::SupportedDataStorage>
TfLiteParserImpl::CreateConstTensorPermuted(TensorRawPtr tensorPtr,
armnn::TensorInfo& tensorInfo,
@@ -3895,6 +3893,9 @@ TfLiteParserImpl::CreateConstTensorPermuted(TensorRawPtr tensorPtr,
auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer);
CHECK_BUFFER_SIZE(bufferPtr, tensorInfo, tensorPtr->buffer);
+ // Make sure isConstant flag is set.
+ tensorInfo.SetConstant();
+
switch (tensorInfo.GetDataType())
{
case armnn::DataType::Float32:
@@ -3941,6 +3942,9 @@ armnn::ConstTensor TfLiteParserImpl::CreateConstTensorNonPermuted(TensorRawPtr t
auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer);
CHECK_BUFFER_SIZE(bufferPtr, tensorInfo, tensorPtr->buffer);
+ // Make sure isConstant flag is set.
+ tensorInfo.SetConstant();
+
return ConstTensor(tensorInfo, bufferPtr->data.data());
}
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 3fe0823b03..319cdb106b 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1041,15 +1041,12 @@ void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c
{
const std::string descriptorName{"FullyConnectedQueueDescriptor"};
- uint32_t numInputs = 1;
- if (!m_Parameters.m_ConstantWeights)
+ uint32_t numInputs = 2;
+ if (m_Parameters.m_BiasEnabled)
{
- numInputs = 2;
- if (m_Parameters.m_BiasEnabled)
- {
- numInputs = 3;
- }
+ numInputs = 3;
}
+
ValidateNumInputs(workloadInfo, descriptorName, numInputs);
ValidateNumOutputs(workloadInfo, descriptorName, 1);
@@ -1063,30 +1060,12 @@ void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c
throw InvalidArgumentException(descriptorName + ": Input tensor must have 2 or 4 dimensions.");
}
- TensorInfo weightTensorInfo;
- if (m_Parameters.m_ConstantWeights)
- {
- ValidatePointer(m_Weight, descriptorName, "weight");
- weightTensorInfo = m_Weight->GetTensorInfo();
- }
- else
- {
- weightTensorInfo = workloadInfo.m_InputTensorInfos[1];
- }
+ TensorInfo weightTensorInfo = workloadInfo.m_InputTensorInfos[1];
ValidateTensorNumDimensions(weightTensorInfo, descriptorName, 2, "weight");
if (m_Parameters.m_BiasEnabled)
{
- TensorInfo biasTensorInfo;
- if (m_Parameters.m_ConstantWeights)
- {
- ValidatePointer(m_Bias, descriptorName, "bias");
- biasTensorInfo = m_Bias->GetTensorInfo();
- }
- else
- {
- biasTensorInfo = workloadInfo.m_InputTensorInfos[2];
- }
+ TensorInfo biasTensorInfo = workloadInfo.m_InputTensorInfos[2];
// Validates type and quantization values.
ValidateBiasTensorQuantization(biasTensorInfo, inputTensorInfo, weightTensorInfo, descriptorName);
ValidateTensorDataType(biasTensorInfo, GetBiasDataType(inputTensorInfo.GetDataType()), descriptorName, "bias");
@@ -1894,11 +1873,9 @@ void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
};
ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName);
-
- if (inputTensorInfo != outputTensorInfo)
- {
- throw InvalidArgumentException(descriptorName + ": Input and output tensor infos do not match.");
- }
+ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
+ ValidateTensorShapesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
+ ValidateTensorQuantizationSpace(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
}
void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
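After this change the FullyConnected workload always reads weights, and bias when enabled, from its input tensors instead of the m_Weight/m_Bias members, so the slot layout is fixed. For reference, a sketch of the layout (not normative API):

    // FullyConnected input slots with ConstTensorsAsInputs:
    //   [0] input
    //   [1] weights (output of a ConstantLayer, or a dynamic network input)
    //   [2] bias, present only when m_Parameters.m_BiasEnabled
    uint32_t numInputs = m_Parameters.m_BiasEnabled ? 3u : 2u;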
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 1c18551679..3f5972dab6 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -36,7 +36,11 @@ const TensorInfo OverrideDataType(const TensorInfo& info, Optional<DataType> typ
return info;
}
- return TensorInfo(info.GetShape(), type.value(), info.GetQuantizationScale(), info.GetQuantizationOffset());
+ return TensorInfo(info.GetShape(),
+ type.value(),
+ info.GetQuantizationScale(),
+ info.GetQuantizationOffset(),
+ info.IsConstant());
}
} // anonymous namespace
@@ -364,16 +368,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
TensorInfo weightsInfo;
const TensorInfo* weightsInfoPtr = nullptr;
- if (descriptor.m_ConstantWeights)
- {
- ARMNN_ASSERT(cLayer->m_Weight.get() != nullptr);
- weightsInfo = OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType);
- }
- else
- {
- weightsInfo = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), dataType);
-
- }
+ weightsInfo = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), dataType);
weightsInfoPtr = &weightsInfo;
TensorInfo biasInfo;
@@ -385,17 +380,8 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
if (descriptor.m_BiasEnabled)
{
- if(descriptor.m_ConstantWeights)
- {
- ARMNN_ASSERT(cLayer->m_Bias.get() != nullptr);
- biasInfo = OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType));
- biasInfoPtr = &biasInfo;
- }
- else
- {
- biasInfo = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), dataType);
- biasInfoPtr = &biasInfo;
- }
+ biasInfo = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), dataType);
+ biasInfoPtr = &biasInfo;
}
else
{
diff --git a/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp
index 923d6f3641..af6b56852a 100644
--- a/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp
@@ -28,10 +28,7 @@ armnn::INetworkPtr CreateFullyConnectedNetworkNonConstWeights(const armnn::Tenso
armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input");
armnn::IConnectableLayer* weightsInputLayer = network->AddInputLayer(1, "Weights_Input");
- armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor,
- armnn::EmptyOptional(),
- armnn::EmptyOptional(),
- "Fully_Connected");
+ armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected");
armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "Output");
Connect(inputLayer, fullyConnectedLayer, inputTensorInfo, 0, 0);
@@ -41,6 +38,52 @@ armnn::INetworkPtr CreateFullyConnectedNetworkNonConstWeights(const armnn::Tenso
return network;
}
+armnn::INetworkPtr CreateFullyConnectedNetworkNonConstWeightsConstBias(const armnn::TensorInfo& inputTensorInfo,
+ const armnn::TensorInfo& outputTensorInfo,
+ const armnn::TensorInfo& weightsTensorInfo,
+ const armnn::TensorInfo& biasTensorInfo,
+ const armnn::ConstTensor& biasConstantTensor,
+ armnn::FullyConnectedDescriptor descriptor)
+{
+ armnn::INetworkPtr network(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input");
+ armnn::IConnectableLayer* weightsInputLayer = network->AddInputLayer(1, "Weights_Input");
+ armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(biasConstantTensor, "Bias");
+ armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected");
+ armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "Output");
+
+ Connect(inputLayer, fullyConnectedLayer, inputTensorInfo, 0, 0);
+ Connect(weightsInputLayer, fullyConnectedLayer, weightsTensorInfo, 0, 1);
+ Connect(biasLayer, fullyConnectedLayer, biasTensorInfo, 0, 2);
+ Connect(fullyConnectedLayer, outputLayer, outputTensorInfo, 0, 0);
+
+ return network;
+}
+
+armnn::INetworkPtr CreateFullyConnectedNetworkConstWeightsNonConstBias(const armnn::TensorInfo& inputTensorInfo,
+ const armnn::TensorInfo& outputTensorInfo,
+ const armnn::TensorInfo& weightsTensorInfo,
+ const armnn::TensorInfo& biasTensorInfo,
+ const armnn::ConstTensor& weightsConstantTensor,
+ armnn::FullyConnectedDescriptor descriptor)
+{
+ armnn::INetworkPtr network(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input");
+ armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weightsConstantTensor, "Weights");
+ armnn::IConnectableLayer* biasLayer = network->AddInputLayer(2, "Bias_Input");
+ armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected");
+ armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "Output");
+
+ Connect(inputLayer, fullyConnectedLayer, inputTensorInfo, 0, 0);
+ Connect(weightsLayer, fullyConnectedLayer, weightsTensorInfo, 0, 1);
+ Connect(biasLayer, fullyConnectedLayer, biasTensorInfo, 0, 2);
+ Connect(fullyConnectedLayer, outputLayer, outputTensorInfo, 0, 0);
+
+ return network;
+}
+
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
void FullyConnectedWithDynamicWeightsEndToEnd(const std::vector<armnn::BackendId>& backends)
{
@@ -94,4 +137,123 @@ void FullyConnectedWithDynamicWeightsEndToEnd(const std::vector<armnn::BackendId
backends,
1.0f);
}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void FullyConnectedWithDynamicOrConstantInputsEndToEnd(const std::vector<armnn::BackendId>& backends,
+ const bool transposeWeights,
+ const bool constantWeightsOrBias)
+{
+ unsigned int inputWidth = 1;
+ unsigned int inputHeight = 1;
+ unsigned int inputChannels = 5;
+ unsigned int inputNum = 2;
+
+ unsigned int outputChannels = 3;
+ unsigned int outputNum = 2;
+
+ unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth };
+ unsigned int outputShape[] = { outputNum, outputChannels };
+ unsigned int weightsShape[] = { inputChannels, outputChannels };
+
+ if (transposeWeights)
+ {
+ std::swap(weightsShape[0], weightsShape[1]);
+ }
+
+ unsigned int biasShape[] = { outputChannels };
+
+ armnn::TensorInfo inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+ armnn::TensorInfo outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32);
+ armnn::TensorInfo weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32);
+ armnn::TensorInfo biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32);
+
+ std::vector<float> input =
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+ 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+ };
+
+ std::vector<float> weights =
+ {
+ .5f, 2.f, .5f,
+ .5f, 2.f, 1.f,
+ .5f, 2.f, 2.f,
+ .5f, 2.f, 3.f,
+ .5f, 2.f, 4.f
+ };
+
+ if (transposeWeights)
+ {
+ weights =
+ {
+ .5f, .5f, .5f, .5f, .5f,
+ 2.f, 2.f, 2.f, 2.f, 2.f,
+ .5f, 1.f, 2.f, 3.f, 4.f
+ };
+ }
+
+ std::vector<float> biasValues = std::vector<float>({10.f, 20.f, 30.f});
+
+ std::vector<float> expectedOutput =
+ {
+ 0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0],
+ 2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1],
+ 0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2],
+
+ 2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0],
+ 10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1],
+ 2.5f + 4.0f + 6.0f + 6.f + 4.f + biasValues[2]
+ };
+
+ FullyConnectedDescriptor descriptor;
+ descriptor.m_BiasEnabled = true;
+ descriptor.m_TransposeWeightMatrix = transposeWeights;
+ descriptor.m_ConstantWeights = constantWeightsOrBias;
+
+ if (!constantWeightsOrBias)
+ {
+ // Tests non-constant weights and constant bias.
+ ConstTensor biasConstantTensor(biasesDesc, biasValues.data());
+
+ armnn::INetworkPtr network = CreateFullyConnectedNetworkNonConstWeightsConstBias(inputTensorInfo,
+ outputTensorInfo,
+ weightsDesc,
+ biasesDesc,
+ biasConstantTensor,
+ descriptor);
+ CHECK(network);
+
+ std::map<int, std::vector<T>> inputTensorData = {{ 0, input }, {1, weights}};
+ std::map<int, std::vector<T>> expectedOutputTensorData = {{ 0, expectedOutput }};
+
+ EndToEndLayerTestImpl<ArmnnType, ArmnnType>(move(network),
+ inputTensorData,
+ expectedOutputTensorData,
+ backends,
+ 1.0f);
+ }
+ else
+ {
+ // Tests constant weights and non-constant bias.
+ ConstTensor weightsConstantTensor(weightsDesc, weights.data());
+
+ armnn::INetworkPtr network = CreateFullyConnectedNetworkConstWeightsNonConstBias(inputTensorInfo,
+ outputTensorInfo,
+ weightsDesc,
+ biasesDesc,
+ weightsConstantTensor,
+ descriptor);
+ CHECK(network);
+
+ std::map<int, std::vector<T>> inputTensorData = {{ 0, input }, {2, biasValues}};
+ std::map<int, std::vector<T>> expectedOutputTensorData = {{ 0, expectedOutput }};
+
+ EndToEndLayerTestImpl<ArmnnType, ArmnnType>(move(network),
+ inputTensorData,
+ expectedOutputTensorData,
+ backends,
+ 1.0f);
+ }
+}
+
} // anonymous namespace
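A hypothetical caller from a backend's end-to-end suite; the boolean pairing mirrors the helper's two branches (the chosen argument values here are an assumption):

    TEST_CASE("RefFullyConnectedEndToEndTestNonConstantWeightsConstantBiasesFloat32")
    {
        // transposeWeights = true; constantWeightsOrBias = false selects the
        // non-constant-weights / constant-bias branch of the helper above.
        FullyConnectedWithDynamicOrConstantInputsEndToEnd<armnn::DataType::Float32>(defaultBackends, true, false);
    }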
diff --git a/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp
index c47048e566..dcf87fe92b 100644
--- a/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp
@@ -22,56 +22,6 @@
template<typename T, typename B>
LayerTestResult<T, 2> SimpleFullyConnectedTestImpl(
- armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
- const armnn::ITensorHandleFactory& tensorHandleFactory,
- armnn::TensorInfo inputTensorInfo,
- armnn::TensorInfo outputTensorInfo,
- armnn::TensorInfo weightsDesc,
- armnn::TensorInfo biasesDesc,
- std::vector<T>& weights,
- std::vector<B>& bias,
- std::vector<T>& input,
- bool biasEnabled,
- bool transposeWeights)
-{
- std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
- std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
-
- armnn::FullyConnectedQueueDescriptor data;
- armnn::WorkloadInfo info;
- armnn::ScopedTensorHandle weightsTensor(weightsDesc);
- armnn::ScopedTensorHandle biasTensor(biasesDesc);
-
- std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
-
- AllocateAndCopyDataToITensorHandle(&weightsTensor, weights.data());
- AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
-
- AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
- AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
- data.m_Weight = &weightsTensor;
- data.m_Bias = &biasTensor;
- data.m_Parameters.m_BiasEnabled = biasEnabled;
- data.m_Parameters.m_TransposeWeightMatrix = transposeWeights;
-
- std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFullyConnected(data, info);
- LayerTestResult<T, 2> result(outputTensorInfo);
-
- inputHandle->Allocate();
- outputHandle->Allocate();
- CopyDataToITensorHandle(inputHandle.get(), input.data());
-
- ExecuteWorkload(*workload, memoryManager);
-
- CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
- result.m_ActualData = actualOutput;
-
- return result;
-}
-
-template<typename T, typename B>
-LayerTestResult<T, 2> SimpleFullyConnectedTestWeightsAsInputsImpl(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
const armnn::ITensorHandleFactory& tensorHandleFactory,
@@ -83,7 +33,8 @@ LayerTestResult<T, 2> SimpleFullyConnectedTestWeightsAsInputsImpl(
std::vector<B>& bias,
std::vector<T>& input,
bool biasEnabled,
- bool transposeWeights)
+ bool transposeWeights,
+ bool constantWeights)
{
std::unique_ptr<armnn::ITensorHandle> input0Handle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
std::unique_ptr<armnn::ITensorHandle> input1Handle = tensorHandleFactory.CreateTensorHandle(weightsTensorInfo);
@@ -93,13 +44,23 @@ LayerTestResult<T, 2> SimpleFullyConnectedTestWeightsAsInputsImpl(
armnn::FullyConnectedQueueDescriptor data;
armnn::WorkloadInfo info;
+ armnn::ScopedTensorHandle weightsTensor(weightsTensorInfo);
+ armnn::ScopedTensorHandle biasTensor(biasesTensorInfo);
+
+ AllocateAndCopyDataToITensorHandle(&weightsTensor, weights.data());
+ AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
AddInputToWorkload(data, info, inputTensorInfo, input0Handle.get());
AddInputToWorkload(data, info, weightsTensorInfo, input1Handle.get());
AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+ // Need to set these, as the layer members will be null when creating the workload because the optimization hasn't been run.
+ data.m_Weight = &weightsTensor;
+ data.m_Bias = &biasTensor;
+
data.m_Parameters.m_BiasEnabled = biasEnabled;
data.m_Parameters.m_TransposeWeightMatrix = transposeWeights;
- data.m_Parameters.m_ConstantWeights = false;
+ data.m_Parameters.m_ConstantWeights = constantWeights;
std::unique_ptr<armnn::ITensorHandle> input2Handle = nullptr;
if (biasEnabled)
@@ -180,36 +141,19 @@ LayerTestResult<T, 2> FullyConnectedTest(
std::vector<int32_t> bias = {9250, 67500};
- if (constantWeights)
- {
- result = SimpleFullyConnectedTestImpl<T>(workloadFactory,
- memoryManager,
- tensorHandleFactory,
- inputTensorInfo,
- outputTensorInfo,
- weightsDesc,
- biasesDesc,
- weights,
- bias,
- input,
- biasEnabled,
- true);
- }
- else
- {
- result = SimpleFullyConnectedTestWeightsAsInputsImpl<T>(workloadFactory,
- memoryManager,
- tensorHandleFactory,
- inputTensorInfo,
- outputTensorInfo,
- weightsDesc,
- biasesDesc,
- weights,
- bias,
- input,
- biasEnabled,
- true);
- }
+ result = SimpleFullyConnectedTestImpl<T>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ inputTensorInfo,
+ outputTensorInfo,
+ weightsDesc,
+ biasesDesc,
+ weights,
+ bias,
+ input,
+ biasEnabled,
+ true,
+ constantWeights);
if (biasEnabled)
{
@@ -299,7 +243,7 @@ LayerTestResult<T, 2> FullyConnectedLargeTestCommon(
inputTensorInfo, outputTensorInfo,
weightsDesc, biasesDesc,
weights, biasValues, input,
- true, transposeWeights
+ true, transposeWeights, true
);
result.m_ExpectedData = armnnUtils::QuantizedVector<T>({ 965432.0f }, qScale, qOffset);
@@ -408,7 +352,7 @@ LayerTestResult<float, 2> FullyConnectedFloat32Test(
inputTensorInfo, outputTensorInfo,
weightsDesc, biasesDesc,
weights, biasValues, input,
- biasEnabled, transposeWeights
+ biasEnabled, transposeWeights, true
);
std::vector<float> expectedOutput =
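With the two implementations merged above, every caller now selects the weight path through a single flag. A minimal usage sketch, assuming the FullyConnectedTest signature implied by this file's hunks (template argument and parameter order are inferred from the call sites, not confirmed):

    // Hypothetical call sites: constantWeights simply toggles
    // FullyConnectedDescriptor::m_ConstantWeights on the queue descriptor;
    // the merged helper supplies weights and bias the same way in both cases.
    auto constantResult = FullyConnectedTest<armnn::DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory,
        /*biasEnabled=*/true, /*constantWeights=*/true);

    auto dynamicResult = FullyConnectedTest<armnn::DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory,
        /*biasEnabled=*/true, /*constantWeights=*/false);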
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index 441f4ebdf4..2855957e31 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -12,7 +12,8 @@ namespace armnn
const BackendCapabilities cpuRefCapabilities("CpuRef",
{
{"NonConstWeights", true},
- {"AsyncExecution", true}
+ {"AsyncExecution", true},
+ {"ConstantTensorsAsInputs", true}
});
const std::set<armnn::BackendCapability> oldCpuRefCapabilities {
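Since the reference backend now advertises the feature, a caller could gate its network construction on the query result. A minimal sketch, assuming the HasCapability helpers declared in armnn/BackendHelper.hpp (the exact overload used here is an assumption):

    #include <armnn/BackendHelper.hpp>

    // Ask whether CpuRef supports constant tensors presented as layer inputs,
    // mirroring how the "NonConstWeights" capability is queried elsewhere.
    const bool constAsInputs =
        armnn::HasCapability("ConstantTensorsAsInputs", armnn::BackendId("CpuRef"));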
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index 4293ef54f3..fae8d0cdd4 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -486,6 +486,24 @@ TEST_CASE("RefCreateFullyConnectedWithBlobWorkloadTest")
TensorInfo({ 3, 7 }, armnn::DataType::Float32, outputQScale));
}
+TEST_CASE("CreateFullyConnectedWorkloadWeightsBiasesAsInputsFloat32")
+{
+ Graph graph;
+ RefWorkloadFactory factory = GetFactory();
+
+ auto workload =
+ CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest<RefFullyConnectedWorkload,
+ armnn::DataType::Float32>(factory, graph);
+
+    // Checks that outputs and inputs are as we expect them
+    // (see definition of CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest).
+ float inputsQScale = 0.0f;
+ float outputQScale = 0.0f;
+ CheckInputsOutput(std::move(workload),
+ TensorInfo({ 3, 1, 4, 5 }, armnn::DataType::Float32, inputsQScale),
+ TensorInfo({ 7, 20 }, armnn::DataType::Float32, inputsQScale),
+ TensorInfo({ 3, 7 }, armnn::DataType::Float32, outputQScale));
+}
+
template <typename FullyConnectedWorkloadType, armnn::DataType DataType>
static void RefCreateFullyConnectedWorkloadTest()
{
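The new test case exercises a graph where the input, weights, and bias all arrive through InputLayers instead of member ConstTensors. A sketch of the topology the CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest helper presumably builds (internal Graph API; names and shapes are illustrative, not taken from the helper's definition):

    // Three InputLayers feed the FullyConnectedLayer's three input slots.
    armnn::Graph graph;
    armnn::FullyConnectedDescriptor desc;
    desc.m_BiasEnabled     = true;
    desc.m_ConstantWeights = false;

    auto* const fc      = graph.AddLayer<armnn::FullyConnectedLayer>(desc, "fc");
    auto* const input   = graph.AddLayer<armnn::InputLayer>(0, "input");
    auto* const weights = graph.AddLayer<armnn::InputLayer>(1, "weights");
    auto* const bias    = graph.AddLayer<armnn::InputLayer>(2, "bias");
    auto* const output  = graph.AddLayer<armnn::OutputLayer>(0, "output");

    input->GetOutputSlot(0).Connect(fc->GetInputSlot(0));
    weights->GetOutputSlot(0).Connect(fc->GetInputSlot(1));
    bias->GetOutputSlot(0).Connect(fc->GetInputSlot(2));
    fc->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    // TensorInfos on each output slot are omitted here for brevity; the real
    // helper must set them before the workload is created.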
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 69a2048078..424df977c8 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -600,11 +600,21 @@ TEST_CASE("RefFillEndToEndTestInt32")
FillEndToEnd<armnn::DataType::Signed32>(defaultBackends);
}
-TEST_CASE("RefFullyConnectedEndToEndTestInt32")
+TEST_CASE("RefFullyConnectedEndToEndTestFloat32")
{
FullyConnectedWithDynamicWeightsEndToEnd<armnn::DataType::Float32>(defaultBackends);
}
+TEST_CASE("RefFullyConnectedEndToEndTestNonConstantWeightsConstantBiasesFloat32")
+{
+ FullyConnectedWithDynamicOrConstantInputsEndToEnd<armnn::DataType::Float32>(defaultBackends, true, true);
+}
+
+TEST_CASE("RefFullyConnectedEndToEndTestConstantWeightsNonConstantBiasesFloat32")
+{
+ FullyConnectedWithDynamicOrConstantInputsEndToEnd<armnn::DataType::Float32>(defaultBackends, true, false);
+}
+
TEST_CASE("RefGatherFloatTest")
{
GatherEndToEnd<armnn::DataType::Float32>(defaultBackends);
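These end-to-end cases drive the commit's central change through the public API: constant weights and biases become Constant layers feeding the FullyConnected input slots. A minimal network-building sketch, assuming the descriptor-only AddFullyConnectedLayer overload this patch introduces (shapes and names are illustrative; bias is left disabled to keep the sketch short):

    #include <armnn/INetwork.hpp>
    using namespace armnn;

    INetworkPtr net = INetwork::Create();

    FullyConnectedDescriptor desc;
    desc.m_BiasEnabled     = false;
    desc.m_ConstantWeights = true;

    IConnectableLayer* input  = net->AddInputLayer(0, "input");
    IConnectableLayer* fc     = net->AddFullyConnectedLayer(desc, "fc");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    // The weights are now a Constant layer wired into input slot 1.
    std::vector<float> weightsData(6, 1.0f);
    TensorInfo weightsInfo({ 3, 2 }, DataType::Float32);
    weightsInfo.SetConstant();
    ConstTensor weights(weightsInfo, weightsData);
    IConnectableLayer* weightsLayer = net->AddConstantLayer(weights, "weights");

    input->GetOutputSlot(0).Connect(fc->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(fc->GetInputSlot(1));
    fc->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 3 }, DataType::Float32));
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    fc->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 2 }, DataType::Float32));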
diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
index 99e3eab075..5a7951ec48 100644
--- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
+++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
@@ -16,20 +16,6 @@ RefFullyConnectedWorkload::RefFullyConnectedWorkload(
const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
{
- if (descriptor.m_Parameters.m_ConstantWeights)
- {
- m_Weight = std::make_unique<ScopedTensorHandle>(*(descriptor.m_Weight));
- const TensorInfo& rWeightInfo = m_Weight->GetTensorInfo();
- m_WeightShape = rWeightInfo.GetShape();
- m_WeightDecoder = MakeDecoder<float>(rWeightInfo, m_Weight->Map(true));
-
- if (descriptor.m_Parameters.m_BiasEnabled)
- {
- m_Bias = std::make_unique<ScopedTensorHandle>(*(descriptor.m_Bias));
- const TensorInfo& biasInfo = m_Bias->GetTensorInfo();
- m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias->Map(true));
- }
- }
}
void RefFullyConnectedWorkload::PostAllocationConfigure()
@@ -44,18 +30,15 @@ void RefFullyConnectedWorkload::PostAllocationConfigure(std::vector<ITensorHandl
ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1);
m_InputShape = inputInfo.GetShape();
- if (!m_Data.m_Parameters.m_ConstantWeights)
+ const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]);
+    ARMNN_ASSERT(rWeightInfo.GetNumDimensions() > 1);
+ m_WeightShape = rWeightInfo.GetShape();
+ m_WeightDecoder = MakeDecoder<float>(rWeightInfo);
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
{
- const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]);
- ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1);
- m_WeightShape = rWeightInfo.GetShape();
- m_WeightDecoder = MakeDecoder<float>(rWeightInfo);
-
- if (m_Data.m_Parameters.m_BiasEnabled)
- {
- const TensorInfo& biasInfo = GetTensorInfo(inputs[2]);
- m_BiasDecoder = MakeDecoder<float>(biasInfo);
- }
+ const TensorInfo& biasInfo = GetTensorInfo(inputs[2]);
+ m_BiasDecoder = MakeDecoder<float>(biasInfo);
}
const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
@@ -87,13 +70,10 @@ void RefFullyConnectedWorkload::Execute(std::vector<ITensorHandle*> inputs, std:
std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
std::unique_ptr<Encoder<float>> OutputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
- if (!m_Data.m_Parameters.m_ConstantWeights)
+ m_WeightDecoder->Reset(inputs[1]->Map());
+ if (m_Data.m_Parameters.m_BiasEnabled)
{
- m_WeightDecoder->Reset(inputs[1]->Map());
- if (m_Data.m_Parameters.m_BiasEnabled)
- {
- m_BiasDecoder->Reset(inputs[2]->Map());
- }
+ m_BiasDecoder->Reset(inputs[2]->Map());
}
FullyConnected(m_InputShape,