From 4422ceca976a88aac49b21808a43e465bc87a35e Mon Sep 17 00:00:00 2001 From: Finn Williams Date: Mon, 22 Mar 2021 17:51:06 +0000 Subject: Fix graph copy memory spike * Change layer storage of ConstTensors to std::shared_ptr * Change clone to share ConstTensor rather than copy * Remove uses of non-const GetTensor() call * Reduce scope of non-optimized network in ExeNet, so memory can be released after use Signed-off-by: Finn Williams Change-Id: Ibb2c7309d12411d21405bd6024c76bcdf5404545 --- src/armnn/Layer.cpp | 4 +- src/armnn/Layer.hpp | 2 +- src/armnn/Network.cpp | 138 ++++++++++----------- src/armnn/layers/BatchNormalizationLayer.cpp | 8 +- src/armnn/layers/BatchNormalizationLayer.hpp | 8 +- src/armnn/layers/ConstantLayer.cpp | 2 +- src/armnn/layers/ConstantLayer.hpp | 2 +- src/armnn/layers/Convolution2dLayer.cpp | 4 +- src/armnn/layers/Convolution2dLayer.hpp | 4 +- src/armnn/layers/DepthwiseConvolution2dLayer.cpp | 4 +- src/armnn/layers/DepthwiseConvolution2dLayer.hpp | 4 +- src/armnn/layers/DetectionPostProcessLayer.cpp | 2 +- src/armnn/layers/DetectionPostProcessLayer.hpp | 2 +- src/armnn/layers/FullyConnectedLayer.cpp | 4 +- src/armnn/layers/FullyConnectedLayer.hpp | 4 +- src/armnn/layers/LstmLayer.cpp | 42 +++---- src/armnn/layers/LstmLayer.hpp | 42 +++---- src/armnn/layers/QLstmLayer.cpp | 42 +++---- src/armnn/layers/QLstmLayer.hpp | 42 +++---- src/armnn/layers/QuantizedLstmLayer.cpp | 24 ++-- src/armnn/layers/QuantizedLstmLayer.hpp | 24 ++-- src/armnn/layers/TransposeConvolution2dLayer.cpp | 4 +- src/armnn/layers/TransposeConvolution2dLayer.hpp | 4 +- .../optimizations/AddBroadcastReshapeLayer.hpp | 2 +- src/armnn/optimizations/ConvertConstants.hpp | 16 +-- .../optimizations/ConvertFp32NetworkToBf16.hpp | 7 +- src/armnn/test/GraphTests.cpp | 24 ++++ .../optimizations/ConvertConstantsBFloatTests.cpp | 4 +- .../ConvertConstantsFloatToHalfTests.cpp | 2 +- .../ConvertConstantsHalfToFloatTests.cpp | 2 +- .../Fp32NetworkToBf16ConverterTests.cpp | 5 +- .../reference/workloads/RefLstmWorkload.cpp | 42 +++---- .../reference/workloads/RefQLstmWorkload.cpp | 50 ++++---- tests/InferenceModel.hpp | 16 ++- 34 files changed, 310 insertions(+), 276 deletions(-) diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index c9733e822b..13d834f6ae 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -274,9 +274,9 @@ void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, void Layer::ReleaseConstantData() { // Now free up the static data. - OperateOnConstantTensors([](std::unique_ptr& handle) + OperateOnConstantTensors([](std::shared_ptr& handle) { - handle.reset(nullptr); + handle.reset(); }); } diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index 2f5cacc3ce..5ab6b3152f 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -390,7 +390,7 @@ protected: LayerType* CloneBase(Graph& graph, Params&& ... 
params) const; // Retrieve the Handles to the constants - using ConstantTensors = std::vector>>; + using ConstantTensors = std::vector>>; virtual ConstantTensors GetConstantTensorsByRef() {return ConstantTensors(); }; // "Blob" diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 18a4d02fca..b9a0e47ec5 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -663,7 +663,7 @@ LayerT* ConvertBf16ToFp32Weight(Layer* l) std::vector newValues(info.GetNumElements()); armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32( - layer->m_Weight->template GetTensor(), info.GetNumElements(), newValues.data()); + layer->m_Weight->template GetConstTensor(), info.GetNumElements(), newValues.data()); TensorInfo newInfo(info.GetShape(), DataType::Float32); ConstTensor newInput(newInfo, newValues); @@ -1742,10 +1742,10 @@ IConnectableLayer* NetworkImpl::AddFullyConnectedLayerImpl(const FullyConnectedD if (fullyConnectedDescriptor.m_ConstantWeights) { - layer->m_Weight = std::make_unique(weights.value()); + layer->m_Weight = std::make_shared(weights.value()); if (fullyConnectedDescriptor.m_BiasEnabled) { - layer->m_Bias = std::make_unique(biases.value()); + layer->m_Bias = std::make_shared(biases.value()); } } @@ -1806,11 +1806,11 @@ IConnectableLayer* NetworkImpl::AddConvolution2dLayerImpl(const Convolution2dDes const auto layer = m_Graph->AddLayer(convolution2dDescriptor, name); - layer->m_Weight = std::make_unique(weights); + layer->m_Weight = std::make_shared(weights); if (convolution2dDescriptor.m_BiasEnabled) { - layer->m_Bias = std::make_unique(biases.value()); + layer->m_Bias = std::make_shared(biases.value()); } return layer; @@ -1854,11 +1854,11 @@ IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayerImpl( const auto layer = m_Graph->AddLayer(convolution2dDescriptor, name); - layer->m_Weight = std::make_unique(weights); + layer->m_Weight = std::make_shared(weights); if (convolution2dDescriptor.m_BiasEnabled) { - layer->m_Bias = std::make_unique(biases.value()); + layer->m_Bias = std::make_shared(biases.value()); } return layer; @@ -1903,7 +1903,7 @@ IConnectableLayer* NetworkImpl::AddDetectionPostProcessLayer(const armnn::Detect { const auto layer = m_Graph->AddLayer(descriptor, name); - layer->m_Anchors = std::make_unique(anchors); + layer->m_Anchors = std::make_shared(anchors); return layer; } @@ -2001,10 +2001,10 @@ IConnectableLayer* NetworkImpl::AddBatchNormalizationLayer(const BatchNormalizat { const auto layer = m_Graph->AddLayer(desc, name); - layer->m_Mean = std::make_unique(mean); - layer->m_Variance = std::make_unique(variance); - layer->m_Beta = std::make_unique(beta); - layer->m_Gamma = std::make_unique(gamma); + layer->m_Mean = std::make_shared(mean); + layer->m_Variance = std::make_shared(variance); + layer->m_Beta = std::make_shared(beta); + layer->m_Gamma = std::make_shared(gamma); return layer; } @@ -2061,7 +2061,7 @@ IConnectableLayer* NetworkImpl::AddConstantLayer(const ConstTensor& input, const { auto layer = m_Graph->AddLayer(name); - layer->m_LayerOutput = std::make_unique(input); + layer->m_LayerOutput = std::make_shared(input); return layer; } @@ -2097,23 +2097,23 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, //Lstm Basic Parameters layer->m_BasicParameters.m_InputToForgetWeights = - std::make_unique(*(params.m_InputToForgetWeights)); + std::make_shared(*(params.m_InputToForgetWeights)); layer->m_BasicParameters.m_InputToCellWeights = - std::make_unique(*(params.m_InputToCellWeights)); + 
std::make_shared(*(params.m_InputToCellWeights)); layer->m_BasicParameters.m_InputToOutputWeights = - std::make_unique(*(params.m_InputToOutputWeights)); + std::make_shared(*(params.m_InputToOutputWeights)); layer->m_BasicParameters.m_RecurrentToForgetWeights = - std::make_unique(*(params.m_RecurrentToForgetWeights)); + std::make_shared(*(params.m_RecurrentToForgetWeights)); layer->m_BasicParameters.m_RecurrentToCellWeights = - std::make_unique(*(params.m_RecurrentToCellWeights)); + std::make_shared(*(params.m_RecurrentToCellWeights)); layer->m_BasicParameters.m_RecurrentToOutputWeights = - std::make_unique(*(params.m_RecurrentToOutputWeights)); + std::make_shared(*(params.m_RecurrentToOutputWeights)); layer->m_BasicParameters.m_ForgetGateBias = - std::make_unique(*(params.m_ForgetGateBias)); + std::make_shared(*(params.m_ForgetGateBias)); layer->m_BasicParameters.m_CellBias = - std::make_unique(*(params.m_CellBias)); + std::make_shared(*(params.m_CellBias)); layer->m_BasicParameters.m_OutputGateBias = - std::make_unique(*(params.m_OutputGateBias)); + std::make_shared(*(params.m_OutputGateBias)); //Lstm Cifg parameters if(!descriptor.m_CifgEnabled) @@ -2135,11 +2135,11 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, "when CIFG is disabled."); } layer->m_CifgParameters.m_InputToInputWeights = - std::make_unique(*(params.m_InputToInputWeights)); + std::make_shared(*(params.m_InputToInputWeights)); layer->m_CifgParameters.m_RecurrentToInputWeights = - std::make_unique(*(params.m_RecurrentToInputWeights)); + std::make_shared(*(params.m_RecurrentToInputWeights)); layer->m_CifgParameters.m_InputGateBias = - std::make_unique(*(params.m_InputGateBias)); + std::make_shared(*(params.m_InputGateBias)); } //Lstm projection parameters @@ -2151,11 +2151,11 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, "when projection is enabled."); } layer->m_ProjectionParameters.m_ProjectionWeights = - std::make_unique(*(params.m_ProjectionWeights)); + std::make_shared(*(params.m_ProjectionWeights)); if(params.m_ProjectionBias != nullptr) { layer->m_ProjectionParameters.m_ProjectionBias = - std::make_unique(*(params.m_ProjectionBias)); + std::make_shared(*(params.m_ProjectionBias)); } } @@ -2171,7 +2171,7 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, } layer->m_PeepholeParameters.m_CellToInputWeights = - std::make_unique(*(params.m_CellToInputWeights)); + std::make_shared(*(params.m_CellToInputWeights)); } if(params.m_CellToForgetWeights == nullptr) @@ -2186,9 +2186,9 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, } layer->m_PeepholeParameters.m_CellToForgetWeights = - std::make_unique(*(params.m_CellToForgetWeights)); + std::make_shared(*(params.m_CellToForgetWeights)); layer->m_PeepholeParameters.m_CellToOutputWeights = - std::make_unique(*(params.m_CellToOutputWeights)); + std::make_shared(*(params.m_CellToOutputWeights)); } //Lstm Layer Normalization params @@ -2202,7 +2202,7 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, "when layer normalization is enabled and CIFG disabled."); } layer->m_LayerNormParameters.m_InputLayerNormWeights = - std::make_unique(*(params.m_InputLayerNormWeights)); + std::make_shared(*(params.m_InputLayerNormWeights)); } if(params.m_ForgetLayerNormWeights == nullptr) @@ -2221,11 +2221,11 @@ IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor, "when layer normalization is enabled."); } 
layer->m_LayerNormParameters.m_ForgetLayerNormWeights = - std::make_unique(*(params.m_ForgetLayerNormWeights)); + std::make_shared(*(params.m_ForgetLayerNormWeights)); layer->m_LayerNormParameters.m_CellLayerNormWeights = - std::make_unique(*(params.m_CellLayerNormWeights)); + std::make_shared(*(params.m_CellLayerNormWeights)); layer->m_LayerNormParameters.m_OutputLayerNormWeights = - std::make_unique(*(params.m_OutputLayerNormWeights)); + std::make_shared(*(params.m_OutputLayerNormWeights)); } return layer; } @@ -2320,11 +2320,11 @@ IConnectableLayer* NetworkImpl::AddTransposeConvolution2dLayer(const TransposeCo const auto layer = m_Graph->AddLayer(descriptor, name); - layer->m_Weight = std::make_unique(weights); + layer->m_Weight = std::make_shared(weights); if (descriptor.m_BiasEnabled) { - layer->m_Bias = std::make_unique(biases.value()); + layer->m_Bias = std::make_shared(biases.value()); } return layer; @@ -2356,33 +2356,33 @@ IConnectableLayer* NetworkImpl::AddQuantizedLstmLayer(const QuantizedLstmInputPa // InputToX weights layer->m_QuantizedLstmParameters.m_InputToInputWeights = - std::make_unique(params.GetInputToInputWeights()); + std::make_shared(params.GetInputToInputWeights()); layer->m_QuantizedLstmParameters.m_InputToForgetWeights = - std::make_unique(params.GetInputToForgetWeights()); + std::make_shared(params.GetInputToForgetWeights()); layer->m_QuantizedLstmParameters.m_InputToCellWeights = - std::make_unique(params.GetInputToCellWeights()); + std::make_shared(params.GetInputToCellWeights()); layer->m_QuantizedLstmParameters.m_InputToOutputWeights = - std::make_unique(params.GetInputToOutputWeights()); + std::make_shared(params.GetInputToOutputWeights()); // RecurrentToX weights layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights = - std::make_unique(params.GetRecurrentToInputWeights()); + std::make_shared(params.GetRecurrentToInputWeights()); layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights = - std::make_unique(params.GetRecurrentToForgetWeights()); + std::make_shared(params.GetRecurrentToForgetWeights()); layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights = - std::make_unique(params.GetRecurrentToCellWeights()); + std::make_shared(params.GetRecurrentToCellWeights()); layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights = - std::make_unique(params.GetRecurrentToOutputWeights()); + std::make_shared(params.GetRecurrentToOutputWeights()); // Bias layer->m_QuantizedLstmParameters.m_InputGateBias = - std::make_unique(params.GetInputGateBias()); + std::make_shared(params.GetInputGateBias()); layer->m_QuantizedLstmParameters.m_ForgetGateBias = - std::make_unique(params.GetForgetGateBias()); + std::make_shared(params.GetForgetGateBias()); layer->m_QuantizedLstmParameters.m_CellBias = - std::make_unique(params.GetCellBias()); + std::make_shared(params.GetCellBias()); layer->m_QuantizedLstmParameters.m_OutputGateBias = - std::make_unique(params.GetOutputGateBias()); + std::make_shared(params.GetOutputGateBias()); return layer; } @@ -2395,23 +2395,23 @@ IConnectableLayer* NetworkImpl::AddQLstmLayer(const QLstmDescriptor& descriptor // QLstm Basic Parameters layer->m_BasicParameters.m_InputToForgetWeights = - std::make_unique(*(params.m_InputToForgetWeights)); + std::make_shared(*(params.m_InputToForgetWeights)); layer->m_BasicParameters.m_InputToCellWeights = - std::make_unique(*(params.m_InputToCellWeights)); + std::make_shared(*(params.m_InputToCellWeights)); layer->m_BasicParameters.m_InputToOutputWeights = - 
std::make_unique(*(params.m_InputToOutputWeights)); + std::make_shared(*(params.m_InputToOutputWeights)); layer->m_BasicParameters.m_RecurrentToForgetWeights = - std::make_unique(*(params.m_RecurrentToForgetWeights)); + std::make_shared(*(params.m_RecurrentToForgetWeights)); layer->m_BasicParameters.m_RecurrentToCellWeights = - std::make_unique(*(params.m_RecurrentToCellWeights)); + std::make_shared(*(params.m_RecurrentToCellWeights)); layer->m_BasicParameters.m_RecurrentToOutputWeights = - std::make_unique(*(params.m_RecurrentToOutputWeights)); + std::make_shared(*(params.m_RecurrentToOutputWeights)); layer->m_BasicParameters.m_ForgetGateBias = - std::make_unique(*(params.m_ForgetGateBias)); + std::make_shared(*(params.m_ForgetGateBias)); layer->m_BasicParameters.m_CellBias = - std::make_unique(*(params.m_CellBias)); + std::make_shared(*(params.m_CellBias)); layer->m_BasicParameters.m_OutputGateBias = - std::make_unique(*(params.m_OutputGateBias)); + std::make_shared(*(params.m_OutputGateBias)); // QLstm Cifg parameters if(!descriptor.m_CifgEnabled) @@ -2433,11 +2433,11 @@ IConnectableLayer* NetworkImpl::AddQLstmLayer(const QLstmDescriptor& descriptor } layer->m_CifgParameters.m_InputToInputWeights = - std::make_unique(*(params.m_InputToInputWeights)); + std::make_shared(*(params.m_InputToInputWeights)); layer->m_CifgParameters.m_RecurrentToInputWeights = - std::make_unique(*(params.m_RecurrentToInputWeights)); + std::make_shared(*(params.m_RecurrentToInputWeights)); layer->m_CifgParameters.m_InputGateBias = - std::make_unique(*(params.m_InputGateBias)); + std::make_shared(*(params.m_InputGateBias)); } // QLstm Projection parameters @@ -2449,13 +2449,13 @@ IConnectableLayer* NetworkImpl::AddQLstmLayer(const QLstmDescriptor& descriptor } layer->m_ProjectionParameters.m_ProjectionWeights = - std::make_unique(*(params.m_ProjectionWeights)); + std::make_shared(*(params.m_ProjectionWeights)); // Projection bias is optional even if projection is enabled if(params.m_ProjectionWeights != nullptr) { layer->m_ProjectionParameters.m_ProjectionBias = - std::make_unique(*(params.m_ProjectionBias)); + std::make_shared(*(params.m_ProjectionBias)); } } @@ -2481,13 +2481,13 @@ IConnectableLayer* NetworkImpl::AddQLstmLayer(const QLstmDescriptor& descriptor } layer->m_PeepholeParameters.m_CellToInputWeights = - std::make_unique(*(params.m_CellToInputWeights)); + std::make_shared(*(params.m_CellToInputWeights)); } layer->m_PeepholeParameters.m_CellToForgetWeights = - std::make_unique(*(params.m_CellToForgetWeights)); + std::make_shared(*(params.m_CellToForgetWeights)); layer->m_PeepholeParameters.m_CellToOutputWeights = - std::make_unique(*(params.m_CellToOutputWeights)); + std::make_shared(*(params.m_CellToOutputWeights)); } // QLstm Layer Normalization params @@ -2516,15 +2516,15 @@ IConnectableLayer* NetworkImpl::AddQLstmLayer(const QLstmDescriptor& descriptor } layer->m_LayerNormParameters.m_InputLayerNormWeights = - std::make_unique(*(params.m_InputLayerNormWeights)); + std::make_shared(*(params.m_InputLayerNormWeights)); } layer->m_LayerNormParameters.m_ForgetLayerNormWeights = - std::make_unique(*(params.m_ForgetLayerNormWeights)); + std::make_shared(*(params.m_ForgetLayerNormWeights)); layer->m_LayerNormParameters.m_CellLayerNormWeights = - std::make_unique(*(params.m_CellLayerNormWeights)); + std::make_shared(*(params.m_CellLayerNormWeights)); layer->m_LayerNormParameters.m_OutputLayerNormWeights = - std::make_unique(*(params.m_OutputLayerNormWeights)); + 
std::make_shared(*(params.m_OutputLayerNormWeights)); } return layer; } diff --git a/src/armnn/layers/BatchNormalizationLayer.cpp b/src/armnn/layers/BatchNormalizationLayer.cpp index 6df5195a55..680d9e56a0 100644 --- a/src/armnn/layers/BatchNormalizationLayer.cpp +++ b/src/armnn/layers/BatchNormalizationLayer.cpp @@ -41,10 +41,10 @@ BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const { auto layer = CloneBase(graph, m_Param, GetName()); - layer->m_Mean = m_Mean ? std::make_unique(*m_Mean) : nullptr; - layer->m_Variance = m_Variance ? std::make_unique(*m_Variance) : nullptr; - layer->m_Beta = m_Beta ? std::make_unique(*m_Beta) : nullptr; - layer->m_Gamma = m_Gamma ? std::make_unique(*m_Gamma) : nullptr; + layer->m_Mean = m_Mean ? m_Mean : nullptr; + layer->m_Variance = m_Variance ? m_Variance : nullptr; + layer->m_Beta = m_Beta ? m_Beta : nullptr; + layer->m_Gamma = m_Gamma ? m_Gamma : nullptr; return std::move(layer); } diff --git a/src/armnn/layers/BatchNormalizationLayer.hpp b/src/armnn/layers/BatchNormalizationLayer.hpp index dab75d1e12..bf9e4b7917 100644 --- a/src/armnn/layers/BatchNormalizationLayer.hpp +++ b/src/armnn/layers/BatchNormalizationLayer.hpp @@ -16,13 +16,13 @@ class BatchNormalizationLayer : public LayerWithParameters m_Mean; + std::shared_ptr m_Mean; /// A unique pointer to store Variance values - std::unique_ptr m_Variance; + std::shared_ptr m_Variance; /// A unique pointer to store Beta values - std::unique_ptr m_Beta; + std::shared_ptr m_Beta; /// A unique pointer to store Gamma values - std::unique_ptr m_Gamma; + std::shared_ptr m_Gamma; /// Makes a workload for the BatchNormalization type. /// @param [in] graph The graph where this layer can be found. diff --git a/src/armnn/layers/ConstantLayer.cpp b/src/armnn/layers/ConstantLayer.cpp index 31e9e974cf..8ae34b6709 100644 --- a/src/armnn/layers/ConstantLayer.cpp +++ b/src/armnn/layers/ConstantLayer.cpp @@ -32,7 +32,7 @@ ConstantLayer* ConstantLayer::Clone(Graph& graph) const // Cloned layers share the same layer output object. auto layer = CloneBase(graph, GetName()); - layer->m_LayerOutput = m_LayerOutput ? std::make_unique(*m_LayerOutput) : nullptr; + layer->m_LayerOutput = m_LayerOutput ? m_LayerOutput : nullptr; return std::move(layer); } diff --git a/src/armnn/layers/ConstantLayer.hpp b/src/armnn/layers/ConstantLayer.hpp index 9d91551df9..ff4c03775f 100644 --- a/src/armnn/layers/ConstantLayer.hpp +++ b/src/armnn/layers/ConstantLayer.hpp @@ -43,7 +43,7 @@ public: void ExecuteStrategy(IStrategy& strategy) const override; - std::unique_ptr m_LayerOutput; + std::shared_ptr m_LayerOutput; protected: /// Constructor to create a ConstantLayer. diff --git a/src/armnn/layers/Convolution2dLayer.cpp b/src/armnn/layers/Convolution2dLayer.cpp index 0c3040ea6e..cf7cf0f129 100644 --- a/src/armnn/layers/Convolution2dLayer.cpp +++ b/src/armnn/layers/Convolution2dLayer.cpp @@ -70,11 +70,11 @@ Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const { auto layer = CloneBase(graph, m_Param, GetName()); - layer->m_Weight = m_Weight ? std::make_unique(*m_Weight) : nullptr; + layer->m_Weight = m_Weight ? m_Weight : nullptr; if (layer->m_Param.m_BiasEnabled) { - layer->m_Bias = m_Bias ? std::make_unique(*m_Bias) : nullptr; + layer->m_Bias = m_Bias ? 
m_Bias : nullptr; } return std::move(layer); diff --git a/src/armnn/layers/Convolution2dLayer.hpp b/src/armnn/layers/Convolution2dLayer.hpp index 440c80dfa9..2d5ab194de 100644 --- a/src/armnn/layers/Convolution2dLayer.hpp +++ b/src/armnn/layers/Convolution2dLayer.hpp @@ -17,9 +17,9 @@ class Convolution2dLayer : public LayerWithParameters public: /// A unique pointer to store Weight values. - std::unique_ptr m_Weight; + std::shared_ptr m_Weight; /// A unique pointer to store Bias values. - std::unique_ptr m_Bias; + std::shared_ptr m_Bias; /// Makes a workload for the Convolution2d type. /// @param [in] graph The graph where this layer can be found. diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp index 1871b7d15d..0b2114a196 100644 --- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp +++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp @@ -71,11 +71,11 @@ std::unique_ptr DepthwiseConvolution2dLayer::CreateWorkload(const IWo DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const { auto layer = CloneBase(graph, m_Param, GetName()); - layer->m_Weight = m_Weight ? std::make_unique(*m_Weight) : nullptr; + layer->m_Weight = m_Weight ? m_Weight : nullptr; if (layer->m_Param.m_BiasEnabled) { - layer->m_Bias = m_Bias ? std::make_unique(*m_Bias) : nullptr; + layer->m_Bias = m_Bias ? m_Bias : nullptr; } return std::move(layer); diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.hpp b/src/armnn/layers/DepthwiseConvolution2dLayer.hpp index 7388cbcd8e..c83aa434d5 100644 --- a/src/armnn/layers/DepthwiseConvolution2dLayer.hpp +++ b/src/armnn/layers/DepthwiseConvolution2dLayer.hpp @@ -16,9 +16,9 @@ class DepthwiseConvolution2dLayer : public LayerWithParameters m_Weight; + std::shared_ptr m_Weight; /// A unique pointer to store Bias values. - std::unique_ptr m_Bias; + std::shared_ptr m_Bias; /// Makes a workload for the DepthwiseConvolution2d type. /// @param [in] graph The graph where this layer can be found. diff --git a/src/armnn/layers/DetectionPostProcessLayer.cpp b/src/armnn/layers/DetectionPostProcessLayer.cpp index 356377a2f5..e5bbeca424 100644 --- a/src/armnn/layers/DetectionPostProcessLayer.cpp +++ b/src/armnn/layers/DetectionPostProcessLayer.cpp @@ -32,7 +32,7 @@ std::unique_ptr DetectionPostProcessLayer::CreateWorkload(const armnn DetectionPostProcessLayer* DetectionPostProcessLayer::Clone(Graph& graph) const { auto layer = CloneBase(graph, m_Param, GetName()); - layer->m_Anchors = m_Anchors ? std::make_unique(*m_Anchors) : nullptr; + layer->m_Anchors = m_Anchors ? m_Anchors : nullptr; return std::move(layer); } diff --git a/src/armnn/layers/DetectionPostProcessLayer.hpp b/src/armnn/layers/DetectionPostProcessLayer.hpp index b0d58589b4..e40966a19c 100644 --- a/src/armnn/layers/DetectionPostProcessLayer.hpp +++ b/src/armnn/layers/DetectionPostProcessLayer.hpp @@ -17,7 +17,7 @@ class DetectionPostProcessLayer : public LayerWithParameters m_Anchors; + std::shared_ptr m_Anchors; /// Makes a workload for the DetectionPostProcess type. /// @param [in] graph The graph where this layer can be found. 
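
Every Clone() change in this patch follows the same pattern as the hunks above: the layer's constant-tensor member becomes a std::shared_ptr, and cloning copies the pointer rather than the payload, so the duplicated graph no longer doubles the weight memory. A minimal self-contained sketch of that idea, using hypothetical ConstData/DemoLayer types rather than the Arm NN classes:

// Sketch only (hypothetical names, not the Arm NN API): cloning by copying a
// std::shared_ptr shares one constant payload instead of deep-copying it.
#include <cassert>
#include <memory>
#include <vector>

struct ConstData               // stand-in for an immutable weight blob
{
    std::vector<float> values;
};

struct DemoLayer
{
    std::shared_ptr<ConstData> m_Weight;   // previously a std::unique_ptr

    DemoLayer Clone() const
    {
        DemoLayer copy;
        copy.m_Weight = m_Weight;          // shares ownership, no deep copy
        return copy;
    }
};

int main()
{
    DemoLayer original;
    original.m_Weight = std::make_shared<ConstData>(ConstData{{1.0f, 2.0f, 3.0f}});

    DemoLayer cloned = original.Clone();

    // Both layers reference the same payload, so peak memory does not double
    // while the copied graph is being built.
    assert(cloned.m_Weight.get() == original.m_Weight.get());
    assert(cloned.m_Weight.use_count() == 2);
    return 0;
}
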
diff --git a/src/armnn/layers/FullyConnectedLayer.cpp b/src/armnn/layers/FullyConnectedLayer.cpp index 6d0b57a84c..44c8920136 100644 --- a/src/armnn/layers/FullyConnectedLayer.cpp +++ b/src/armnn/layers/FullyConnectedLayer.cpp @@ -44,11 +44,11 @@ FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const auto layer = CloneBase(graph, m_Param, GetName()); if (m_Param.m_ConstantWeights) { - layer->m_Weight = m_Weight ? std::make_unique(*m_Weight) : nullptr; + layer->m_Weight = m_Weight ? m_Weight : nullptr; if (layer->m_Param.m_BiasEnabled) { - layer->m_Bias = m_Bias ? std::make_unique(*m_Bias) : nullptr; + layer->m_Bias = m_Bias ? m_Bias : nullptr; } } return std::move(layer); diff --git a/src/armnn/layers/FullyConnectedLayer.hpp b/src/armnn/layers/FullyConnectedLayer.hpp index 4a9cbe1136..c45b081c85 100644 --- a/src/armnn/layers/FullyConnectedLayer.hpp +++ b/src/armnn/layers/FullyConnectedLayer.hpp @@ -16,9 +16,9 @@ class FullyConnectedLayer : public LayerWithParameters { public: /// A unique pointer to store Weight values. - std::unique_ptr m_Weight; + std::shared_ptr m_Weight; /// A unique pointer to store Bias values. - std::unique_ptr m_Bias; + std::shared_ptr m_Bias; /// Makes a workload for the FullyConnected type. /// @param [in] graph The graph where this layer can be found. diff --git a/src/armnn/layers/LstmLayer.cpp b/src/armnn/layers/LstmLayer.cpp index ebc408a636..0eeb2f8eab 100644 --- a/src/armnn/layers/LstmLayer.cpp +++ b/src/armnn/layers/LstmLayer.cpp @@ -82,41 +82,41 @@ LstmLayer* LstmLayer::Clone(Graph& graph) const auto layer = CloneBase(graph, m_Param, GetName()); layer->m_BasicParameters.m_InputToForgetWeights = m_BasicParameters.m_InputToForgetWeights ? - std::make_unique(*m_BasicParameters.m_InputToForgetWeights) + m_BasicParameters.m_InputToForgetWeights : nullptr; layer->m_BasicParameters.m_InputToCellWeights = m_BasicParameters.m_InputToCellWeights ? - std::make_unique(*m_BasicParameters.m_InputToCellWeights) : nullptr; + m_BasicParameters.m_InputToCellWeights : nullptr; layer->m_BasicParameters.m_InputToOutputWeights = m_BasicParameters.m_InputToOutputWeights ? - std::make_unique(*m_BasicParameters.m_InputToOutputWeights) : nullptr; + m_BasicParameters.m_InputToOutputWeights : nullptr; layer->m_BasicParameters.m_RecurrentToForgetWeights = m_BasicParameters.m_RecurrentToForgetWeights ? - std::make_unique(*m_BasicParameters.m_RecurrentToForgetWeights) : nullptr; + m_BasicParameters.m_RecurrentToForgetWeights : nullptr; layer->m_BasicParameters.m_RecurrentToCellWeights = m_BasicParameters.m_RecurrentToCellWeights ? - std::make_unique(*m_BasicParameters.m_RecurrentToCellWeights) : nullptr; + m_BasicParameters.m_RecurrentToCellWeights : nullptr; layer->m_BasicParameters.m_RecurrentToOutputWeights = m_BasicParameters.m_RecurrentToOutputWeights ? - std::make_unique(*m_BasicParameters.m_RecurrentToOutputWeights) : nullptr; + m_BasicParameters.m_RecurrentToOutputWeights : nullptr; layer->m_BasicParameters.m_ForgetGateBias = m_BasicParameters.m_ForgetGateBias ? - std::make_unique(*m_BasicParameters.m_ForgetGateBias) : nullptr; + m_BasicParameters.m_ForgetGateBias : nullptr; layer->m_BasicParameters.m_CellBias = m_BasicParameters.m_CellBias ? - std::make_unique(*m_BasicParameters.m_CellBias) : nullptr; + m_BasicParameters.m_CellBias : nullptr; layer->m_BasicParameters.m_OutputGateBias = m_BasicParameters.m_OutputGateBias ? 
- std::make_unique(*m_BasicParameters.m_OutputGateBias) : nullptr; + m_BasicParameters.m_OutputGateBias : nullptr; if (!m_Param.m_CifgEnabled) { layer->m_CifgParameters.m_InputToInputWeights = m_CifgParameters.m_InputToInputWeights ? - std::make_unique(*m_CifgParameters.m_InputToInputWeights) : nullptr; + m_CifgParameters.m_InputToInputWeights : nullptr; layer->m_CifgParameters.m_RecurrentToInputWeights = m_CifgParameters.m_RecurrentToInputWeights ? - std::make_unique(*m_CifgParameters.m_RecurrentToInputWeights) : nullptr; + m_CifgParameters.m_RecurrentToInputWeights : nullptr; layer->m_CifgParameters.m_InputGateBias = m_CifgParameters.m_InputGateBias ? - std::make_unique(*m_CifgParameters.m_InputGateBias) : nullptr; + m_CifgParameters.m_InputGateBias : nullptr; } if (m_Param.m_ProjectionEnabled) { layer->m_ProjectionParameters.m_ProjectionWeights = m_ProjectionParameters.m_ProjectionWeights ? - std::make_unique(*m_ProjectionParameters.m_ProjectionWeights) : nullptr; + m_ProjectionParameters.m_ProjectionWeights : nullptr; layer->m_ProjectionParameters.m_ProjectionBias = m_ProjectionParameters.m_ProjectionBias ? - std::make_unique(*m_ProjectionParameters.m_ProjectionBias) : nullptr; + m_ProjectionParameters.m_ProjectionBias : nullptr; } if (m_Param.m_PeepholeEnabled) @@ -124,24 +124,24 @@ LstmLayer* LstmLayer::Clone(Graph& graph) const if (!m_Param.m_CifgEnabled) { layer->m_PeepholeParameters.m_CellToInputWeights = m_PeepholeParameters.m_CellToInputWeights ? - std::make_unique(*m_PeepholeParameters.m_CellToInputWeights) : nullptr; + m_PeepholeParameters.m_CellToInputWeights : nullptr; } layer->m_PeepholeParameters.m_CellToForgetWeights = m_PeepholeParameters.m_CellToForgetWeights ? - std::make_unique(*m_PeepholeParameters.m_CellToForgetWeights) : nullptr; + m_PeepholeParameters.m_CellToForgetWeights : nullptr; layer->m_PeepholeParameters.m_CellToOutputWeights = m_PeepholeParameters.m_CellToOutputWeights ? - std::make_unique(*m_PeepholeParameters.m_CellToOutputWeights) : nullptr; + m_PeepholeParameters.m_CellToOutputWeights : nullptr; } if (m_Param.m_LayerNormEnabled) { layer->m_LayerNormParameters.m_InputLayerNormWeights = m_LayerNormParameters.m_InputLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_InputLayerNormWeights) : nullptr; + m_LayerNormParameters.m_InputLayerNormWeights : nullptr; layer->m_LayerNormParameters.m_ForgetLayerNormWeights = m_LayerNormParameters.m_ForgetLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_ForgetLayerNormWeights) : nullptr; + m_LayerNormParameters.m_ForgetLayerNormWeights : nullptr; layer->m_LayerNormParameters.m_CellLayerNormWeights = m_LayerNormParameters.m_CellLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_CellLayerNormWeights) : nullptr; + m_LayerNormParameters.m_CellLayerNormWeights : nullptr; layer->m_LayerNormParameters.m_OutputLayerNormWeights = m_LayerNormParameters.m_OutputLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_OutputLayerNormWeights) : nullptr; + m_LayerNormParameters.m_OutputLayerNormWeights : nullptr; } return std::move(layer); diff --git a/src/armnn/layers/LstmLayer.hpp b/src/armnn/layers/LstmLayer.hpp index 30f952e276..80b57a88f7 100644 --- a/src/armnn/layers/LstmLayer.hpp +++ b/src/armnn/layers/LstmLayer.hpp @@ -14,63 +14,63 @@ class ScopedCpuTensorHandle; struct LstmOptLayerNormParameters { /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. 
- std::unique_ptr m_InputLayerNormWeights; + std::shared_ptr m_InputLayerNormWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_ForgetLayerNormWeights; + std::shared_ptr m_ForgetLayerNormWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_CellLayerNormWeights; + std::shared_ptr m_CellLayerNormWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_OutputLayerNormWeights; + std::shared_ptr m_OutputLayerNormWeights; }; struct LstmOptCifgParameters { /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units]. - std::unique_ptr m_InputToInputWeights; + std::shared_ptr m_InputToInputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units]. - std::unique_ptr m_RecurrentToInputWeights; + std::shared_ptr m_RecurrentToInputWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_InputGateBias; + std::shared_ptr m_InputGateBias; }; struct LstmOptProjectionParameters { /// A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units]. - std::unique_ptr m_ProjectionWeights; + std::shared_ptr m_ProjectionWeights; /// A unique pointer to represent 1D weights tensor with dimensions [output_size]. - std::unique_ptr m_ProjectionBias; + std::shared_ptr m_ProjectionBias; }; struct LstmOptPeepholeParameters { /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_CellToInputWeights; + std::shared_ptr m_CellToInputWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_CellToForgetWeights; + std::shared_ptr m_CellToForgetWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_CellToOutputWeights; + std::shared_ptr m_CellToOutputWeights; }; struct LstmBasicParameters { /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units]. - std::unique_ptr m_InputToForgetWeights; + std::shared_ptr m_InputToForgetWeights; /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units]. - std::unique_ptr m_InputToCellWeights; + std::shared_ptr m_InputToCellWeights; /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units]. - std::unique_ptr m_InputToOutputWeights; + std::shared_ptr m_InputToOutputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units]. - std::unique_ptr m_RecurrentToForgetWeights; + std::shared_ptr m_RecurrentToForgetWeights; /// A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units]. - std::unique_ptr m_RecurrentToCellWeights; + std::shared_ptr m_RecurrentToCellWeights; /// A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units]. - std::unique_ptr m_RecurrentToOutputWeights; + std::shared_ptr m_RecurrentToOutputWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_ForgetGateBias; + std::shared_ptr m_ForgetGateBias; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. - std::unique_ptr m_CellBias; + std::shared_ptr m_CellBias; /// A unique pointer to represent 1D weights tensor with dimensions [num_units]. 
- std::unique_ptr m_OutputGateBias; + std::shared_ptr m_OutputGateBias; }; /// This layer represents a LSTM operation. diff --git a/src/armnn/layers/QLstmLayer.cpp b/src/armnn/layers/QLstmLayer.cpp index d957bbb485..16aa718eb9 100644 --- a/src/armnn/layers/QLstmLayer.cpp +++ b/src/armnn/layers/QLstmLayer.cpp @@ -83,68 +83,68 @@ QLstmLayer* QLstmLayer::Clone(Graph& graph) const auto layer = CloneBase(graph, m_Param, GetName()); layer->m_BasicParameters.m_InputToForgetWeights = m_BasicParameters.m_InputToForgetWeights ? - std::make_unique(*m_BasicParameters.m_InputToForgetWeights) : nullptr; + m_BasicParameters.m_InputToForgetWeights : nullptr; layer->m_BasicParameters.m_InputToCellWeights = m_BasicParameters.m_InputToCellWeights ? - std::make_unique(*m_BasicParameters.m_InputToCellWeights) : nullptr; + m_BasicParameters.m_InputToCellWeights : nullptr; layer->m_BasicParameters.m_InputToOutputWeights = m_BasicParameters.m_InputToOutputWeights ? - std::make_unique(*m_BasicParameters.m_InputToOutputWeights) : nullptr; + m_BasicParameters.m_InputToOutputWeights : nullptr; layer->m_BasicParameters.m_RecurrentToForgetWeights = m_BasicParameters.m_RecurrentToForgetWeights ? - std::make_unique(*m_BasicParameters.m_RecurrentToForgetWeights) : nullptr; + m_BasicParameters.m_RecurrentToForgetWeights : nullptr; layer->m_BasicParameters.m_RecurrentToCellWeights = m_BasicParameters.m_RecurrentToCellWeights ? - std::make_unique(*m_BasicParameters.m_RecurrentToCellWeights) : nullptr; + m_BasicParameters.m_RecurrentToCellWeights : nullptr; layer->m_BasicParameters.m_RecurrentToOutputWeights = m_BasicParameters.m_RecurrentToOutputWeights ? - std::make_unique(*m_BasicParameters.m_RecurrentToOutputWeights) : nullptr; + m_BasicParameters.m_RecurrentToOutputWeights : nullptr; layer->m_BasicParameters.m_ForgetGateBias = m_BasicParameters.m_ForgetGateBias ? - std::make_unique(*m_BasicParameters.m_ForgetGateBias) : nullptr; + m_BasicParameters.m_ForgetGateBias : nullptr; layer->m_BasicParameters.m_CellBias = m_BasicParameters.m_CellBias ? - std::make_unique(*m_BasicParameters.m_CellBias) : nullptr; + m_BasicParameters.m_CellBias : nullptr; layer->m_BasicParameters.m_OutputGateBias = m_BasicParameters.m_OutputGateBias ? - std::make_unique(*m_BasicParameters.m_OutputGateBias) : nullptr; + m_BasicParameters.m_OutputGateBias : nullptr; if (!m_Param.m_CifgEnabled) { layer->m_CifgParameters.m_InputToInputWeights = m_CifgParameters.m_InputToInputWeights ? - std::make_unique(*m_CifgParameters.m_InputToInputWeights) : nullptr; + m_CifgParameters.m_InputToInputWeights : nullptr; layer->m_CifgParameters.m_RecurrentToInputWeights = m_CifgParameters.m_RecurrentToInputWeights ? - std::make_unique(*m_CifgParameters.m_RecurrentToInputWeights) : nullptr; + m_CifgParameters.m_RecurrentToInputWeights : nullptr; layer->m_CifgParameters.m_InputGateBias = m_CifgParameters.m_InputGateBias ? - std::make_unique(*m_CifgParameters.m_InputGateBias) : nullptr; + m_CifgParameters.m_InputGateBias : nullptr; } if (m_Param.m_ProjectionEnabled) { layer->m_ProjectionParameters.m_ProjectionWeights = m_ProjectionParameters.m_ProjectionWeights ? - std::make_unique(*m_ProjectionParameters.m_ProjectionWeights) : nullptr; + m_ProjectionParameters.m_ProjectionWeights : nullptr; layer->m_ProjectionParameters.m_ProjectionBias = m_ProjectionParameters.m_ProjectionBias ? 
- std::make_unique(*m_ProjectionParameters.m_ProjectionBias) : nullptr; + m_ProjectionParameters.m_ProjectionBias : nullptr; } if (m_Param.m_PeepholeEnabled) { if (!m_Param.m_CifgEnabled) { layer->m_PeepholeParameters.m_CellToInputWeights = m_PeepholeParameters.m_CellToInputWeights ? - std::make_unique(*m_PeepholeParameters.m_CellToInputWeights) : nullptr; + m_PeepholeParameters.m_CellToInputWeights : nullptr; } layer->m_PeepholeParameters.m_CellToForgetWeights = m_PeepholeParameters.m_CellToForgetWeights ? - std::make_unique(*m_PeepholeParameters.m_CellToForgetWeights) : nullptr; + m_PeepholeParameters.m_CellToForgetWeights : nullptr; layer->m_PeepholeParameters.m_CellToOutputWeights = m_PeepholeParameters.m_CellToOutputWeights ? - std::make_unique(*m_PeepholeParameters.m_CellToOutputWeights) : nullptr; + m_PeepholeParameters.m_CellToOutputWeights : nullptr; } if (m_Param.m_LayerNormEnabled) { if (!m_Param.m_CifgEnabled) { layer->m_LayerNormParameters.m_InputLayerNormWeights = m_LayerNormParameters.m_InputLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_InputLayerNormWeights) : nullptr; + m_LayerNormParameters.m_InputLayerNormWeights : nullptr; } layer->m_LayerNormParameters.m_ForgetLayerNormWeights = m_LayerNormParameters.m_ForgetLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_ForgetLayerNormWeights) : nullptr; + m_LayerNormParameters.m_ForgetLayerNormWeights : nullptr; layer->m_LayerNormParameters.m_CellLayerNormWeights = m_LayerNormParameters.m_CellLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_CellLayerNormWeights) : nullptr; + m_LayerNormParameters.m_CellLayerNormWeights : nullptr; layer->m_LayerNormParameters.m_OutputLayerNormWeights = m_LayerNormParameters.m_OutputLayerNormWeights ? - std::make_unique(*m_LayerNormParameters.m_OutputLayerNormWeights) : nullptr; + m_LayerNormParameters.m_OutputLayerNormWeights : nullptr; } return std::move(layer); diff --git a/src/armnn/layers/QLstmLayer.hpp b/src/armnn/layers/QLstmLayer.hpp index 70cc4f2b15..09a020dc1d 100644 --- a/src/armnn/layers/QLstmLayer.hpp +++ b/src/armnn/layers/QLstmLayer.hpp @@ -14,65 +14,65 @@ class ScopedCpuTensorHandle; struct QLstmBasicParameters { /// A unique pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8). - std::unique_ptr m_InputToForgetWeights; + std::shared_ptr m_InputToForgetWeights; /// A unique pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8). - std::unique_ptr m_InputToCellWeights; + std::shared_ptr m_InputToCellWeights; /// A unique pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8). - std::unique_ptr m_InputToOutputWeights; + std::shared_ptr m_InputToOutputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8). - std::unique_ptr m_RecurrentToForgetWeights; + std::shared_ptr m_RecurrentToForgetWeights; /// A unique pointer to represent 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8). - std::unique_ptr m_RecurrentToCellWeights; + std::shared_ptr m_RecurrentToCellWeights; /// A unique pointer to represent 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8). - std::unique_ptr m_RecurrentToOutputWeights; + std::shared_ptr m_RecurrentToOutputWeights; /// A unique pointer to represent 1D bias tensor with dimensions [num_units] (int32). 
- std::unique_ptr m_ForgetGateBias; + std::shared_ptr m_ForgetGateBias; /// A unique pointer to represent 1D bias tensor with dimensions [num_units] (int32). - std::unique_ptr m_CellBias; + std::shared_ptr m_CellBias; /// A unique pointer to represent 1D bias tensor with dimensions [num_units] (int32). - std::unique_ptr m_OutputGateBias; + std::shared_ptr m_OutputGateBias; }; struct QLstmOptProjectionParameters { /// A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units] (QSymmS8). - std::unique_ptr m_ProjectionWeights; + std::shared_ptr m_ProjectionWeights; /// A unique pointer to represent 1D weights tensor with dimensions [output_size] (int32). - std::unique_ptr m_ProjectionBias; + std::shared_ptr m_ProjectionBias; }; struct QLstmOptPeepholeParameters { /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_CellToInputWeights; + std::shared_ptr m_CellToInputWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_CellToForgetWeights; + std::shared_ptr m_CellToForgetWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_CellToOutputWeights; + std::shared_ptr m_CellToOutputWeights; }; struct QLstmOptCifgParameters { /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units] (QSymmS8). - std::unique_ptr m_InputToInputWeights; + std::shared_ptr m_InputToInputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units] (QSymmS8). - std::unique_ptr m_RecurrentToInputWeights; + std::shared_ptr m_RecurrentToInputWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (int32). - std::unique_ptr m_InputGateBias; + std::shared_ptr m_InputGateBias; }; struct QLstmOptLayerNormParameters { /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_InputLayerNormWeights; + std::shared_ptr m_InputLayerNormWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_ForgetLayerNormWeights; + std::shared_ptr m_ForgetLayerNormWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_CellLayerNormWeights; + std::shared_ptr m_CellLayerNormWeights; /// A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16). - std::unique_ptr m_OutputLayerNormWeights; + std::shared_ptr m_OutputLayerNormWeights; }; /// This layer represents a QLstm operation. diff --git a/src/armnn/layers/QuantizedLstmLayer.cpp b/src/armnn/layers/QuantizedLstmLayer.cpp index 578d9eb137..a1ff985abe 100644 --- a/src/armnn/layers/QuantizedLstmLayer.cpp +++ b/src/armnn/layers/QuantizedLstmLayer.cpp @@ -49,31 +49,31 @@ QuantizedLstmLayer* QuantizedLstmLayer::Clone(Graph& graph) const auto layer = CloneBase(graph, GetName()); layer->m_QuantizedLstmParameters.m_InputToInputWeights = m_QuantizedLstmParameters.m_InputToInputWeights ? - std::make_unique(*m_QuantizedLstmParameters.m_InputToInputWeights) : nullptr; + m_QuantizedLstmParameters.m_InputToInputWeights : nullptr; layer->m_QuantizedLstmParameters.m_InputToForgetWeights = m_QuantizedLstmParameters.m_InputToForgetWeights ? 
- std::make_unique(*m_QuantizedLstmParameters.m_InputToForgetWeights) : nullptr; + m_QuantizedLstmParameters.m_InputToForgetWeights : nullptr; layer->m_QuantizedLstmParameters.m_InputToCellWeights = m_QuantizedLstmParameters.m_InputToCellWeights ? - std::make_unique(*m_QuantizedLstmParameters.m_InputToCellWeights) : nullptr; + m_QuantizedLstmParameters.m_InputToCellWeights : nullptr; layer->m_QuantizedLstmParameters.m_InputToOutputWeights = m_QuantizedLstmParameters.m_InputToOutputWeights ? - std::make_unique(*m_QuantizedLstmParameters.m_InputToOutputWeights) : nullptr; + m_QuantizedLstmParameters.m_InputToOutputWeights : nullptr; layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights = m_QuantizedLstmParameters.m_RecurrentToInputWeights ? - std::make_unique(*m_QuantizedLstmParameters.m_RecurrentToInputWeights) : nullptr; + m_QuantizedLstmParameters.m_RecurrentToInputWeights : nullptr; layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights = m_QuantizedLstmParameters.m_RecurrentToForgetWeights - ? std::make_unique(*m_QuantizedLstmParameters.m_RecurrentToForgetWeights) : nullptr; + ? m_QuantizedLstmParameters.m_RecurrentToForgetWeights : nullptr; layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights = m_QuantizedLstmParameters.m_RecurrentToCellWeights ? - std::make_unique(*m_QuantizedLstmParameters.m_RecurrentToCellWeights) : nullptr; + m_QuantizedLstmParameters.m_RecurrentToCellWeights : nullptr; layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights = m_QuantizedLstmParameters.m_RecurrentToOutputWeights - ? std::make_unique(*m_QuantizedLstmParameters.m_RecurrentToOutputWeights) : nullptr; + ? m_QuantizedLstmParameters.m_RecurrentToOutputWeights : nullptr; layer->m_QuantizedLstmParameters.m_InputGateBias = m_QuantizedLstmParameters.m_InputGateBias ? - std::make_unique(*m_QuantizedLstmParameters.m_InputGateBias) : nullptr; + m_QuantizedLstmParameters.m_InputGateBias : nullptr; layer->m_QuantizedLstmParameters.m_ForgetGateBias = m_QuantizedLstmParameters.m_ForgetGateBias ? - std::make_unique(*m_QuantizedLstmParameters.m_ForgetGateBias) : nullptr; + m_QuantizedLstmParameters.m_ForgetGateBias : nullptr; layer->m_QuantizedLstmParameters.m_CellBias = m_QuantizedLstmParameters.m_CellBias ? - std::make_unique(*m_QuantizedLstmParameters.m_CellBias) : nullptr; + m_QuantizedLstmParameters.m_CellBias : nullptr; layer->m_QuantizedLstmParameters.m_OutputGateBias = m_QuantizedLstmParameters.m_OutputGateBias ? - std::make_unique(*m_QuantizedLstmParameters.m_OutputGateBias) : nullptr; + m_QuantizedLstmParameters.m_OutputGateBias : nullptr; return std::move(layer); } diff --git a/src/armnn/layers/QuantizedLstmLayer.hpp b/src/armnn/layers/QuantizedLstmLayer.hpp index 544acbd816..ca97a6bb65 100644 --- a/src/armnn/layers/QuantizedLstmLayer.hpp +++ b/src/armnn/layers/QuantizedLstmLayer.hpp @@ -14,31 +14,31 @@ class ScopedCpuTensorHandle; struct QuantizedLstmParameters { /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8). - std::unique_ptr m_InputToInputWeights; + std::shared_ptr m_InputToInputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8). - std::unique_ptr m_InputToForgetWeights; + std::shared_ptr m_InputToForgetWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8). 
- std::unique_ptr m_InputToCellWeights; + std::shared_ptr m_InputToCellWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8). - std::unique_ptr m_InputToOutputWeights; + std::shared_ptr m_InputToOutputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8). - std::unique_ptr m_RecurrentToInputWeights; + std::shared_ptr m_RecurrentToInputWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8). - std::unique_ptr m_RecurrentToForgetWeights; + std::shared_ptr m_RecurrentToForgetWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8). - std::unique_ptr m_RecurrentToCellWeights; + std::shared_ptr m_RecurrentToCellWeights; /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8). - std::unique_ptr m_RecurrentToOutputWeights; + std::shared_ptr m_RecurrentToOutputWeights; /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32). - std::unique_ptr m_InputGateBias; + std::shared_ptr m_InputGateBias; /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32). - std::unique_ptr m_ForgetGateBias; + std::shared_ptr m_ForgetGateBias; /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32). - std::unique_ptr m_CellBias; + std::shared_ptr m_CellBias; /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32). - std::unique_ptr m_OutputGateBias; + std::shared_ptr m_OutputGateBias; }; /// This layer represents a QuantizedLstm operation. diff --git a/src/armnn/layers/TransposeConvolution2dLayer.cpp b/src/armnn/layers/TransposeConvolution2dLayer.cpp index bd8cb096e2..8f6908ea5d 100644 --- a/src/armnn/layers/TransposeConvolution2dLayer.cpp +++ b/src/armnn/layers/TransposeConvolution2dLayer.cpp @@ -44,11 +44,11 @@ TransposeConvolution2dLayer* TransposeConvolution2dLayer::Clone(Graph& graph) co { auto layer = CloneBase(graph, m_Param, GetName()); - layer->m_Weight = m_Weight ? std::make_unique(*m_Weight) : nullptr; + layer->m_Weight = m_Weight ? m_Weight : nullptr; if (layer->m_Param.m_BiasEnabled) { - layer->m_Bias = m_Bias ? std::make_unique(*m_Bias) : nullptr; + layer->m_Bias = m_Bias ? m_Bias : nullptr; } return std::move(layer); diff --git a/src/armnn/layers/TransposeConvolution2dLayer.hpp b/src/armnn/layers/TransposeConvolution2dLayer.hpp index 903c957393..53e73491d6 100644 --- a/src/armnn/layers/TransposeConvolution2dLayer.hpp +++ b/src/armnn/layers/TransposeConvolution2dLayer.hpp @@ -16,9 +16,9 @@ class TransposeConvolution2dLayer : public LayerWithParameters m_Weight; + std::shared_ptr m_Weight; /// A unique pointer to store bias values. - std::unique_ptr m_Bias; + std::shared_ptr m_Bias; /// Makes a workload for the TransposeConvolution2d type. /// @param [in] graph The graph where this layer can be found. 
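
The optimization and workload hunks below swap the non-const GetTensor<T>() accessor for GetConstTensor<T>(): once a weight payload can be shared by several graphs, passes read it through a const pointer and, when a conversion is needed, build a new buffer and reassign the handle rather than writing through it. A sketch of that pattern with a hypothetical DemoHandle type, not ConstCpuTensorHandle itself:

// Sketch only: const read access plus copy-on-convert for shared constant data.
#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

class DemoHandle
{
public:
    explicit DemoHandle(std::vector<float> data) : m_Data(std::move(data)) {}

    // Const access only: shared payloads must not be mutated in place.
    const float* GetConstTensor() const { return m_Data.data(); }
    std::size_t  GetNumElements() const { return m_Data.size(); }

private:
    std::vector<float> m_Data;
};

// Convert by building a fresh payload and swapping the shared handle, leaving
// any other graph that still references the old handle untouched.
void ScaleWeights(std::shared_ptr<DemoHandle>& handle, float scale)
{
    const float* src = handle->GetConstTensor();
    std::vector<float> newValues(handle->GetNumElements());
    for (std::size_t i = 0; i < newValues.size(); ++i)
    {
        newValues[i] = src[i] * scale;
    }
    handle = std::make_shared<DemoHandle>(std::move(newValues));
}

int main()
{
    auto original = std::make_shared<DemoHandle>(std::vector<float>{1.0f, 2.0f});
    std::shared_ptr<DemoHandle> clone = original;   // second graph sharing the weights

    ScaleWeights(clone, 2.0f);

    // The clone now owns converted data; the original payload is untouched.
    assert(original->GetConstTensor()[0] == 1.0f);
    assert(clone->GetConstTensor()[0] == 2.0f);
    return 0;
}
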
diff --git a/src/armnn/optimizations/AddBroadcastReshapeLayer.hpp b/src/armnn/optimizations/AddBroadcastReshapeLayer.hpp index 26661cfcde..0a5ad9d152 100644 --- a/src/armnn/optimizations/AddBroadcastReshapeLayer.hpp +++ b/src/armnn/optimizations/AddBroadcastReshapeLayer.hpp @@ -74,7 +74,7 @@ public: ConstantLayer& constantLayer = static_cast(parentLayer); constantLayer.m_LayerOutput = std::make_unique( - ConstTensor(reshapeInfo,constantLayer.m_LayerOutput.get()->GetTensor())); + ConstTensor(reshapeInfo,constantLayer.m_LayerOutput.get()->GetConstTensor())); constantLayer.GetOutputSlot().SetTensorInfo(reshapeInfo); return; diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp index f3ebcdf5d9..df5a5b4f67 100644 --- a/src/armnn/optimizations/ConvertConstants.hpp +++ b/src/armnn/optimizations/ConvertConstants.hpp @@ -23,7 +23,7 @@ namespace optimizations struct BFloat16ToFloat32 { - static void Func(std::unique_ptr& handle) + static void Func(std::shared_ptr& handle) { const TensorInfo& info = handle->GetTensorInfo(); @@ -31,7 +31,7 @@ struct BFloat16ToFloat32 { std::vector newValues(info.GetNumElements()); - armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetTensor(), + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetConstTensor(), info.GetNumElements(), newValues.data()); @@ -44,7 +44,7 @@ struct BFloat16ToFloat32 struct Float16ToFloat32 { - static void Func(std::unique_ptr& handle) + static void Func(std::shared_ptr& handle) { const TensorInfo& info = handle->GetTensorInfo(); @@ -52,7 +52,7 @@ struct Float16ToFloat32 { std::vector newValues(info.GetNumElements()); - armnnUtils::FloatingPointConverter::ConvertFloat16To32(handle->GetTensor(), + armnnUtils::FloatingPointConverter::ConvertFloat16To32(handle->GetConstTensor(), info.GetNumElements(), newValues.data()); @@ -65,7 +65,7 @@ struct Float16ToFloat32 struct Float32ToBFloat16 { - static void Func(std::unique_ptr& handle) + static void Func(std::shared_ptr& handle) { const TensorInfo& info = handle->GetTensorInfo(); @@ -73,7 +73,7 @@ struct Float32ToBFloat16 { std::vector newValues(info.GetNumElements()); - armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetTensor(), + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetConstTensor(), info.GetNumElements(), newValues.data()); @@ -86,7 +86,7 @@ struct Float32ToBFloat16 struct Float32ToFloat16 { - static void Func(std::unique_ptr& handle) + static void Func(std::shared_ptr& handle) { const TensorInfo& info = handle->GetTensorInfo(); @@ -94,7 +94,7 @@ struct Float32ToFloat16 { std::vector newValues(info.GetNumElements()); - armnnUtils::FloatingPointConverter::ConvertFloat32To16(handle->GetTensor(), + armnnUtils::FloatingPointConverter::ConvertFloat32To16(handle->GetConstTensor(), info.GetNumElements(), newValues.data()); diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp index c45ab2cded..a0856a485b 100644 --- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp +++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp @@ -27,9 +27,10 @@ inline LayerT* ConvertWeight(Layer* l) { std::vector newValues(info.GetNumElements()); - armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(layer->m_Weight->template GetTensor(), - info.GetNumElements(), - newValues.data()); + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16( + layer->m_Weight->template GetConstTensor(), + 
info.GetNumElements(), + newValues.data()); TensorInfo newInfo(info); newInfo.SetDataType(DataType::BFloat16); diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index 55a01fcc94..602575b352 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -590,4 +590,28 @@ BOOST_AUTO_TEST_CASE(DuplicateLayerNames) BOOST_TEST(((*std::next(it))->GetType() == armnn::LayerType::Output)); } +BOOST_AUTO_TEST_CASE(CheckGraphConstTensorSharing) +{ + armnn::Graph graph0; + const float* sharedWeightPtr; + + { + armnn::Graph graph1; + + armnn::FullyConnectedLayer* const fcLayer = + graph1.AddLayer(armnn::FullyConnectedDescriptor(), "fc"); + + float weight = 1.0f; + armnn::ConstTensor constTensor({{ 1, 1 }, armnn::DataType::Float32}, &weight); + fcLayer->m_Weight = std::make_shared(constTensor);; + // point sharedWeightPtr to graph1's const tensor + sharedWeightPtr = fcLayer->m_Weight->GetConstTensor(); + + graph0 = armnn::Graph(graph1); + // graph1 goes out of scope + } + + BOOST_TEST(*sharedWeightPtr == 1); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp b/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp index 5cb89daafd..bb8e674b56 100644 --- a/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp +++ b/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp @@ -57,7 +57,7 @@ BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToBFloatTest) BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16); // Check whether data matches expected Bf16 data - BFloat16* data = fc->m_Weight->GetTensor(); + const BFloat16* data = fc->m_Weight->GetConstTensor(); BOOST_CHECK(data[0] == BFloat16(0.0f)); BOOST_CHECK(data[1] == BFloat16(-1.0f)); BOOST_CHECK(data[2] == BFloat16(3.796875f)); // 0x4073 @@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(ConvertConstantsBFloatToFloatTest) BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); // Now test the data matches float32 data - float* data = fc->m_Weight->GetTensor(); + const float* data = fc->m_Weight->GetConstTensor(); BOOST_CHECK(data[0] == 0.0f); BOOST_CHECK(data[1] == -1.0f); BOOST_CHECK(data[2] == 3.796875f); diff --git a/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp b/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp index 84f525f4bf..12df462456 100644 --- a/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp +++ b/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp @@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToHalfTest) BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16); // Check whether data matches expected fp16 data - Half* data = fc->m_Weight->GetTensor(); + const Half* data = fc->m_Weight->GetConstTensor(); BOOST_CHECK(data[0] == Half(1.0f)); BOOST_CHECK(data[1] == Half(2.0f)); BOOST_CHECK(data[2] == Half(3.0f)); diff --git a/src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp b/src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp index f2dea53afd..7d7c6b2b0a 100644 --- a/src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp +++ b/src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp @@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE(ConvertConstantsHalfToFloatTest) BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); // Now test the data matches float32 data - float* data = fc->m_Weight->GetTensor(); + const 
diff --git a/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
index b35f983434..a65012eef4 100644
--- a/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
+++ b/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
@@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE(Fp32NetworkToBf16OptimizationConv2DTest)
     BOOST_TEST((outputTensor.GetDataType() == armnn::DataType::Float32));
 
     // Check whether data matches expected Bf16 data
-    armnn::BFloat16* data = conv->m_Weight->GetTensor<armnn::BFloat16>();
+    const armnn::BFloat16* data = conv->m_Weight->GetConstTensor<armnn::BFloat16>();
     BOOST_CHECK(data[0] == armnn::BFloat16(0.0f));
     BOOST_CHECK(data[1] == armnn::BFloat16(-1.0f));
     BOOST_CHECK(data[2] == armnn::BFloat16(3.796875f)); // 0x4073
@@ -171,7 +171,7 @@ BOOST_AUTO_TEST_CASE(Fp32NetworkToBf16OptimizationFullyConnectedTest)
     BOOST_TEST((outputTensor.GetDataType() == armnn::DataType::Float32));
 
     // Check whether data matches expected Bf16 data
-    armnn::BFloat16* data = fc->m_Weight->GetTensor<armnn::BFloat16>();
+    const armnn::BFloat16* data = fc->m_Weight->GetConstTensor<armnn::BFloat16>();
     BOOST_CHECK(data[0] == armnn::BFloat16(0.0f));
     BOOST_CHECK(data[1] == armnn::BFloat16(-1.0f));
     BOOST_CHECK(data[2] == armnn::BFloat16(3.796875f)); // 0x4073
@@ -182,4 +182,5 @@ BOOST_AUTO_TEST_CASE(Fp32NetworkToBf16OptimizationFullyConnectedTest)
     BOOST_CHECK(data[7] == armnn::BFloat16(-9.131327E-10f)); // 0xB07B
 }
 
+
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp
index 70b3443d88..7c37301d1d 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.cpp
@@ -110,27 +110,27 @@ void RefLstmWorkload::Execute() const
     std::unique_ptr> inputToInputWeightsTensor;
     std::unique_ptr> inputToForgetWeightsTensor = MakeDecoder(
-        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetTensor());
+        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor());
     std::unique_ptr> inputToCellWeightsTensor = MakeDecoder(
-        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetTensor());
+        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor());
     std::unique_ptr> inputToOutputWeightsTensor = MakeDecoder(
-        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetTensor());
+        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor());
 
     std::unique_ptr> recurrentToInputWeightsTensor;
     std::unique_ptr> recurrentToForgetWeightsTensor = MakeDecoder(
-        m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetTensor());
+        m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetConstTensor());
     std::unique_ptr> recurrentToCellWeightsTensor = MakeDecoder(
-        m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetTensor());
+        m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetConstTensor());
     std::unique_ptr> recurrentToOutputWeightsTensor = MakeDecoder(
-        m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetTensor());
+        m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetConstTensor());
 
     std::unique_ptr> inputGateBiasTensor;
     std::unique_ptr> forgetGateBiasTensor = MakeDecoder(
-        m_ForgetGateBiasTensor->GetTensorInfo(), m_ForgetGateBiasTensor->GetTensor());
+        m_ForgetGateBiasTensor->GetTensorInfo(), m_ForgetGateBiasTensor->GetConstTensor());
     std::unique_ptr> cellBiasTensor = MakeDecoder(
-        m_CellBiasTensor->GetTensorInfo(), m_CellBiasTensor->GetTensor());
+        m_CellBiasTensor->GetTensorInfo(), m_CellBiasTensor->GetConstTensor());
     std::unique_ptr> outputGateBiasTensor = MakeDecoder(
-        m_OutputGateBiasTensor->GetTensorInfo(), m_OutputGateBiasTensor->GetTensor());
+        m_OutputGateBiasTensor->GetTensorInfo(), m_OutputGateBiasTensor->GetConstTensor());
 
     std::unique_ptr> cellToInputWeightsTensor;
     std::unique_ptr> cellToForgetWeightsTensor;
@@ -149,48 +149,48 @@ void RefLstmWorkload::Execute() const
         if (!useCifg)
         {
             inputLayerNormWeights = MakeDecoder(
-                m_InputLayerNormWeights->GetTensorInfo(), m_InputLayerNormWeights->GetTensor());
+                m_InputLayerNormWeights->GetTensorInfo(), m_InputLayerNormWeights->GetConstTensor());
         }
         forgetLayerNormWeights = MakeDecoder(
-            m_ForgetLayerNormWeights->GetTensorInfo(), m_ForgetLayerNormWeights->GetTensor());
+            m_ForgetLayerNormWeights->GetTensorInfo(), m_ForgetLayerNormWeights->GetConstTensor());
         cellLayerNormWeights = MakeDecoder(
-            m_CellLayerNormWeights->GetTensorInfo(), m_CellLayerNormWeights->GetTensor());
+            m_CellLayerNormWeights->GetTensorInfo(), m_CellLayerNormWeights->GetConstTensor());
        outputLayerNormWeights = MakeDecoder(
-            m_OutputLayerNormWeights->GetTensorInfo(), m_OutputLayerNormWeights->GetTensor());
+            m_OutputLayerNormWeights->GetTensorInfo(), m_OutputLayerNormWeights->GetConstTensor());
     }
 
     if (!useCifg)
     {
         inputToInputWeightsTensor = MakeDecoder(
-            m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetTensor());
+            m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor());
         inputGateBiasTensor = MakeDecoder(
-            m_InputGateBiasTensor->GetTensorInfo(), m_InputGateBiasTensor->GetTensor());
+            m_InputGateBiasTensor->GetTensorInfo(), m_InputGateBiasTensor->GetConstTensor());
         recurrentToInputWeightsTensor = MakeDecoder(
-            m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetTensor());
+            m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetConstTensor());
     }
 
     if (usePeephole)
     {
         cellToForgetWeightsTensor = MakeDecoder(
-            m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetTensor());
+            m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor());
         cellToOutputWeightsTensor = MakeDecoder(
-            m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetTensor());
+            m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor());
     }
 
     if (!useCifg && usePeephole)
     {
         cellToInputWeightsTensor = MakeDecoder(
-            m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetTensor());
+            m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor());
     }
 
     if (m_Data.m_Parameters.m_ProjectionEnabled)
     {
         projectionWeightsTensor = MakeDecoder(
-            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetTensor());
+            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor());
         if (m_ProjectionBiasTensor)
         {
             projectionBiasTensor = MakeDecoder(
-                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetTensor());
+                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor());
         }
     }
 
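Every hunk in the two reference workloads applies the same substitution: decoders are now built from the const accessor, so a workload never takes a mutable pointer into weights that may still be shared with another graph. A minimal, self-contained sketch of that accessor split (the class below is illustrative only, not the ArmNN handle classes):

#include <cassert>
#include <vector>

// Illustrative handle with the same split the patch relies on: a mutable
// accessor for code that owns the data, and a const accessor for readers.
class ExampleTensorHandle
{
public:
    explicit ExampleTensorHandle(std::vector<float> data) : m_Data(std::move(data)) {}

    // read-only view: safe to hand to every graph or workload sharing this handle
    const float* GetConstTensor() const { return m_Data.data(); }

    // mutable view: reserved for code that legitimately initialises the data
    float* GetTensor() { return m_Data.data(); }

private:
    std::vector<float> m_Data;
};

int main()
{
    const ExampleTensorHandle weights({0.5f, 0.25f});
    // A const handle, as seen by a workload, only exposes the const accessor.
    assert(weights.GetConstTensor()[1] == 0.25f);
    return 0;
}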
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index e11ea55add..bcd6a627de 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -101,18 +101,20 @@ void RefQLstmWorkload::Execute() const
     // Weights decoders
     std::unique_ptr> inputToForgetWeightsDecoder = MakeDecoder(
-        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetTensor());
+        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor());
     std::unique_ptr> inputToCellWeightsDecoder = MakeDecoder(
-        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetTensor());
+        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor());
     std::unique_ptr> inputToOutputWeightsDecoder = MakeDecoder(
-        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetTensor());
+        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor());
 
     std::unique_ptr> recurrentToForgetWeightsDecoder = MakeDecoder(
-        m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetTensor());
+        m_RecurrentToForgetWeightsTensor->GetTensorInfo(),
+        m_RecurrentToForgetWeightsTensor->GetConstTensor());
     std::unique_ptr> recurrentToCellWeightsDecoder = MakeDecoder(
-        m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetTensor());
+        m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetConstTensor());
     std::unique_ptr> recurrentToOutputWeightsDecoder = MakeDecoder(
-        m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetTensor());
+        m_RecurrentToOutputWeightsTensor->GetTensorInfo(),
+        m_RecurrentToOutputWeightsTensor->GetConstTensor());
 
     // Optional CIFG params
     std::unique_ptr> inputToInputWeightsDecoder;
@@ -198,9 +200,9 @@ void RefQLstmWorkload::Execute() const
     if (!cifgEnabled)
     {
         inputToInputWeightsDecoder = MakeDecoder(
-            m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetTensor());
-        recurrentToInputWeightsDecoder = MakeDecoder(
-            m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetTensor());
+            m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor());
+        recurrentToInputWeightsDecoder = MakeDecoder(m_RecurrentToInputWeightsTensor->GetTensorInfo(),
+                                                     m_RecurrentToInputWeightsTensor->GetConstTensor());
     }
 
     if (peepholeEnabled)
@@ -208,22 +210,22 @@ void RefQLstmWorkload::Execute() const
     {
         if (!cifgEnabled)
         {
             cellToInputWeightsDecoder = MakeDecoder(
-                m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetTensor());
+                m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor());
         }
 
         cellToForgetWeightsDecoder = MakeDecoder(
-            m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetTensor());
+            m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor());
         cellToOutputWeightsDecoder = MakeDecoder(
-            m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetTensor());
+            m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor());
     }
 
     if (projectionEnabled)
     {
         projectionWeightsDecoder = MakeDecoder(
-            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetTensor());
+            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor());
         if (m_ProjectionBiasTensor)
         {
             projectionBiasDecoder = MakeDecoder(
-                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetTensor());
+                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor());
         }
     }
 
@@ -231,38 +233,40 @@
     {
         if (!cifgEnabled)
         {
-            inputLayerNormWeightsDecoder = MakeDecoder(
-                m_InputLayerNormWeightsTensor->GetTensorInfo(), m_InputLayerNormWeightsTensor->GetTensor());
+            inputLayerNormWeightsDecoder = MakeDecoder(m_InputLayerNormWeightsTensor->GetTensorInfo(),
+                                                       m_InputLayerNormWeightsTensor->GetConstTensor());
 
             // Bias only used if layer norm enabled
             armnn::TensorInfo inputGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
                 m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
             inputGateBiasDecoder = MakeDecoder(
-                inputGateBiasTensorInfo, m_InputGateBiasTensor->GetTensor());
+                inputGateBiasTensorInfo, m_InputGateBiasTensor->GetConstTensor());
         }
 
         forgetLayerNormWeightsDecoder = MakeDecoder(
-            m_ForgetLayerNormWeightsTensor->GetTensorInfo(), m_ForgetLayerNormWeightsTensor->GetTensor());
+            m_ForgetLayerNormWeightsTensor->GetTensorInfo(),
+            m_ForgetLayerNormWeightsTensor->GetConstTensor());
         cellLayerNormWeightsDecoder = MakeDecoder(
-            m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetTensor());
+            m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetConstTensor());
         outputLayerNormWeightsDecoder = MakeDecoder(
-            m_OutputLayerNormWeightsTensor->GetTensorInfo(), m_OutputLayerNormWeightsTensor->GetTensor());
+            m_OutputLayerNormWeightsTensor->GetTensorInfo(),
+            m_OutputLayerNormWeightsTensor->GetConstTensor());
 
         // Bias only used if layer norm enabled
         armnn::TensorInfo forgetGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
             m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
         forgetGateBiasDecoder = MakeDecoder(
-            forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetTensor());
+            forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetConstTensor());
 
         armnn::TensorInfo cellGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
             m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
         cellGateBiasDecoder = MakeDecoder(
-            cellGateBiasTensorInfo, m_CellBiasTensor->GetTensor());
+            cellGateBiasTensorInfo, m_CellBiasTensor->GetConstTensor());
 
         armnn::TensorInfo outputGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
             m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
         outputGateBiasDecoder = MakeDecoder(
-            outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetTensor());
+            outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetConstTensor());
     }
 
     // Initialize internal state tensors with zeroes.
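One detail in the layer-norm branch above is easy to miss: the gate-bias decoders do not use the bias tensor's own TensorInfo; they build a Signed32 info whose quantization scale is the matching layer-norm weight scale divided by 1024. A small standalone sketch of that arithmetic (the helper name is invented for illustration):

#include <cassert>

// biasScale = layerNormWeightScale / 1024, per the QLSTM code above.
float MakeGateBiasScale(float layerNormWeightScale)
{
    return layerNormWeightScale / 1024.0f;
}

int main()
{
    // 1024 is a power of two, so the division below is exact in float.
    assert(MakeGateBiasScale(2048.0f) == 2.0f);
    return 0;
}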
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 6bfad067ca..dca3ab2788 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -419,14 +419,14 @@ public:
             throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
         }
 
-        const auto parsing_start_time = armnn::GetTimeNow();
-        armnn::INetworkPtr network = CreateNetworkImpl::Create(params, m_InputBindings, m_OutputBindings);
-
-        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
-                        << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
-
         armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
         {
+            const auto parsing_start_time = armnn::GetTimeNow();
+            armnn::INetworkPtr network = CreateNetworkImpl::Create(params, m_InputBindings, m_OutputBindings);
+
+            ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
+                            << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
+
             ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
 
             armnn::OptimizerOptions options;
@@ -460,6 +460,8 @@ public:
             {
                 throw armnn::Exception("Optimize returned nullptr");
             }
+
+
         }
 
         if (params.m_VisualizePostOptimizationModel)
@@ -470,6 +472,8 @@ public:
             optNet->SerializeToDot(file);
         }
 
+
+
         armnn::Status ret;
         {
             ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
-- 
cgit v1.2.1
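The final hunk narrows the lifetime of the parsed INetwork: parsing and optimization now both happen inside the profiled scope, so the parser's graph (and the constants it holds) can be released before the optimized network is loaded. A standalone sketch of that scoping pattern, with invented types standing in for INetworkPtr and IOptimizedNetworkPtr:

#include <vector>

// Invented stand-ins: a large intermediate object and a small result.
struct ParsedNetwork    { std::vector<char> hugeBuffers = std::vector<char>(1 << 20); };
struct OptimizedNetwork { int handle = 0; };

OptimizedNetwork Optimize(const ParsedNetwork&) { return OptimizedNetwork{42}; }

int main()
{
    OptimizedNetwork optNet;
    {
        ParsedNetwork network;      // large, only needed to produce optNet
        optNet = Optimize(network);
    }                               // network is freed here, before the next stage
    // ... load and run optNet without the parser's memory still resident
    return optNet.handle == 42 ? 0 : 1;
}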