From 0690265d83e5aa79bd174544a7b35330781619dd Mon Sep 17 00:00:00 2001
From: Cathal Corbett <cathal.corbett@arm.com>
Date: Thu, 14 Apr 2022 17:55:11 +0100
Subject: IVGCVSW-6127 ConstTensorsAsInput: DepthwiseConvolution2d

!android-nn-driver:7418

 * Update Front-end and Tools.
 * Updated Serializer, Deserializer and unit tests to reflect this.
 * Updated TfLiteDelegate, TfLiteParser and OnnxParser.
 * Change NNDriver to new API.
 * Updated Ref.
 * Neon and Cl backend partially completed (Backend.cpp files).
 * Added dynamic or constant input EndToEnd tests.
 * Added ConstantTensorAsInputMemberVariableRedirect Optimization.

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Ib18b6c10a093042e165e25237dc04a4c67ba82da
---
 delegate/src/Convolution.hpp                       |  29 +--
 include/armnn/Descriptors.hpp                      |   3 +
 include/armnn/ILayerVisitor.hpp                    |  10 +
 include/armnn/INetwork.hpp                         |   9 +
 include/armnn/LayerVisitorBase.hpp                 |   4 +
 src/armnn/BackendHelper.cpp                        |  60 ++++-
 src/armnn/Descriptors.cpp                          |  11 +
 src/armnn/Graph.cpp                                |   3 +-
 src/armnn/Layer.cpp                                |  28 ++-
 src/armnn/LoadedNetwork.cpp                        |  20 +-
 src/armnn/Network.cpp                              |  59 +++--
 src/armnn/Network.hpp                              |  10 +-
 src/armnn/layers/DepthwiseConvolution2dLayer.cpp   |  51 ++--
 src/armnn/layers/DepthwiseConvolution2dLayer.hpp   |   4 +
 src/armnn/optimizations/FoldPadIntoLayer2d.hpp     |  23 +-
 src/armnn/optimizations/FuseBatchNorm.hpp          |  84 ++++++-
 .../RedirectMembersToConstantInputs.hpp            |   3 +-
 src/armnn/test/ConstTensorLayerVisitor.cpp         |  53 ++--
 src/armnn/test/ConstTensorLayerVisitor.hpp         |  12 -
 src/armnn/test/GraphTests.cpp                      |   4 +-
 src/armnn/test/OptimizerTests.cpp                  |  19 +-
 src/armnn/test/ShapeInferenceTests.cpp             |  17 +-
 src/armnn/test/SubgraphViewTests.cpp               |   9 +-
 src/armnn/test/optimizations/FoldPadTests.cpp      |  19 +-
 .../test/optimizations/FuseActivationTests.cpp     |  37 +--
 .../test/optimizations/FuseBatchNormTests.cpp      | 124 +++++++--
 src/armnnDeserializer/Deserializer.cpp             | 152 +++++++----
 src/armnnDeserializer/test/DeserializeConstant.cpp |   5 +-
 .../test/DeserializeDepthwiseConv2d.cpp            | 280 ++++++++++++++++++++-
 src/armnnDeserializer/test/DeserializeGather.cpp   |   5 +-
 src/armnnDeserializer/test/DeserializeGatherNd.cpp |   5 +-
 src/armnnOnnxParser/OnnxParser.cpp                 |  27 +-
 src/armnnSerializer/Serializer.cpp                 |  17 +-
 src/armnnSerializer/Serializer.hpp                 |   1 -
 src/armnnSerializer/test/SerializerTestUtils.cpp   |  10 +-
 src/armnnSerializer/test/SerializerTestUtils.hpp   |   1 +
 src/armnnSerializer/test/SerializerTests.cpp       |  89 ++++++-
 src/armnnTestUtils/CreateWorkload.hpp              |  26 +-
 src/armnnTfLiteParser/TfLiteParser.cpp             |  37 +--
 src/backends/aclCommon/ArmComputeSubgraphUtils.hpp |  25 +-
 src/backends/backendsCommon/WorkloadData.cpp       |  16 +-
 src/backends/backendsCommon/WorkloadFactory.cpp    |  28 ++-
 src/backends/backendsCommon/test/CMakeLists.txt    |   1 +
 .../test/DepthwiseConvolution2dEndToEndTests.hpp   | 183 ++++++++++++++
 .../backendsCommon/test/OptimizationViewsTests.cpp |   6 +-
 .../test/layerTests/Conv2dTestImpl.cpp             | 143 +++++++++--
 src/backends/cl/ClBackend.cpp                      |   4 +-
 src/backends/neon/NeonBackend.cpp                  |   4 +-
 src/backends/neon/NeonTensorHandle.hpp             |   1 +
 src/backends/reference/test/RefLayerTests.cpp      | 220 +++++++++-------
 .../RefDepthwiseConvolution2dWorkload.cpp          |  47 +++-
 .../RefDepthwiseConvolution2dWorkload.hpp          |   2 +
 52 files changed, 1531 insertions(+), 509 deletions(-)
 create mode 100644 src/backends/backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp

diff --git a/delegate/src/Convolution.hpp b/delegate/src/Convolution.hpp
index 1b5ed40f0c..f02a56fc7d 100644
--- a/delegate/src/Convolution.hpp
+++ b/delegate/src/Convolution.hpp
@@ -513,22 +513,23 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
         return isSupported ? kTfLiteOk : kTfLiteError;
     }
 
-    armnn::IConnectableLayer* layer = nullptr;
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor);
 
-    if(biasEnabled)
-    {
-        auto biases =
-            CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]],
-                              biasTensorInfo);
-        layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                                       filter,
-                                                                       armnn::Optional<armnn::ConstTensor>(biases));
-    }
-    else
+    armnn::IConnectableLayer* weightsLayer = delegateData.m_Network->AddConstantLayer(filter);
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(filterTensorInfo);
+
+    if (biasEnabled)
     {
-        layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                                       filter,
-                                                                       armnn::EmptyOptional());
+        const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+        if(tflite::IsConstantTensor(&tfLiteBiasTensor))
+        {
+            auto biasTensor = CreateConstTensor(&tfLiteBiasTensor, biasTensorInfo);
+            armnn::IConnectableLayer* biasLayer = delegateData.m_Network->AddConstantLayer(biasTensor);
+            ARMNN_ASSERT(biasLayer != nullptr);
+            biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
+            biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensorInfo);
+        }
     }
 
     ARMNN_ASSERT(layer != nullptr);
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 4c2242e1ad..4aa23a4ee0 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -648,6 +648,9 @@ struct DepthwiseConvolution2dDescriptor : BaseDescriptor
                m_DataLayout  == rhs.m_DataLayout;
     }
 
+    /// Get the number of views/inputs.
+    uint32_t GetNumInputs() const;
+
     /// Padding left value in the width dimension.
     uint32_t   m_PadLeft;
     /// Padding right value in the width dimension.
diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
index 3961ae347a..d5e3d11fbd 100644
--- a/include/armnn/ILayerVisitor.hpp
+++ b/include/armnn/ILayerVisitor.hpp
@@ -119,6 +119,15 @@ public:
                                         const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                         const char* name = nullptr) = 0;
 
+    /// Function that a 2D depthwise convolution layer with biases should call back to when its
+    /// Accept(ILayerVisitor&) function is invoked.
+    /// @param layer - pointer to the layer which is calling back to this visit function.
+    /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
+    /// @param name - Optional name for the layer.
+    virtual void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
+                                                  const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
+                                                  const char* name = nullptr) = 0;
+
     /// Function that a 2D depthwise convolution layer with biases should call back to when its
     /// Accept(ILayerVisitor&) function is invoked.
     /// @param layer - pointer to the layer which is calling back to this visit function.
@@ -126,6 +135,7 @@ public:
     /// @param weights - Tensor for the weights. Expected format: [channelMultiplier, inputChannels, height, width].
     /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape.
     /// @param name - Optional name for the layer.
+    ARMNN_DEPRECATED_MSG("Use VisitDepthwiseConvolution2dLayer without ConstTensors")
     virtual void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
                                                   const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
                                                   const ConstTensor& weights,
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 7488fdc026..1d6276bfe7 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -327,12 +327,21 @@ public:
     IConnectableLayer* AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                             const char* name = nullptr);
 
+    /// Adds a 2D depthwise convolution layer to the network.
+    /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
+    IConnectableLayer* AddDepthwiseConvolution2dLayer(
+        const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
+        const char* name = nullptr);
+
     /// Adds a 2D depthwise convolution layer to the network.
     /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
     /// @param weights - Tensor for the weights. Expected format: [channelMultiplier, inputChannels, height, width].
     /// @param biases Optional tensor for the bias data. If specified, must match the output tensor shape.
     /// @param name - Optional name for the layer.
     /// @return - Interface for configuring the layer.
+    ARMNN_DEPRECATED_MSG("This AddDepthwiseConvolution2dLayer overload is deprecated")
     IConnectableLayer* AddDepthwiseConvolution2dLayer(
         const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
         const ConstTensor& weights,
diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp
index 3d43725527..acc8aa00b3 100644
--- a/include/armnn/LayerVisitorBase.hpp
+++ b/include/armnn/LayerVisitorBase.hpp
@@ -76,6 +76,10 @@ public:
                                 const DepthToSpaceDescriptor&,
                                 const char*) override { DefaultPolicy::Apply(__func__); }
 
+    void VisitDepthwiseConvolution2dLayer(const IConnectableLayer*,
+                                          const DepthwiseConvolution2dDescriptor&,
+                                          const char*) override { DefaultPolicy::Apply(__func__); }
+
     void VisitDepthwiseConvolution2dLayer(const IConnectableLayer*,
                                           const DepthwiseConvolution2dDescriptor&,
                                           const ConstTensor&,
diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp
index e2aa67275f..03f32ac191 100644
--- a/src/armnn/BackendHelper.cpp
+++ b/src/armnn/BackendHelper.cpp
@@ -439,6 +439,32 @@ bool LayerSupportHandle::IsDepthwiseConvolutionSupported(
     TensorInfo biasesVal =  biases.has_value() ? biases.value() : TensorInfo();
     TensorInfos infos{input, output, weights, biasesVal};
 
+    Optional<const BackendOptions::BackendOption> capability ;
+    if(!m_BackendId.IsUndefined())
+    {
+        capability = GetCapability("ConstantTensorsAsInputs", m_BackendId);
+        if(!capability.has_value() || capability.value().GetValue().AsBool() == false)
+        {
+            if(!weights.IsConstant())
+            {
+                return false;
+            }
+            if(descriptor.m_BiasEnabled)
+            {
+                if(!biases.value().IsConstant())
+                {
+                    return false;
+                }
+            }
+            // At the first stage we will only print a warning. This is to give
+            // backend developers a chance to adapt and read weights from input slots.
+            ARMNN_LOG(warning) << "The backend makes use of a deprecated interface to read constant tensors. "
+                                  "If you are a backend developer please find more information in our "
+                                  "doxygen documentation on github https://github.com/ARM-software/armnn "
+                                  "under the keyword 'ConstTensorsAsInputs'.";
+        }
+    }
+
     return m_LayerSupport->IsLayerSupported(LayerType::DepthwiseConvolution2d,
                                             infos,
                                             descriptor,
@@ -492,6 +518,32 @@ bool LayerSupportHandle::IsDilatedDepthwiseConvolutionSupported(
     TensorInfo biasesVal =  biases.has_value() ? biases.value() : TensorInfo();
     TensorInfos infos{input, output, weights, biasesVal};
 
+    Optional<const BackendOptions::BackendOption> capability ;
+    if(!m_BackendId.IsUndefined())
+    {
+        capability = GetCapability("ConstantTensorsAsInputs", m_BackendId);
+        if(!capability.has_value() || capability.value().GetValue().AsBool() == false)
+        {
+            if(!weights.IsConstant())
+            {
+                return false;
+            }
+            if(descriptor.m_BiasEnabled)
+            {
+                if(!biases.value().IsConstant())
+                {
+                    return false;
+                }
+            }
+            // At the first stage we will only print a warning. This is to give
+            // backend developers a chance to adapt and read weights from input slots.
+            ARMNN_LOG(warning) << "The backend makes use of a deprecated interface to read constant tensors. "
+                                  "If you are a backend developer please find more information in our "
+                                  "doxygen documentation on github https://github.com/ARM-software/armnn "
+                                  "under the keyword 'ConstTensorsAsInputs'.";
+        }
+    }
+
     return m_LayerSupport->IsLayerSupported(LayerType::DepthwiseConvolution2d,
                                             infos,
                                             descriptor,
@@ -590,8 +642,8 @@ bool LayerSupportHandle::IsFullyConnectedSupported(const TensorInfo& input,
                 if (reasonIfUnsupported.has_value())
                 {
                     reasonIfUnsupported.value() =
-                            "This backend might not support non constant weights. "
-                            "If weights are constant make sure to set IsConstant when creating TensorInfo";
+                        "This backend might not support non constant weights. "
+                        "If weights are constant make sure to set IsConstant when creating TensorInfo";
                 }
 
                 return false;
@@ -603,8 +655,8 @@ bool LayerSupportHandle::IsFullyConnectedSupported(const TensorInfo& input,
                     if (reasonIfUnsupported.has_value())
                     {
                         reasonIfUnsupported.value() =
-                                "This backend might not support non constant weights. "
-                                "If weights are constant make sure to set IsConstant when creating TensorInfo";
+                            "This backend might not support non constant bias. "
+                            "If bias are constant make sure to set IsConstant when creating TensorInfo";
                     }
                     return false;
                 }
diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp
index ef55ee7bb5..d67d4404e0 100644
--- a/src/armnn/Descriptors.cpp
+++ b/src/armnn/Descriptors.cpp
@@ -452,4 +452,15 @@ uint32_t Convolution3dDescriptor::GetNumInputs() const
     return numInputs;
 }
 
+uint32_t DepthwiseConvolution2dDescriptor::GetNumInputs() const
+{
+    // Input and weights are always present (2 inputs); an enabled bias adds a third.
+    unsigned int numInputs = 2;
+    if (m_BiasEnabled)
+    {
+        numInputs = 3;
+    }
+    return numInputs;
+}
+
 }
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 1bea6cc2ae..c1cec482b6 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -603,7 +603,8 @@ void Graph::ConstructErrorMessageForUnconnectedInputs(Layer* const layer,
     bool noWeightsAndBias = false;
 
     if ((layer->GetType() == armnn::LayerType::FullyConnected ||
-         layer->GetType() == armnn::LayerType::Convolution3d) && slotIndex > 0)
+         layer->GetType() == armnn::LayerType::Convolution3d  ||
+         layer->GetType() == armnn::LayerType::DepthwiseConvolution2d) && slotIndex > 0)
     {
         // If weights are not set and is bias enabled, also check if bias is set
         if (slotIndex == 1 && layer->GetNumInputSlots() == 3)
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index 805612d1cb..a31119b395 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -23,6 +23,19 @@ namespace armnn
 // Instantiate the static member variable
 NullDescriptor Layer::m_NullDescriptor;
 
+template <typename LayerT>
+void AssertMultipleInputSlots(Layer& layer)
+{
+    if(PolymorphicDowncast<const LayerT*>(&(layer.GetParameters()))->m_BiasEnabled)
+    {
+        ARMNN_ASSERT(layer.GetNumInputSlots() == 3);
+    }
+    else
+    {
+        ARMNN_ASSERT(layer.GetNumInputSlots() == 2);
+    }
+}
+
 void InputSlot::Insert(Layer& layer)
 {
     ARMNN_ASSERT(layer.GetNumOutputSlots() == 1);
@@ -34,8 +47,21 @@ void InputSlot::Insert(Layer& layer)
         // Disconnects parent from this.
         prevSlot->Disconnect(*this);
 
+        switch (layer.GetType())
+        {
+            case LayerType::DepthwiseConvolution2d:
+            {
+                AssertMultipleInputSlots<DepthwiseConvolution2dDescriptor>(layer);
+                break;
+            }
+            default:
+            {
+                ARMNN_ASSERT(layer.GetNumInputSlots() == 1);
+                break;
+            }
+        }
+
         // Connects inserted layer to parent.
-        ARMNN_ASSERT(layer.GetNumInputSlots() == 1);
         int idx = prevSlot->Connect(layer.GetInputSlot(0));
         prevSlot->SetEdgeStrategy(armnn::numeric_cast<unsigned int>(idx), EdgeStrategy::Undefined);
 
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index f10fb89e15..a88fa5ab9c 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -272,6 +272,8 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
         timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
     }
 
+    std::vector<IWorkload*> ConstWorkloads;
+
     //Then create workloads.
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
@@ -325,6 +327,11 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                     else
                     {
                         m_WorkloadQueue.push_back(std::move(workload));
+
+                        if (layer->GetType() == LayerType::Constant)
+                        {
+                            ConstWorkloads.push_back(m_WorkloadQueue.back().get());
+                        }
                     }
 
                     // release the constant data in the layer..
@@ -506,6 +513,17 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
             AllocateAndExecuteConstantWorkloadsAsync();
         }
     }
+
+    // If synchronous, execute all constant layer workloads as the FoldPad optimization
+    // may have created a new conv2d layer prior to the input constant layers which will
+    // cause a failure if constant workloads are not executed
+    if (!networkProperties.m_AsyncEnabled)
+    {
+        for (auto workload: ConstWorkloads)
+        {
+            workload->Execute();
+        }
+    }
 }
 
 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
@@ -519,8 +537,6 @@ void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
     }
 }
 
-
-
 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 226d478110..1f4e72771c 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -127,6 +127,15 @@ IConnectableLayer* INetwork::AddDepthToSpaceLayer(const DepthToSpaceDescriptor&
 }
 
 
+IConnectableLayer* INetwork::AddDepthwiseConvolution2dLayer(
+    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
+    const char* name)
+{
+    return pNetworkImpl->AddDepthwiseConvolution2dLayer(convolution2dDescriptor, name);
+}
+
+
+ARMNN_NO_DEPRECATE_WARN_BEGIN
 IConnectableLayer* INetwork::AddDepthwiseConvolution2dLayer(
     const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
     const ConstTensor& weights,
@@ -135,6 +144,7 @@ IConnectableLayer* INetwork::AddDepthwiseConvolution2dLayer(
 {
     return pNetworkImpl->AddDepthwiseConvolution2dLayer(convolution2dDescriptor, weights, biases, name);
 }
+ARMNN_NO_DEPRECATE_WARN_END
 
 
 IConnectableLayer* INetwork::AddDequantizeLayer(const char* name)
@@ -1727,7 +1737,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                                                 PermuteAsReshape(),
                                                 TransposeAsReshape(),
                                                 OptimizeConsecutiveReshapes(),
-                                                RedirectMembersToConstantInputs(),
                                                 FoldPadIntoConvolution2d(),
                                                 FoldPadIntoDepthwiseConvolution2d(),
                                                 FoldPadIntoPooling2d(),
@@ -1736,7 +1745,8 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                                                 FuseBatchNormIntoConvolution2DFloat32(),
                                                 FuseBatchNormIntoConvolution2DFloat16(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
-                                                FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
+                                                FuseBatchNormIntoDepthwiseConvolution2DFloat16(),
+                                                RedirectMembersToConstantInputs()));
 
     // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
     if (options.m_ReduceFp32ToFp16)
@@ -2066,38 +2076,43 @@ IConnectableLayer* NetworkImpl::AddDepthToSpaceLayer(const DepthToSpaceDescripto
     return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
 }
 
-IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayerImpl(
-        const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
-        const ConstTensor& weights,
-        const Optional<ConstTensor>& biases,
-        const char* name)
+IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
+    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
+    const char* name)
 {
-    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
-    {
-        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
-    }
+    return m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
+}
 
-    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
+IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
+    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
+    const ConstTensor& weights,
+    const Optional<ConstTensor>& biases,
+    const char* name)
+{
+    auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
 
+    // Add a constant layer for weights
+    ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights");
+    weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
     layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
 
-    if (convolution2dDescriptor.m_BiasEnabled)
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+    // Add a constant layer for biases
+    if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled)
     {
+        ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias");
+        biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value());
         layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
+
+        biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());
+        biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
     }
 
     return layer;
 }
 
-IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
-        const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
-        const ConstTensor& weights,
-        const Optional<ConstTensor>& biases,
-        const char* name)
-{
-    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
-}
-
 IConnectableLayer* NetworkImpl::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                          const ConstTensor& anchors, const char* name)
 {
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index 6e4d29e490..c5ed8de50d 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -94,6 +94,11 @@ public:
     IConnectableLayer* AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                             const char* name = nullptr);
 
+    IConnectableLayer* AddDepthwiseConvolution2dLayer(
+        const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
+        const char* name = nullptr);
+
+    ARMNN_DEPRECATED_MSG("This AddDepthwiseConvolution2dLayer overload is deprecated")
     IConnectableLayer* AddDepthwiseConvolution2dLayer(
         const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
         const ConstTensor& weights,
@@ -256,11 +261,6 @@ private:
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name);
 
-    IConnectableLayer* AddDepthwiseConvolution2dLayerImpl(const DepthwiseConvolution2dDescriptor& conv2dDescriptor,
-                                                          const ConstTensor& weights,
-                                                          const Optional<ConstTensor>& biases,
-                                                          const char* name);
-
     bool GetShapeInferenceMethod();
     NetworkOptions m_NetworkOptions;
 
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
index b23661b4a8..08f6fafa1b 100644
--- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
+++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
@@ -22,7 +22,7 @@ namespace armnn
 
 DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param,
                                                          const char* name)
-    : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name)
+    : LayerWithParameters(param.GetNumInputs(), 1, LayerType::DepthwiseConvolution2d, param, name)
 {
 }
 
@@ -31,10 +31,9 @@ void DepthwiseConvolution2dLayer::SerializeLayerParameters(ParameterStringifyFun
     const std::vector<TensorShape>& inputShapes =
     {
         GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(),
-        m_Weight->GetTensorInfo().GetShape()
+        GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()
     };
     const TensorShape filterShape = inputShapes[1];
-    DataLayoutIndexed dataLayoutIndex(m_Param.m_DataLayout);
     unsigned int inputChannels = filterShape[1];
     unsigned int filterWidth = filterShape[3];
     unsigned int filterHeight = filterShape[2];
@@ -50,16 +49,14 @@ void DepthwiseConvolution2dLayer::SerializeLayerParameters(ParameterStringifyFun
 
 std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const IWorkloadFactory& factory) const
 {
-    // on this level constant data should not be released..
-    ARMNN_ASSERT_MSG(m_Weight != nullptr, "DepthwiseConvolution2dLayer: Weights data should not be null.");
-
     DepthwiseConvolution2dQueueDescriptor descriptor;
 
-    descriptor.m_Weight = m_Weight.get();
-
-    if (m_Param.m_BiasEnabled)
+    if (m_Weight)
+    {
+        descriptor.m_Weight = m_Weight.get();
+    }
+    if (m_Param.m_BiasEnabled && m_Bias)
     {
-        ARMNN_ASSERT_MSG(m_Bias != nullptr, "DepthwiseConvolution2dLayer: Bias data should not be null.");
         descriptor.m_Bias = m_Bias.get();
     }
 
@@ -124,19 +121,19 @@ DepthwiseConvolution2dLayer::InferOutputShapes(const std::vector<TensorShape>& i
 
 void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
 {
-    VerifyLayerConnections(1, CHECK_LOCATION());
+    VerifyLayerConnections(m_Param.GetNumInputs(), CHECK_LOCATION());
 
     const TensorShape& outputShape = GetOutputSlot(0).GetTensorInfo().GetShape();
 
     VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod);
 
-    // on this level constant data should not be released..
-    ARMNN_ASSERT_MSG(m_Weight != nullptr, "DepthwiseConvolution2dLayer: Weights data should not be null.");
+    ARMNN_ASSERT_MSG(GetInputSlot(1).GetConnection(),
+                     "DepthwiseConvolution2dLayer: Weights data should not be null.");
 
     auto inferredShapes = InferOutputShapes({
         GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(),
-        m_Weight->GetTensorInfo().GetShape()
-     });
+        GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()
+    });
 
     ARMNN_ASSERT(inferredShapes.size() == 1);
 
@@ -152,33 +149,13 @@ Layer::ConstantTensors DepthwiseConvolution2dLayer::GetConstantTensorsByRef()
 ARMNN_NO_DEPRECATE_WARN_BEGIN
 void DepthwiseConvolution2dLayer::Accept(ILayerVisitor& visitor) const
 {
-    ManagedConstTensorHandle managedWeight(m_Weight);
-    ConstTensor weightsTensor(managedWeight.GetTensorInfo(), managedWeight.Map());
-    Optional<ConstTensor> optionalBiasTensor = EmptyOptional();
-
-    ManagedConstTensorHandle managedBias(m_Bias);
-    if (GetParameters().m_BiasEnabled)
-    {
-        ConstTensor biasTensor(managedBias.GetTensorInfo(), managedBias.Map());
-        optionalBiasTensor = Optional<ConstTensor>(biasTensor);
-    }
-
-    visitor.VisitDepthwiseConvolution2dLayer(this, GetParameters(), weightsTensor, optionalBiasTensor, GetName());
+    visitor.VisitDepthwiseConvolution2dLayer(this, GetParameters(), GetName());
 }
 ARMNN_NO_DEPRECATE_WARN_END
 
 void DepthwiseConvolution2dLayer::ExecuteStrategy(IStrategy& strategy) const
 {
-    ManagedConstTensorHandle managedWeight(m_Weight);
-    std::vector<armnn::ConstTensor> constTensors { { managedWeight.GetTensorInfo(), managedWeight.Map() } };
-
-    ManagedConstTensorHandle managedBias(m_Bias);
-    if (GetParameters().m_BiasEnabled)
-    {
-        constTensors.emplace_back(ConstTensor(managedBias.GetTensorInfo(), managedBias.Map(true)));
-    }
-
-    strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName());
+    strategy.ExecuteStrategy(this, GetParameters(), {}, GetName());
 }
 
 } // namespace armnn
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.hpp b/src/armnn/layers/DepthwiseConvolution2dLayer.hpp
index 8f8f020a0f..e8ae9a6e79 100644
--- a/src/armnn/layers/DepthwiseConvolution2dLayer.hpp
+++ b/src/armnn/layers/DepthwiseConvolution2dLayer.hpp
@@ -16,8 +16,10 @@ class DepthwiseConvolution2dLayer : public LayerWithParameters<DepthwiseConvolut
 {
 public:
     /// A unique pointer to store Weight values.
+    /// @Note Deprecated. Weights are stored in ConstantLayers now.
     std::shared_ptr<ConstTensorHandle> m_Weight;
     /// A unique pointer to store Bias values.
+    /// @Note Deprecated. Biases are stored in ConstantLayers now.
     std::shared_ptr<ConstTensorHandle> m_Bias;
 
     /// Makes a workload for the DepthwiseConvolution2d type.
@@ -60,6 +62,8 @@ protected:
 
     /// Retrieve the handles to the constant values stored by the layer.
     /// @return A vector of the constant tensors stored by this layer.
+    /// @Note Deprecated. GetConstantTensorsByRef is deprecated. m_Weight and m_Bias
+    ///                   should be connected to layer as Constant Layers instead.
     ConstantTensors GetConstantTensorsByRef() override;
 };
 
diff --git a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
index 87117debe9..bbaabb815e 100644
--- a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
+++ b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
@@ -191,21 +191,26 @@ class FoldPadIntoDepthwiseConvolution2dImpl
 public:
     void Run(Graph& graph, InputSlot& connection) const
     {
-        const auto newConv2dLayer = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);
+        const auto newLayer2d = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);
 
-        if (newConv2dLayer != nullptr)
+        if (newLayer2d != nullptr)
         {
-            const auto conv2dLayer = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
-            // Copy weights and bias to the new convolution layer
-            ARMNN_ASSERT_MSG(conv2dLayer->m_Weight != nullptr,
+            const auto layer2d = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
+
+            // Move weights and bias layer connections to the new convolution layer
+            ARMNN_ASSERT_MSG(layer2d->GetInputSlot(1).GetConnection() != nullptr,
                              "FoldPadIntoDepthwiseConvolution2d: Weights data should not be null.");
-            newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight);
+            Layer& weightLayer = layer2d->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
+            weightLayer.GetOutputSlot(0).Disconnect(layer2d->GetInputSlot(1));
+            weightLayer.GetOutputSlot(0).Connect(newLayer2d->GetInputSlot(1));
 
-            if (conv2dLayer->GetParameters().m_BiasEnabled)
+            if (layer2d->GetParameters().m_BiasEnabled)
             {
-                ARMNN_ASSERT_MSG(conv2dLayer->m_Bias != nullptr,
+                ARMNN_ASSERT_MSG(layer2d->GetInputSlot(2).GetConnection() != nullptr,
                                 "FoldPadIntoDepthwiseConvolution2d: Bias data should not be null if bias is enabled.");
-                newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias);
+                Layer& biasLayer = layer2d->GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer();
+                biasLayer.GetOutputSlot(0).Disconnect(layer2d->GetInputSlot(2));
+                biasLayer.GetOutputSlot(0).Connect(newLayer2d->GetInputSlot(2));
             }
         }
     }
diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp
index 66f722a8ef..6a50fc4a0c 100644
--- a/src/armnn/optimizations/FuseBatchNorm.hpp
+++ b/src/armnn/optimizations/FuseBatchNorm.hpp
@@ -50,12 +50,28 @@ public:
             ConstTensor meanTensor(batchNormLayer->m_Mean->GetTensorInfo(), batchNormLayer->m_Mean->Map(true));
             ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true));
 
-            auto convDescriptor = convLayer->GetParameters();
-            auto weightsInfo(convLayer->m_Weight->GetTensorInfo());
-            ConstTensor weightsTensor(weightsInfo, convLayer->m_Weight->Map(true));
+            auto        convDescriptor = convLayer->GetParameters();
+            ConstTensor weightsTensor;
+            if (convLayer->GetNumInputSlots() > 1)
+            {
+                ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[1].GetConnection() != nullptr,
+                                 "FuseBatchNorm: Weight data should not be null.");
+                InputSlot    & oldSlotWeights      = const_cast<InputSlot&>(convLayer->GetInputSlots()[1]);
+                OutputSlot   & constantSlotWeights = const_cast<OutputSlot&>(*oldSlotWeights.GetConnectedOutputSlot());
+                ConstantLayer* weightLayer         = PolymorphicDowncast<ConstantLayer*>(
+                    &constantSlotWeights.GetOwningLayer());
+                weightsTensor = ConstTensor(weightLayer->m_LayerOutput->GetTensorInfo(),
+                                            weightLayer->m_LayerOutput->Map(true));
+            }
+            else
+            {
+                ARMNN_ASSERT_MSG(convLayer->m_Weight != nullptr,
+                                 "FuseBatchNorm: Weight data should not be null.");
+                weightsTensor = ConstTensor(convLayer->m_Weight->GetTensorInfo(), convLayer->m_Weight->Map(true));
+            }
 
             armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
-            auto weightsShape = weightsInfo.GetShape();
+            auto weightsShape = weightsTensor.GetInfo().GetShape();
             const unsigned int inputChannels   = parentOut->GetTensorInfo().GetShape()[dataLayout.GetChannelsIndex()];
             const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1;
             const unsigned int outputChannels  = depthwise ? weightsShape[3] : weightsShape[0];
@@ -116,16 +132,32 @@ public:
                     }
                 }
             }
-            ConstTensor fusedWeightsTensor(weightsInfo, fusedWeightsVector);
+            ConstTensor fusedWeightsTensor(weightsTensor.GetInfo(), fusedWeightsVector);
 
             //  fusedBias = (gamma * (bias - mean)) / (variance - epsilon) + beta;
             std::vector<T> fusedBiasVector(outputChannels);
-            if (convDescriptor.m_BiasEnabled)
+            bool biasWasEnabledBeforeOpt = convDescriptor.m_BiasEnabled;
+            if (biasWasEnabledBeforeOpt)
             {
-                ARMNN_ASSERT_MSG(convLayer->m_Bias != nullptr,
-                                 "FuseBatchNorm: Bias data should not be null if bias is enabled.");
+                ConstTensor biasTensor;
+                if (convLayer->GetNumInputSlots() > 1)
+                {
+                    ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[2].GetConnection() != nullptr,
+                                     "FuseBatchNorm: Bias data should not be null if bias is enabled.");
+                    InputSlot    & oldSlotBias      = const_cast<InputSlot&>(convLayer->GetInputSlots()[2]);
+                    OutputSlot   & constantSlotBias = const_cast<OutputSlot&>(*oldSlotBias.GetConnectedOutputSlot());
+                    ConstantLayer* biasLayer        = PolymorphicDowncast<ConstantLayer*>(
+                        &constantSlotBias.GetOwningLayer());
+                    biasTensor = ConstTensor(biasLayer->m_LayerOutput->GetTensorInfo(),
+                                             biasLayer->m_LayerOutput->Map(true));
+                }
+                else
+                {
+                    ARMNN_ASSERT_MSG(convLayer->m_Bias != nullptr,
+                                     "FuseBatchNorm: Bias data should not be null if bias is enabled.");
+                    biasTensor = ConstTensor(convLayer->m_Bias->GetTensorInfo(), convLayer->m_Bias->Map(true));
+                }
 
-                ConstTensor biasTensor(convLayer->m_Bias->GetTensorInfo(), convLayer->m_Bias->Map(true));
                 const auto* biasBuffer = static_cast<const T*>(biasTensor.GetMemoryArea());
                 std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
 
@@ -156,6 +188,40 @@ public:
             newConv2dLayer.m_Weight = std::make_unique<ScopedTensorHandle>(fusedWeightsTensor);
             newConv2dLayer.m_Bias = std::make_unique<ScopedTensorHandle>(ConstTensor(fusedBiasTensor));
 
+            // Connect weights and bias from old to new Conv2d layer
+            // This optimization will always have 3 input slots on the Conv2d base layer
+            if (newConv2dLayer.GetNumInputSlots() > 1)
+            {
+                ConstantLayer* weightLayer = PolymorphicDowncast<ConstantLayer*>(
+                    &base.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer());
+                // Remove old connection and connect to new layer2d
+                weightLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(1));
+                weightLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(1));
+                weightLayer->m_LayerOutput = newConv2dLayer.m_Weight;
+
+                // Move bias const layers as normal if it was enabled before the optimisation
+                ConstantLayer* biasLayer;
+                if (biasWasEnabledBeforeOpt)
+                {
+                    biasLayer = PolymorphicDowncast<ConstantLayer*>(
+                        &base.GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer());
+                    // Remove old connection and connect to new layer2d
+                    biasLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(2));
+                    biasLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(2));
+
+                }
+                // Otherwise create a new bias layer and add to the new convolution2d
+                else
+                {
+                    // Add in bias constant layer
+                    biasLayer = graph.AddLayer<ConstantLayer>("Bias");
+                    biasLayer->GetOutputSlot(0).SetTensorInfo(fusedBiasTensor.GetInfo());
+                    biasLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(2));
+                }
+                biasLayer->m_LayerOutput = newConv2dLayer.m_Bias;
+            }
+
+
             // Reconnects with original parent.
             newConv2dLayer.GetOutputSlot().MoveAllConnections(*parentOut);
             // Parent is now the new convolution2d layer.
diff --git a/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp b/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp
index 85d715c6b1..cb97a0fe32 100644
--- a/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp
+++ b/src/armnn/optimizations/RedirectMembersToConstantInputs.hpp
@@ -31,6 +31,7 @@ public:
             case LayerType::Convolution2d:
                 break;
             case LayerType::DepthwiseConvolution2d:
+                RedirectWeightsAndBiases<DepthwiseConvolution2dLayer>(&layer);
                 break;
             case LayerType::DetectionPostProcess:
                 break;
@@ -80,7 +81,7 @@ private:
     }
 };
 
-using RedirectMembersToConstantInputs = OptimizeForType<FullyConnectedLayer, RedirectMembersToConstantInputsImpl>;
+using RedirectMembersToConstantInputs = OptimizeForType<Layer, RedirectMembersToConstantInputsImpl>;
 
 } // namespace optimizations
 } // namespace armnn
diff --git a/src/armnn/test/ConstTensorLayerVisitor.cpp b/src/armnn/test/ConstTensorLayerVisitor.cpp
index cbc97b3c0e..af0581ce4c 100644
--- a/src/armnn/test/ConstTensorLayerVisitor.cpp
+++ b/src/armnn/test/ConstTensorLayerVisitor.cpp
@@ -230,11 +230,16 @@ TEST_CASE("CheckDepthwiseConvolution2dLayer")
     std::vector<unsigned int> dimensions = {1, 1, 3, 3};
     ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
 
-    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, weights, EmptyOptional());
-
     NetworkImpl net;
 
-    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, weights, EmptyOptional());
+    TestConstantLayerVisitor weightsVisitor(weights);
+    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor);
+
+    IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor);
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+    weightsLayer->ExecuteStrategy(weightsVisitor);
     layer->ExecuteStrategy(visitor);
 }
 
@@ -254,14 +259,16 @@ TEST_CASE("CheckNamedDepthwiseConvolution2dLayer")
     std::vector<unsigned int> dimensions = {1, 1, 3, 3};
     ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
 
-    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, weights, EmptyOptional(), layerName);
-
     NetworkImpl net;
 
-    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor,
-                                                                        weights,
-                                                                        EmptyOptional(),
-                                                                        layerName);
+    TestConstantLayerVisitor weightsVisitor(weights);
+    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, layerName);
+
+    IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, layerName);
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+    weightsLayer->ExecuteStrategy(weightsVisitor);
     layer->ExecuteStrategy(visitor);
 }
 
@@ -284,13 +291,21 @@ TEST_CASE("CheckDepthwiseConvolution2dLayerWithBiases")
     std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
     std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
     ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
-    Optional<ConstTensor> optionalBiases(biases);
 
-    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, weights, optionalBiases);
+    TestConstantLayerVisitor weightsVisitor(weights);
+    TestConstantLayerVisitor biasesVisitor(biases);
+    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor);
 
     NetworkImpl net;
 
-    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBiases);
+    IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+    IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
+    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor);
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+    biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
+
+    weightsLayer->ExecuteStrategy(weightsVisitor);
+    biasesLayer->ExecuteStrategy(biasesVisitor);
     layer->ExecuteStrategy(visitor);
 }
 
@@ -314,13 +329,21 @@ TEST_CASE("CheckNamedDepthwiseConvolution2dLayerWithBiases")
     std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
     std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
     ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
-    Optional<ConstTensor> optionalBiases(biases);
 
-    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, weights, optionalBiases, layerName);
+    TestConstantLayerVisitor weightsVisitor(weights);
+    TestConstantLayerVisitor biasesVisitor(biases);
+    TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, layerName);
 
     NetworkImpl net;
 
-    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBiases, layerName);
+    IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
+    IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
+    IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, layerName);
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+    biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
+
+    weightsLayer->ExecuteStrategy(weightsVisitor);
+    biasesLayer->ExecuteStrategy(biasesVisitor);
     layer->ExecuteStrategy(visitor);
 }
 
diff --git a/src/armnn/test/ConstTensorLayerVisitor.hpp b/src/armnn/test/ConstTensorLayerVisitor.hpp
index 4d887c8e37..00d17b4ae8 100644
--- a/src/armnn/test/ConstTensorLayerVisitor.hpp
+++ b/src/armnn/test/ConstTensorLayerVisitor.hpp
@@ -74,13 +74,9 @@ class TestDepthwiseConvolution2dLayerVisitor : public TestLayerVisitor
 {
 public:
     explicit TestDepthwiseConvolution2dLayerVisitor(const DepthwiseConvolution2dDescriptor& descriptor,
-                                                    const ConstTensor& weights,
-                                                    const Optional<ConstTensor>& biases,
                                                     const char* name = nullptr)
         : TestLayerVisitor(name)
         , m_Descriptor(descriptor)
-        , m_Weights(weights)
-        , m_Biases(biases)
     {}
 
     virtual ~TestDepthwiseConvolution2dLayerVisitor() {}
@@ -99,12 +95,6 @@ public:
                 CheckLayerPointer(layer);
                 CheckLayerName(name);
                 CheckDescriptor(static_cast<const armnn::DepthwiseConvolution2dDescriptor&>(descriptor));
-                CheckConstTensors(m_Weights, constants[0]);
-                if (m_Biases.has_value())
-                {
-                    CHECK(constants.size() == 2);
-                    CheckConstTensors(m_Biases.value(), constants[1]);
-                }
                 break;
             }
             default:
@@ -119,8 +109,6 @@ protected:
 
 private:
     DepthwiseConvolution2dDescriptor m_Descriptor;
-    ConstTensor m_Weights;
-    Optional<ConstTensor> m_Biases;
 };
 
 class TestFullyConnectedLayerVistor : public TestLayerVisitor
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index d3dd499850..95421c5683 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -632,8 +632,10 @@ TEST_CASE("IConnectableLayerConstantTensorsByRef")
     TensorInfo weightsInfo = constInfo;
     ConstTensor weights(weightsInfo, weightData);
     DepthwiseConvolution2dDescriptor desc;
+    ARMNN_NO_DEPRECATE_WARN_BEGIN
+    // GetConstantTensorsByRef() returns {m_Weight, m_Bias} so we need to use the old AddDepthwiseConvolution2dLayer()
     const auto depthwiseLayer = net->AddDepthwiseConvolution2dLayer(desc, weights, EmptyOptional(), "Depthwise");
-
+    ARMNN_NO_DEPRECATE_WARN_END
     const void* resultData = depthwiseLayer->GetConstantTensorsByRef()[0].get()->GetConstTensor<void>();
     auto resultValue = reinterpret_cast<const uint8_t*>(resultData);
     CHECK(resultValue[0] == 3);
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index a7277b78b5..6a13dc6456 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -478,11 +478,10 @@ void CreateDepthwiseConvolution2dGraph(Graph &graph, const unsigned int* inputSh
 {
     armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
     armnn::TensorInfo outputInfo(4, outputShape, DataType::Float32);
+    armnn::TensorInfo weightsInfo(TensorShape(4, weightsShape), armnn::DataType::Float32, 0.0f, 0, true);
 
     std::vector<float> weightsVector(18);
-    armnn::ConstTensor weights(
-            armnn::TensorInfo(4, weightsShape, armnn::DataType::Float32, 0.0f, 0, true),
-            weightsVector);
+    armnn::ConstTensor weights(weightsInfo, weightsVector);
 
     DepthwiseConvolution2dDescriptor desc;
     desc.m_BiasEnabled = false;
@@ -490,15 +489,19 @@ void CreateDepthwiseConvolution2dGraph(Graph &graph, const unsigned int* inputSh
     desc.m_StrideY     = 1;
     desc.m_DataLayout  = dataLayout;
 
-    Layer* input = graph.AddLayer<InputLayer>(0, "input");
-    input->GetOutputSlot().SetTensorInfo(inputInfo);
-
+    InputLayer* input                  = graph.AddLayer<InputLayer>(0, "input");
     DepthwiseConvolution2dLayer* layer = graph.AddLayer<DepthwiseConvolution2dLayer>(desc, "depthwiseConv2d");
-    layer->m_Weight                    = std::make_unique<armnn::ScopedTensorHandle>(weights);
+    ConstantLayer* weightsLayer        = graph.AddLayer<ConstantLayer>("weights");
+    OutputLayer* output                = graph.AddLayer<OutputLayer>(0, "output");
+
+    input->GetOutputSlot().SetTensorInfo(inputInfo);
     layer->GetOutputSlot().SetTensorInfo(outputInfo);
+    weightsLayer->GetOutputSlot().SetTensorInfo(weightsInfo);
+
+    weightsLayer->m_LayerOutput = std::make_unique<armnn::ScopedTensorHandle>(weights);
 
-    Layer* output = graph.AddLayer<OutputLayer>(0, "output");
     input->GetOutputSlot().Connect(layer->GetInputSlot(0));
+    weightsLayer->GetOutputSlot().Connect(layer->GetInputSlot(1));
     layer->GetOutputSlot().Connect(output->GetInputSlot(0));
 }
 
diff --git a/src/armnn/test/ShapeInferenceTests.cpp b/src/armnn/test/ShapeInferenceTests.cpp
index 687462dfb5..d45c9900c0 100644
--- a/src/armnn/test/ShapeInferenceTests.cpp
+++ b/src/armnn/test/ShapeInferenceTests.cpp
@@ -331,18 +331,11 @@ TEST_CASE("DepthwiseConvolutionTest")
     descriptor.m_DataLayout = DataLayout::NHWC;
     descriptor.m_BiasEnabled = false;
 
-    Graph graph;
-
-    auto layer = BuildGraph<DepthwiseConvolution2dLayer>(&graph,
-                                                        {{ 8, 16, 2, 1 }},
-                                                        descriptor,
-                                                        "depthwiseconv2d");
-
-    const float Datum = 0.0f;
-    ConstTensor weights({{ 2, 5, 3, 2 }, DataType::Float32, 0.0f, 0, true}, &Datum);
-    layer->m_Weight = std::make_unique<ScopedTensorHandle>(weights);
-
-    RunShapeInferenceTest<DepthwiseConvolution2dLayer>(layer, {{ 8, 18, 1, 2 }});
+    CreateGraphAndRunTest<DepthwiseConvolution2dLayer>({{ 8, 16, 2, 1 },   // input
+                                                        { 2, 5, 3, 2 }},   // weights
+                                                       {{ 8, 18, 1, 2 }}, // output
+                                                       descriptor,
+                                                       "conv2d");
 }
 
 TEST_CASE("DequantizeTest")
diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp
index 212ae0ee01..048c4f51fd 100644
--- a/src/armnn/test/SubgraphViewTests.cpp
+++ b/src/armnn/test/SubgraphViewTests.cpp
@@ -1928,6 +1928,7 @@ bool ReplaceConstantMultiplicationWithDepthwise(SubgraphView& subgraph,
     if (layer->GetType() == LayerType::Multiplication)
     {
         IInputSlot* patternSubgraphInput = &layer->GetInputSlot(0);
+        IInputSlot* patternSubgraphConstant = &layer->GetInputSlot(1);
 
         const IConnectableLayer* inputLayer    = &patternSubgraphInput->GetConnection()->GetOwningIConnectableLayer();
         const IConnectableLayer* constantLayer = &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer();
@@ -1935,7 +1936,7 @@ bool ReplaceConstantMultiplicationWithDepthwise(SubgraphView& subgraph,
         // Figure out which of the two inputs is the constant
         if (constantLayer->GetType() != LayerType::Constant)
         {
-            patternSubgraphInput = &layer->GetInputSlot(1);
+            std::swap(patternSubgraphInput, patternSubgraphConstant);
             std::swap(inputLayer, constantLayer);
         }
 
@@ -1965,7 +1966,7 @@ bool ReplaceConstantMultiplicationWithDepthwise(SubgraphView& subgraph,
                 ConstTensor           weights(weightsInfo, weightData);
 
                 const auto depthwiseLayer = replacementGraph->AddDepthwiseConvolution2dLayer(
-                        desc, weights, armnn::EmptyOptional(), "Replacement for Constant-Multiplication");
+                        desc, "Replacement for Constant-Multiplication");
 
                 auto& outslot = layer->GetOutputSlot(0);
                 SubgraphView::IOutputSlots outputs{ &outslot };
@@ -1973,7 +1974,9 @@ bool ReplaceConstantMultiplicationWithDepthwise(SubgraphView& subgraph,
                 layers.push_back(layer);
                 layers.push_back(const_cast<IConnectableLayer*>(constantLayer));
 
-                SubgraphView patternSubgraph(std::move(layers), {patternSubgraphInput}, {&layer->GetOutputSlot(0)});
+                SubgraphView patternSubgraph(std::move(layers),
+                                             {patternSubgraphInput, patternSubgraphConstant},
+                                             {&layer->GetOutputSlot(0)});
 
                 subgraph.SubstituteSubgraph(patternSubgraph, depthwiseLayer );
 
diff --git a/src/armnn/test/optimizations/FoldPadTests.cpp b/src/armnn/test/optimizations/FoldPadTests.cpp
index 2f9e1c6d31..9919c6d0e6 100644
--- a/src/armnn/test/optimizations/FoldPadTests.cpp
+++ b/src/armnn/test/optimizations/FoldPadTests.cpp
@@ -126,14 +126,18 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConvolution2dLayer")
 
     auto* depthwiseConv2dLayer = graph.AddLayer<DepthwiseConvolution2dLayer>(depthwiseConvolution2dDescriptor,
                                                                              "depthwiseConv2d");
-    depthwiseConv2dLayer->m_Weight = std::make_unique<ScopedTensorHandle>(weights);
+    auto* weightsLayer = graph.AddLayer<ConstantLayer>("weights");
+
+    weightsLayer->GetOutputSlot().SetTensorInfo(weights.GetInfo());
     depthwiseConv2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);
+    depthwiseConv2dLayer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
 
     Layer* output = graph.AddLayer<OutputLayer>(0, "output");
 
     // Connect up layers - input -> pad -> depthwiseConv2d -> output
     input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
     padLayer->GetOutputSlot().Connect(depthwiseConv2dLayer->GetInputSlot(0));
+    weightsLayer->GetOutputSlot().Connect(depthwiseConv2dLayer->GetInputSlot(1));
     depthwiseConv2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
 
     auto checkSimpleDepthwiseConv2d = [](const Layer* const layer)->bool {
@@ -151,6 +155,7 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConvolution2dLayer")
                              &IsLayerOfType<InputLayer>,
                              &IsLayerOfType<PadLayer>,
                              checkSimpleDepthwiseConv2d,
+                             &IsLayerOfType<ConstantLayer>,
                              &IsLayerOfType<OutputLayer>));
 
     armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoDepthwiseConvolution2d()));
@@ -170,6 +175,7 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConvolution2dLayer")
     CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                              &IsLayerOfType<InputLayer>,
                              checkPadFoldedIntoDepthwiseConv2d,
+                             &IsLayerOfType<ConstantLayer>,
                              &IsLayerOfType<OutputLayer>));
 }
 
@@ -741,11 +747,8 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConv2dLayer_ExecuteInferenceWithAndWithoutOp
         std::vector<float>    biasVector   = {5, 6, 7, 8, 9, 10, 11, 12, 5, 6, 7, 8};
         TensorInfo            biasInfo({12}, DataType::Float32, 0.0f, 0, true);
         ConstTensor           bias(biasInfo, biasVector);
-        Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
 
         IConnectableLayer* conv2dLayer = network->AddDepthwiseConvolution2dLayer(convDescriptor,
-                                                                                 weights,
-                                                                                 optionalBias,
                                                                                  "DepthwiseConv2D");
 
         TensorInfo outputInfo(4, outputShape, DataType::Float32);
@@ -758,6 +761,14 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConv2dLayer_ExecuteInferenceWithAndWithoutOp
         padLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(0));
         conv2dLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
 
+        auto weightsLayer = network->AddConstantLayer(weights, "Weights");
+        weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
+        weightsLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(1));
+
+        auto biasLayer = network->AddConstantLayer(bias, "Bias");
+        biasLayer->GetOutputSlot(0).SetTensorInfo(bias.GetInfo());
+        biasLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(2));
+
         // Create ArmNN runtime
         IRuntimePtr          run              = IRuntime::Create(IRuntime::CreationOptions());    // default options
         // Optimise the network
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index ac327bb609..e5f54208f0 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -90,7 +90,7 @@ struct DWConvolution2dTest
 public:
     using LayerType = DepthwiseConvolution2dLayer;
     static const bool isElementWise = false;
-    static const bool isConstTensorAsInputSupported = false;
+    static const bool isConstTensorAsInputSupported = true;
 
     static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); }   // [N,H,W,Cin]
     static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 12}); }  // [N,H,W,Cout]
@@ -104,32 +104,35 @@ public:
                                                float scale = 1.f,
                                                int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         DepthwiseConvolution2dDescriptor descriptor;
         descriptor.m_BiasEnabled = false;
         descriptor.m_DataLayout  = DataLayout::NHWC;
         descriptor.m_StrideX     = 1;
         descriptor.m_StrideY     = 1;
 
-        std::vector<float> weightsData   = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
-                                            11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
-                                            21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                                            31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
-        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
-        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
-        ConstTensor        weights(weightsInfo, weightsVector);
-        Optional<ConstTensor> optionalBias;
-
-        return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
+        return network->AddDepthwiseConvolution2dLayer(descriptor, name);
     }
 
     static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                              float scale = 1.f,
                                                              int32_t offset = 0)
     {
-        IgnoreUnused(network);
-        IgnoreUnused(scale);
-        IgnoreUnused(offset);
-        return {};
+        std::vector<float> weightsData   = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
+                                             11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                             21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                                             31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
+        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
+        ConstTensor        weights(weightsInfo, weightsVector);
+
+        IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
+        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+        std::vector<IConnectableLayer*> layers = { weightsLayer };
+        return layers;
     }
 };
 
@@ -390,10 +393,10 @@ INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preven
                                                                      "activation");
 
     IConnectableLayer* outputLayer  = network->AddOutputLayer(0);
-    IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):nullptr;
+    IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;
 
     // If ConstTensorAsInputs is supported weights and bias are stored as constant layers.
-    if(LayerTest::isConstTensorAsInputSupported)
+    if (LayerTest::isConstTensorAsInputSupported)
     {
         std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
                                                                                       scale,
diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
index 70cffea2b2..b28bb17773 100644
--- a/src/armnn/test/optimizations/FuseBatchNormTests.cpp
+++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
@@ -24,6 +24,7 @@ class Conv2dTest
 public:
     using ConvDescriptorType            = armnn::Convolution2dDescriptor;
     using ConvLayerType                 = armnn::Convolution2dLayer;
+    static const bool isConstTensorAsInputSupported = false;
 
     static IConnectableLayer *AddConvolution(INetwork *network,
                                              const Convolution2dDescriptor &descriptor,
@@ -33,6 +34,19 @@ public:
     {
         return network->AddConvolution2dLayer(descriptor, weights, biases, name);
     }
+
+    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork *network,
+                                                             const Convolution2dDescriptor &descriptor,
+                                                             const ConstTensor &weights,
+                                                             const Optional<ConstTensor> &biases)
+    {
+        IgnoreUnused(network);
+        IgnoreUnused(descriptor);
+        IgnoreUnused(weights);
+        IgnoreUnused(biases);
+
+        return {};
+    }
 };
 
 class DepthwiseConv2dTest
@@ -40,6 +54,7 @@ class DepthwiseConv2dTest
 public:
     using ConvDescriptorType            = armnn::DepthwiseConvolution2dDescriptor;
     using ConvLayerType                 = armnn::DepthwiseConvolution2dLayer;
+    static const bool isConstTensorAsInputSupported = true;
 
     static IConnectableLayer *AddConvolution(INetwork *network,
                                              const DepthwiseConvolution2dDescriptor &descriptor,
@@ -47,7 +62,29 @@ public:
                                              const Optional<ConstTensor> &biases,
                                              const char *name)
     {
-        return network->AddDepthwiseConvolution2dLayer(descriptor, weights, biases, name);
+        IgnoreUnused(weights);
+        IgnoreUnused(biases);
+
+        return network->AddDepthwiseConvolution2dLayer(descriptor, name);
+    }
+
+    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork *network,
+                                                             const DepthwiseConvolution2dDescriptor &descriptor,
+                                                             const ConstTensor &weights,
+                                                             const Optional<ConstTensor> &biases)
+    {
+        auto weightsLayer = network->AddConstantLayer(weights, "Weights");
+        weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
+        std::vector<IConnectableLayer*> layers = {weightsLayer};
+
+        if (descriptor.m_BiasEnabled)
+        {
+            auto biasLayer = network->AddConstantLayer(biases.value(), "Bias");
+            biasLayer->GetOutputSlot(0).SetTensorInfo(biases.value().GetInfo());
+            layers.emplace_back(biasLayer);
+        }
+
+        return layers;
     }
 };
 
@@ -73,7 +110,7 @@ template <typename Conv2dTest,
           armnn::DataType ArmnnType,
           typename ConvDescriptorType = typename Conv2dTest::ConvDescriptorType,
           typename T = armnn::ResolveType<ArmnnType>>
-INetworkPtr CreatNetwork(bool depthwise, bool preventFusing)
+INetworkPtr CreateNetwork(bool depthwise, bool preventFusing)
 {
     // Define layers information
     ConvDescriptorType convolution2dDescriptor;
@@ -110,11 +147,6 @@ INetworkPtr CreatNetwork(bool depthwise, bool preventFusing)
     TensorInfo weightsInfo(4, weightsDimensionSizes, ArmnnType, 0.0f, 0, true);
     ConstTensor weights(weightsInfo, weightsVector);
 
-    std::vector<T> biasVector = GetVector<T>(outputDimensionSizes[3], 3.3f, 0.1f);
-    TensorInfo biasInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true);
-    ConstTensor bias(biasInfo, biasVector);
-    Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
-
     std::vector<T> betaVector     = GetVector<T>(outputDimensionSizes[3], 0.0f, 0.2f);
     std::vector<T> gammaVector    = GetVector<T>(outputDimensionSizes[3], 0.5f, 0.1f);
     std::vector<T> meanVector     = GetVector<T>(outputDimensionSizes[3], 0.1f, 0.1f);
@@ -133,7 +165,7 @@ INetworkPtr CreatNetwork(bool depthwise, bool preventFusing)
     IConnectableLayer* convLayer      = Conv2dTest::AddConvolution(network.get(),
                                                                    convolution2dDescriptor,
                                                                    weights,
-                                                                   optionalBias,
+                                                                   Optional<ConstTensor>(),
                                                                    "convolution");
 
     IConnectableLayer* batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
@@ -151,6 +183,21 @@ INetworkPtr CreatNetwork(bool depthwise, bool preventFusing)
         output2Layer                  = network->AddOutputLayer(1);
     }
 
+    // If ConstTensorAsInputs is supported weights and bias are stored as constant layers.
+    if (Conv2dTest::isConstTensorAsInputSupported)
+    {
+        std::vector<IConnectableLayer*> constantLayers = Conv2dTest::AddConstantLayers(network.get(),
+                                                                                       convolution2dDescriptor,
+                                                                                       weights,
+                                                                                       Optional<ConstTensor>());
+
+        // Connect each constant layer to convLayer, starting at input slot 1.
+        for (unsigned int i = 0; i < constantLayers.size(); ++i)
+        {
+            constantLayers[i]->GetOutputSlot(0).Connect(convLayer->GetInputSlot(i + 1));
+        }
+    }
+
     // Set layer information
     inputLayer    ->GetOutputSlot(0).SetTensorInfo(inputInfo);
     convLayer     ->GetOutputSlot(0).SetTensorInfo(outputInfo);
@@ -178,7 +225,7 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b
 {
     // FIRST NETWORK: Fused
     // Construct ArmNN network
-    INetworkPtr networkFused = CreatNetwork<Conv2dTest, ArmnnType>(depthwise, false);
+    INetworkPtr networkFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, false);
 
     // Create ArmNN runtime
     IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -194,12 +241,26 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b
                (layer->GetNameStr() == "fused-batchNorm-into-convolution");
     };
 
-    CHECK(3 == graphFused.GetNumLayers());
-    CHECK(CheckSequence(graphFused.cbegin(),
-                             graphFused.cend(),
-                             &IsLayerOfType<InputLayer>,
-                             checkFusedConv2d,
-                             &IsLayerOfType<OutputLayer>));
+    if (Conv2dTest::isConstTensorAsInputSupported)
+    {
+        CHECK(5 == graphFused.GetNumLayers());
+        CHECK(CheckSequence(graphFused.cbegin(),
+                            graphFused.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<ConstantLayer>,
+                            &IsLayerOfType<ConstantLayer>,
+                            checkFusedConv2d,
+                            &IsLayerOfType<OutputLayer>));
+    }
+    else
+    {
+        CHECK(3 == graphFused.GetNumLayers());
+        CHECK(CheckSequence(graphFused.cbegin(),
+                            graphFused.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            checkFusedConv2d,
+                            &IsLayerOfType<OutputLayer>));
+    }
 
     // Load network into runtime
     NetworkId networkIdentifier;
@@ -227,7 +288,7 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b
 
     // SECOND NETWORK: NotFused
     // Construct ArmNN network
-    INetworkPtr networkNotFused = CreatNetwork<Conv2dTest, ArmnnType>(depthwise, true);
+    INetworkPtr networkNotFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, true);
 
     // Create ArmNN runtime
     IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -237,14 +298,29 @@ void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute b
 
     Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());
 
-    CHECK(5 == graphNotFused.GetNumLayers());
-    CHECK(CheckSequence(graphNotFused.cbegin(),
-                             graphNotFused.cend(),
-                             &IsLayerOfType<armnn::InputLayer>,
-                             &IsLayerOfType<ConvLayerType>,
-                             &IsLayerOfType<armnn::BatchNormalizationLayer>,
-                             &IsLayerOfType<armnn::OutputLayer>,
-                             &IsLayerOfType<armnn::OutputLayer>));
+    if (Conv2dTest::isConstTensorAsInputSupported)
+    {
+        CHECK(6 == graphNotFused.GetNumLayers());
+        CHECK(CheckSequence(graphNotFused.cbegin(),
+                            graphNotFused.cend(),
+                            &IsLayerOfType<armnn::InputLayer>,
+                            &IsLayerOfType<armnn::ConstantLayer>,
+                            &IsLayerOfType<ConvLayerType>,
+                            &IsLayerOfType<armnn::BatchNormalizationLayer>,
+                            &IsLayerOfType<armnn::OutputLayer>,
+                            &IsLayerOfType<armnn::OutputLayer>));
+    }
+    else
+    {
+        CHECK(5 == graphNotFused.GetNumLayers());
+        CHECK(CheckSequence(graphNotFused.cbegin(),
+                            graphNotFused.cend(),
+                            &IsLayerOfType<armnn::InputLayer>,
+                            &IsLayerOfType<ConvLayerType>,
+                            &IsLayerOfType<armnn::BatchNormalizationLayer>,
+                            &IsLayerOfType<armnn::OutputLayer>,
+                            &IsLayerOfType<armnn::OutputLayer>));
+    }
 
     // Load network into runtime
     NetworkId networkIdentifierNotFused;
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 93fa99dcc3..704b6c35c1 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -1372,11 +1372,48 @@ void IDeserializer::DeserializerImpl::ParseConstant(GraphPtr graph, unsigned int
     auto serializerInput = serializerLayer->input();
 
     armnn::ConstTensor input = ToConstTensor(serializerInput);
+    IConnectableLayer* layer;
 
-    IConnectableLayer* layer = m_Network->AddConstantLayer(input, layerName.c_str());
+    // Required for when a Constant Layer is used as an input to a DepthwiseConvolution2d Layer.
+    // Handles models created before the weights layout scheme version was added to our flatbuffers
+    // file, ensuring older models can still be read and executed. featureVersion weights layout scheme 1
+    // indicates a change in the depthwise weights layout within ArmNN from [M,I,H,W] --> [1,H,W,I*M]
+    if (this->GetFeatureVersions(graph).m_WeightsLayoutScheme <= 0)
+    {
+        // Permute weights  [ M, I, H, W ] --> [ 1, H, W, I*M ]
+        // Step1: [ M, I, H, W ] --> [ H, W, I, M]
+        PermutationVector permutationVector = { 3, 2, 0, 1 };
+        armnn::TensorInfo weightsInfo = input.GetInfo();
+        std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightsInfo.GetNumBytes()]);
+        weightsInfo = armnnUtils::Permuted(weightsInfo, permutationVector);
+        armnnUtils::Permute(weightsInfo.GetShape(), permutationVector,
+                            input.GetMemoryArea(), permuteBuffer.get(),
+                            GetDataTypeSize(weightsInfo.GetDataType()));
 
-    armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
-    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+        // Step2: Reshape [ H, W, I, M] --> [ 1, H, W, I*M ]
+        auto weightsShape = weightsInfo.GetShape();
+        weightsInfo.SetShape({1,
+                              weightsShape[0],
+                              weightsShape[1],
+                              weightsShape[2]*weightsShape[3]});
+
+        armnn::ConstTensor weightsPermuted(weightsInfo, permuteBuffer.get());
+
+        layer = m_Network->AddConstantLayer(weightsPermuted, layerName.c_str());
+
+        layer->GetOutputSlot(0).SetTensorInfo(weightsPermuted.GetInfo());
+
+        RegisterOutputSlots(graph, layerIndex, layer);
+
+        return;
+    }
+    else
+    {
+        layer = m_Network->AddConstantLayer(input, layerName.c_str());
+
+        armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
+        layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+    }
 
     RegisterOutputSlots(graph, layerIndex, layer);
 }
@@ -1499,7 +1536,6 @@ void IDeserializer::DeserializerImpl::ParseDepthwiseConvolution2d(GraphPtr graph
     CHECK_LAYERS(graph, 0, layerIndex);
     auto inputs = GetInputs(graph, layerIndex);
     CHECK_LOCATION();
-    CHECK_VALID_SIZE(inputs.size(), 1);
 
     auto outputs = GetOutputs(graph, layerIndex);
     CHECK_VALID_SIZE(outputs.size(), 1);
@@ -1509,67 +1545,89 @@ void IDeserializer::DeserializerImpl::ParseDepthwiseConvolution2d(GraphPtr graph
     auto serializerDescriptor = serializerLayer->descriptor();
 
     armnn::DepthwiseConvolution2dDescriptor descriptor;
-    descriptor.m_PadLeft     = serializerDescriptor->padLeft();
-    descriptor.m_PadRight    = serializerDescriptor->padRight();
-    descriptor.m_PadTop      = serializerDescriptor->padTop();
-    descriptor.m_PadBottom   = serializerDescriptor->padBottom();
-    descriptor.m_StrideX     = serializerDescriptor->strideX();
-    descriptor.m_StrideY     = serializerDescriptor->strideY();
-    descriptor.m_DilationX   = serializerDescriptor->dilationX();
-    descriptor.m_DilationY   = serializerDescriptor->dilationY();
-    descriptor.m_BiasEnabled = serializerDescriptor->biasEnabled();;
-    descriptor.m_DataLayout  = ToDataLayout(serializerDescriptor->dataLayout());
+    descriptor.m_PadLeft = serializerDescriptor->padLeft();
+    descriptor.m_PadRight = serializerDescriptor->padRight();
+    descriptor.m_PadTop = serializerDescriptor->padTop();
+    descriptor.m_PadBottom = serializerDescriptor->padBottom();
+    descriptor.m_StrideX = serializerDescriptor->strideX();
+    descriptor.m_StrideY = serializerDescriptor->strideY();
+    descriptor.m_DilationX = serializerDescriptor->dilationX();
+    descriptor.m_DilationY = serializerDescriptor->dilationY();
+    descriptor.m_BiasEnabled = serializerDescriptor->biasEnabled();
+    descriptor.m_DataLayout = ToDataLayout(serializerDescriptor->dataLayout());
 
     IConnectableLayer* layer;
+    std::vector<unsigned int> ignoreSlots {};
 
-    armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
-    if (descriptor.m_BiasEnabled)
-    {
-        armnn::ConstTensor biases = ToConstTensor(serializerLayer->biases());
-        optionalBiases = armnn::Optional<armnn::ConstTensor>(biases);
-    }
-
-    armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights());
-    // The data layout for weights in ArmNN used to be [M,I,H,W] but now it's changed to [1,H,W,I*M]
-    // When reading older flatbuffer files we need to add a permutation to get to the new layout.
-    if (this->GetFeatureVersions(graph).m_WeightsLayoutScheme <= 0)
+    // Weights and biases used to be always constant and were stored as members of the layer. This has changed and
+    // they are now passed as inputs. If they are constant then they will be stored in a ConstantLayer.
+    if (this->GetFeatureVersions(graph).m_ConstTensorsAsInputs <= 0)
     {
-        // Permute weights  [ H, W, M, I ] --> [ 1, H, W, I*M ]
-        // Step1: [ M, I, H, W ] --> [ H, W, I, M]
-        PermutationVector permutationVector = { 3, 2, 0, 1 };
-        armnn::TensorInfo weightsInfo = weights.GetInfo();
-        std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightsInfo.GetNumBytes()]);
-        weightsInfo = armnnUtils::Permuted(weightsInfo, permutationVector);
-        armnnUtils::Permute(weightsInfo.GetShape(), permutationVector,
-                            weights.GetMemoryArea(), permuteBuffer.get(),
-                            GetDataTypeSize(weightsInfo.GetDataType()));
-
-        // Step2: Reshape [ H, W, I, M] --> [ 1, H, W, I*M ]
-        auto weightsShape = weightsInfo.GetShape();
-        weightsInfo.SetShape({1,
-                              weightsShape[0],
-                              weightsShape[1],
-                              weightsShape[2]*weightsShape[3]});
+        CHECK_VALID_SIZE(inputs.size(), 1);
 
-        armnn::ConstTensor weightsPermuted(weightsInfo, permuteBuffer.get());
+        // If the model stores weights and biases as members of the layer we have to read them from there
+        // but add them to their own ConstantLayer for compatibility
+        armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights());
+        ignoreSlots.emplace_back(1u);
 
         layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                          weightsPermuted,
-                                                          optionalBiases,
                                                           layerName.c_str());
+
+        armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
+        if (descriptor.m_BiasEnabled)
+        {
+            armnn::ConstTensor biases = ToConstTensor(serializerLayer->biases());
+            ignoreSlots.emplace_back(2u);
+
+            auto biasLayer = m_Network->AddConstantLayer(biases);
+            biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
+            biasLayer->GetOutputSlot(0).SetTensorInfo(biases.GetInfo());
+        }
+
+        if (this->GetFeatureVersions(graph).m_WeightsLayoutScheme <= 0)
+        {
+            // Permute weights  [ M, I, H, W ] --> [ 1, H, W, I*M ]
+            // Step1: [ M, I, H, W ] --> [ H, W, I, M]
+            PermutationVector permutationVector = { 3, 2, 0, 1 };
+            armnn::TensorInfo weightsInfo = weights.GetInfo();
+            std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightsInfo.GetNumBytes()]);
+            weightsInfo = armnnUtils::Permuted(weightsInfo, permutationVector);
+            armnnUtils::Permute(weightsInfo.GetShape(), permutationVector,
+                                weights.GetMemoryArea(), permuteBuffer.get(),
+                                GetDataTypeSize(weightsInfo.GetDataType()));
+
+            // Step2: Reshape [ H, W, I, M] --> [ 1, H, W, I*M ]
+            auto weightsShape = weightsInfo.GetShape();
+            weightsInfo.SetShape({1,
+                                  weightsShape[0],
+                                  weightsShape[1],
+                                  weightsShape[2]*weightsShape[3]});
+
+            armnn::ConstTensor weightsPermuted(weightsInfo, permuteBuffer.get());
+
+            auto weightsLayer = m_Network->AddConstantLayer(weightsPermuted);
+            weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+            weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsPermuted.GetInfo());
+        }
+        else
+        {
+            auto weightsLayer = m_Network->AddConstantLayer(weights);
+            weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+            weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
+        }
     }
     else
     {
         layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                          weights,
-                                                          optionalBiases,
                                                           layerName.c_str());
+        uint32_t numInputs = descriptor.GetNumInputs();
+        CHECK_VALID_SIZE(inputs.size(), numInputs);
     }
 
     armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
 
-    RegisterInputSlots(graph, layerIndex, layer);
+    RegisterInputSlots(graph, layerIndex, layer, ignoreSlots);
     RegisterOutputSlots(graph, layerIndex, layer);
 }
 
diff --git a/src/armnnDeserializer/test/DeserializeConstant.cpp b/src/armnnDeserializer/test/DeserializeConstant.cpp
index 682e8a157d..b5a2151268 100644
--- a/src/armnnDeserializer/test/DeserializeConstant.cpp
+++ b/src/armnnDeserializer/test/DeserializeConstant.cpp
@@ -121,7 +121,10 @@ struct ConstantAddFixture : public ParserFlatbuffersSerializeFixture
                                         },
                                 }],
                             }}},
-                }]
+                }],
+                featureVersions: {
+                    weightsLayoutScheme: 1,
+                }
          }
         )";
         SetupSingleInputSingleOutput("InputLayer1", "OutputLayer");
diff --git a/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp b/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp
index 1920c1cc79..d5f3c6396f 100644
--- a/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp
+++ b/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp
@@ -13,9 +13,9 @@
 
 TEST_SUITE("Deserializer_DepthwiseConv2d")
 {
-struct DepthwiseConv2dFlatbufferVersion1Fixture : public ParserFlatbuffersSerializeFixture
+struct DepthwiseConv2dFlatbufferVersion1FixtureOld : public ParserFlatbuffersSerializeFixture
 {
-    explicit DepthwiseConv2dFlatbufferVersion1Fixture()
+    explicit DepthwiseConv2dFlatbufferVersion1FixtureOld()
     {
         m_JsonString = R"(
         {
@@ -214,20 +214,282 @@ struct DepthwiseConv2dFlatbufferVersion1Fixture : public ParserFlatbuffersSerial
     }
 };
 
+struct DepthwiseConv2dFlatbufferVersion1Fixture : public ParserFlatbuffersSerializeFixture
+{
+    explicit DepthwiseConv2dFlatbufferVersion1Fixture()
+    {
+        m_JsonString = R"(
+        {
+          "layers": [
+            {
+              "layer_type": "InputLayer",
+              "layer": {
+                "base": {
+                  "base": {
+                    "index": 0,
+                    "layerName": "InputLayer",
+                    "layerType": "Input",
+                    "inputSlots": [
+                    ],
+                    "outputSlots": [
+                      {
+                        "index": 0,
+                        "tensorInfo": {
+                          "dimensions": [
+                            1,
+                            3,
+                            3,
+                            3
+                          ],
+                          "dataType": "QAsymmS8",
+                          "quantizationScale": 1.0,
+                          "quantizationOffset": 0,
+                          "quantizationDim": 0,
+                          "dimensionality": 1,
+                          "dimensionSpecificity": [
+                            true,
+                            true,
+                            true,
+                            true
+                          ]
+                        }
+                      }
+                    ]
+                  },
+                  "layerBindingId": 0
+                }
+              }
+            },
+            {
+              "layer_type": "DepthwiseConvolution2dLayer",
+              "layer": {
+                "base": {
+                  "index": 1,
+                  "layerName": "depthwiseConvolution2dWithPerAxis",
+                  "layerType": "DepthwiseConvolution2d",
+                  "inputSlots": [
+                    {
+                      "index": 0,
+                      "connection": {
+                        "sourceLayerIndex": 0,
+                        "outputSlotIndex": 0
+                      }
+                    },
+                    {
+                      "index": 1,
+                      "connection": {
+                        "sourceLayerIndex": 2,
+                        "outputSlotIndex": 0
+                      }
+                    }
+                  ],
+                  "outputSlots": [
+                    {
+                      "index": 0,
+                      "tensorInfo": {
+                        "dimensions": [
+                          1,
+                          3,
+                          3,
+                          3
+                        ],
+                        "dataType": "QAsymmS8",
+                        "quantizationScale": 1.0,
+                        "quantizationOffset": 0,
+                        "quantizationDim": 0,
+                        "dimensionality": 1,
+                        "dimensionSpecificity": [
+                          true,
+                          true,
+                          true,
+                          true
+                        ]
+                      }
+                    }
+                  ]
+                },
+                "descriptor": {
+                  "padLeft": 1,
+                  "padRight": 1,
+                  "padTop": 1,
+                  "padBottom": 1,
+                  "strideX": 1,
+                  "strideY": 1,
+                  "dilationX": 1,
+                  "dilationY": 1,
+                  "biasEnabled": false,
+                  "dataLayout": "NHWC"
+                }
+              }
+            },
+            {
+              "layer_type": "ConstantLayer",
+              "layer": {
+                "base": {
+                  "index": 2,
+                  "layerName": "Weights",
+                  "layerType": "Constant",
+                  "inputSlots": [
+                  ],
+                  "outputSlots": [
+                    {
+                      "index": 0,
+                      "tensorInfo": {
+                        "dimensions": [
+                          1,
+                          3,
+                          3,
+                          3
+                        ],
+                        "dataType": "QSymmS8",
+                        "quantizationScale": 0.25,
+                        "quantizationOffset": 0,
+                        "quantizationDim": 0,
+                        "dimensionality": 1,
+                        "dimensionSpecificity": [
+                          true,
+                          true,
+                          true,
+                          true
+                        ],
+                        quantizationScales: [
+                              0.25,
+                              0.2,
+                              0.1
+                        ],
+                        "isConstant": true,
+                      }
+                    }
+                  ]
+                },
+                "input": {
+                  "info": {
+                    "dimensions": [
+                       1,
+                       3,
+                       3,
+                       3
+                    ],
+                    "dataType": "QSymmS8",
+                    "quantizationScale": 0.25,
+                    "quantizationOffset": 0,
+                    "quantizationDim": 0,
+                    "dimensionality": 1,
+                    "dimensionSpecificity": [
+                      true,
+                      true,
+                      true,
+                      true
+                    ],
+                    quantizationScales: [
+                      0.25,
+                      0.2,
+                      0.1
+                    ]
+                  },
+                  "data_type": "ByteData",
+                  "data": {
+                    "data": [
+                      4,
+                      20,
+                      0,
+                      8,
+                      20,
+                      30,
+                      4,
+                      0,
+                      10,
+                      12,
+                      0,
+                      40,
+                      0,
+                      5,
+                      30,
+                      16,
+                      10,
+                      40,
+                      12,
+                      0,
+                      30,
+                      16,
+                      20,
+                      0,
+                      12,
+                      20,
+                      20
+                    ]
+                  }
+                }
+              }
+            },
+            {
+              "layer_type": "OutputLayer",
+              "layer": {
+                "base": {
+                  "base": {
+                    "index": 3,
+                    "layerName": "OutputLayer",
+                    "layerType": "Output",
+                    "inputSlots": [
+                      {
+                        "index": 0,
+                        "connection": {
+                          "sourceLayerIndex": 1,
+                          "outputSlotIndex": 0
+                        }
+                      }
+                    ],
+                    "outputSlots": [
+                    ]
+                  },
+                  "layerBindingId": 0
+                }
+              }
+            }
+          ],
+          "inputIds": [
+            0
+          ],
+          "outputIds": [
+            0
+          ],
+          "featureVersions": {
+            "bindingIdsScheme": 1,
+            "constantTensorsAsInputs": 1
+          }
+        }
+        )";
+        Setup();
+    }
+};
+
 // This test uses a model that was created before weights layout scheme version was added to our flatbuffers
 // file. It ensures older models can still be read and executed
 // featureVersion weights layout scheme 1 indicates a change in the depthwise weights layout within
 // armm from [M,I,H,W] --> [1,H,W,I*M]
-TEST_CASE_FIXTURE(DepthwiseConv2dFlatbufferVersion1Fixture, "DepthwiseConv2d_FlatbufferVersion1")
+TEST_CASE_FIXTURE(DepthwiseConv2dFlatbufferVersion1FixtureOld, "DepthwiseConv2d_FlatbufferVersion1Old")
+{
+    RunTest<4, armnn::DataType::QAsymmS8>(
+        0,
+        { 3,2,0,0,4,3,0,1,2,
+          0,1,3,0,4,2,2,2,3,
+          2,4,3,2,0,4,3,4,0},
+        { 15,60,10,11,37,20, 0,18,17,
+          20,65,28,28,74,26,12,20,18,
+          25,36,12,37,42,25,29,14, 9});
+}
+
+TEST_CASE_FIXTURE(DepthwiseConv2dFlatbufferVersion1Fixture,
+                  "DepthwiseConv2d_FlatbufferVersion1_WeightsAndBiasesAsConstantLayers")
 {
     RunTest<4, armnn::DataType::QAsymmS8>(
             0,
-            { 3,2,0,0,4,3,0,1,2,
-              0,1,3,0,4,2,2,2,3,
-              2,4,3,2,0,4,3,4,0},
-            { 15,60,10,11,37,20, 0,18,17,
-              20,65,28,28,74,26,12,20,18,
-              25,36,12,37,42,25,29,14, 9});
+            {{"InputLayer", { 3,2,0,0,4,3,0,1,2,
+                              0,1,3,0,4,2,2,2,3,
+                              2,4,3,2,0,4,3,4,0}}},
+            {{"OutputLayer", { 15,60,10,11,37,20, 0,18,17,
+                               20,65,28,28,74,26,12,20,18,
+                               25,36,12,37,42,25,29,14, 9}}});
 }
 
 }
\ No newline at end of file
diff --git a/src/armnnDeserializer/test/DeserializeGather.cpp b/src/armnnDeserializer/test/DeserializeGather.cpp
index 47919c4481..0d12d71c9d 100644
--- a/src/armnnDeserializer/test/DeserializeGather.cpp
+++ b/src/armnnDeserializer/test/DeserializeGather.cpp
@@ -119,7 +119,10 @@ struct GatherFixture : public ParserFlatbuffersSerializeFixture
                                         },
                                 }],
                             }}},
-                }]
+                }],
+                featureVersions: {
+                    weightsLayoutScheme: 1,
+                }
                  } )";
 
         Setup();
diff --git a/src/armnnDeserializer/test/DeserializeGatherNd.cpp b/src/armnnDeserializer/test/DeserializeGatherNd.cpp
index 684a42ca07..f0341e24ee 100644
--- a/src/armnnDeserializer/test/DeserializeGatherNd.cpp
+++ b/src/armnnDeserializer/test/DeserializeGatherNd.cpp
@@ -115,7 +115,10 @@ struct GatherNdFixture : public ParserFlatbuffersSerializeFixture
                                         },
                                 }],
                             }}},
-                }]
+                }],
+                featureVersions: {
+                    weightsLayoutScheme: 1,
+                }
                  } )";
 
         Setup();
diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp
index d97fa1c4f1..dd6a06fd00 100644
--- a/src/armnnOnnxParser/OnnxParser.cpp
+++ b/src/armnnOnnxParser/OnnxParser.cpp
@@ -1042,12 +1042,17 @@ void OnnxParserImpl::AddConvLayerWithDepthwiseConv(const onnx::NodeProto& node,
     desc.m_StrideY      = convDesc.m_StrideY;
     desc.m_BiasEnabled  = convDesc.m_BiasEnabled;
 
-    armnn::IConnectableLayer* layer;
+    desc.m_BiasEnabled = desc.m_BiasEnabled || (node.input_size() == 3); // must be final before the layer is built
+    armnn::IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, node.name().c_str());
 
     // weights come in as [O,1,H,W] from ONNX and need to be converted to ArmNNs dephtwise weights layout [1,H,W,O]
     armnn::PermutationVector perVec {3,0,1,2};
     auto weightTensor = CreateConstTensor(node.input(1), perVec);
 
+    IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(weightTensor.first);
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightTensor.first.GetInfo());
+    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+    std::vector<std::string> tensorIndexes = {node.input(0), node.input(1)};
     if (node.input_size() == 3)
     {
         if(!m_TensorsInfo[node.input(2)].isConstant())
@@ -1057,20 +1062,16 @@ void OnnxParserImpl::AddConvLayerWithDepthwiseConv(const onnx::NodeProto& node,
                                              node.name(),
                                              CHECK_LOCATION().AsString()));
         }
+
         desc.m_BiasEnabled = true;
         auto biasTensor = CreateConstTensor(node.input(2));
-        layer = m_Network->AddDepthwiseConvolution2dLayer(desc,
-                                                          weightTensor.first,
-                                                          Optional<ConstTensor>(biasTensor.first),
-                                                          node.name().c_str());
-    }
-    else
-    {
-        layer = m_Network->AddDepthwiseConvolution2dLayer(desc,
-                                                          weightTensor.first,
-                                                          EmptyOptional(),
-                                                          node.name().c_str());
+        tensorIndexes.emplace_back(node.input(2));
+
+        IConnectableLayer* biasLayer = m_Network->AddConstantLayer(biasTensor.first);
+        biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensor.first.GetInfo());
+        biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
     }
+
     ARMNN_ASSERT(layer != nullptr);
 
     auto outputInfo = ComputeOutputInfo({ node.output(0) }, layer,
@@ -1081,7 +1082,7 @@ void OnnxParserImpl::AddConvLayerWithDepthwiseConv(const onnx::NodeProto& node,
 
     // register the input connection slots for the layer, connections are made after all layers have been created
     // only the tensors for the inputs are relevant, exclude the const tensors
-    RegisterInputSlots(layer, {node.input(0)});
+    RegisterInputSlots(layer, tensorIndexes);
 
     // register the output connection slots for the layer, connections are made after all layers have been created
     RegisterOutputSlots(layer, {node.output(0)});
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index 3b9dfb0ae8..99d1c2bd18 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -441,13 +441,10 @@ void SerializerStrategy::SerializeDepthToSpaceLayer(const armnn::IConnectableLay
 
 void SerializerStrategy::SerializeDepthwiseConvolution2dLayer(const armnn::IConnectableLayer* layer,
                                                               const armnn::DepthwiseConvolution2dDescriptor& descriptor,
-                                                              const std::vector<armnn::ConstTensor>& constants,
                                                               const char* name)
 {
     IgnoreUnused(name);
 
-    const armnn::ConstTensor& weights = constants[0];
-
     auto fbBaseLayer  = CreateLayerBase(layer, serializer::LayerType::LayerType_DepthwiseConvolution2d);
     auto fbDescriptor = CreateDepthwiseConvolution2dDescriptor(m_flatBufferBuilder,
                                                                descriptor.m_PadLeft,
@@ -461,20 +458,9 @@ void SerializerStrategy::SerializeDepthwiseConvolution2dLayer(const armnn::IConn
                                                                descriptor.m_BiasEnabled,
                                                                GetFlatBufferDataLayout(descriptor.m_DataLayout));
 
-    flatbuffers::Offset<serializer::ConstTensor> fbWeightsConstTensorInfo = CreateConstTensorInfo(weights);
-    flatbuffers::Offset<serializer::ConstTensor> fbBiasesConstTensorInfo;
-
-    if (constants.size() > 1)
-    {
-        const armnn::ConstTensor& biases = constants[1];
-        fbBiasesConstTensorInfo = CreateConstTensorInfo(biases);
-    }
-
     auto flatBufferLayer = CreateDepthwiseConvolution2dLayer(m_flatBufferBuilder,
                                                              fbBaseLayer,
-                                                             fbDescriptor,
-                                                             fbWeightsConstTensorInfo,
-                                                             fbBiasesConstTensorInfo);
+                                                             fbDescriptor);
 
     CreateAnyLayer(flatBufferLayer.o, serializer::Layer::Layer_DepthwiseConvolution2dLayer);
 }
@@ -2090,7 +2076,6 @@ void SerializerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
                     static_cast<const armnn::DepthwiseConvolution2dDescriptor&>(descriptor);
             SerializeDepthwiseConvolution2dLayer(layer,
                                                  layerDescriptor,
-                                                 constants,
                                                  name);
             break;
         }
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index 98c1984cd2..afde778dc2 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -158,7 +158,6 @@ private:
 
     void SerializeDepthwiseConvolution2dLayer(const armnn::IConnectableLayer* layer,
                                               const armnn::DepthwiseConvolution2dDescriptor& descriptor,
-                                              const std::vector<armnn::ConstTensor>& constants,
                                               const char* name = nullptr);
 
     void SerializeDequantizeLayer(const armnn::IConnectableLayer* layer,
diff --git a/src/armnnSerializer/test/SerializerTestUtils.cpp b/src/armnnSerializer/test/SerializerTestUtils.cpp
index d0a8e2d5ec..cf2cb15b15 100644
--- a/src/armnnSerializer/test/SerializerTestUtils.cpp
+++ b/src/armnnSerializer/test/SerializerTestUtils.cpp
@@ -54,8 +54,14 @@ void LayerVerifierBase::VerifyNameAndConnections(const armnn::IConnectableLayer*
         CHECK(
             GetDataTypeName(connectedInfo.GetDataType()) == GetDataTypeName(m_InputTensorInfos[i].GetDataType()));
 
-        CHECK(connectedInfo.GetQuantizationScale() == m_InputTensorInfos[i].GetQuantizationScale());
-        CHECK(connectedInfo.GetQuantizationOffset() == m_InputTensorInfos[i].GetQuantizationOffset());
+        // Quantization parameters are compared for every input except the Constant layers (weights and bias)
+        // feeding a DepthwiseConvolution2d; Constant layers themselves are filtered out in SerializerTestUtils.hpp.
+        if (layer->GetType() != armnn::LayerType::DepthwiseConvolution2d ||
+            connectedOutput->GetOwningIConnectableLayer().GetType() != armnn::LayerType::Constant)
+        {
+            CHECK(connectedInfo.GetQuantizationScale() == m_InputTensorInfos[i].GetQuantizationScale());
+            CHECK(connectedInfo.GetQuantizationOffset() == m_InputTensorInfos[i].GetQuantizationOffset());
+        }
     }
 
     for (unsigned int i = 0; i < m_OutputTensorInfos.size(); i++)
diff --git a/src/armnnSerializer/test/SerializerTestUtils.hpp b/src/armnnSerializer/test/SerializerTestUtils.hpp
index ce4d2cc330..ac15cedd95 100644
--- a/src/armnnSerializer/test/SerializerTestUtils.hpp
+++ b/src/armnnSerializer/test/SerializerTestUtils.hpp
@@ -128,6 +128,7 @@ public:
         {
             case armnn::LayerType::Input: break;
             case armnn::LayerType::Output: break;
+            case armnn::LayerType::Constant: break;
             default:
             {
                 this->VerifyNameAndConnections(layer, name);
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index a765290de8..278715bfa4 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -553,11 +553,8 @@ TEST_CASE("SerializeDepthwiseConvolution2d")
 
     armnn::INetworkPtr network = armnn::INetwork::Create();
     armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
-    armnn::IConnectableLayer* const depthwiseConvLayer =
-        network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                weights,
-                                                armnn::Optional<armnn::ConstTensor>(biases),
-                                                layerName.c_str());
+    armnn::IConnectableLayer* const depthwiseConvLayer = network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                                                                 layerName.c_str());
     armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
 
     inputLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(0));
@@ -566,12 +563,20 @@ TEST_CASE("SerializeDepthwiseConvolution2d")
     inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
     depthwiseConvLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
 
+    armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights);
+    weightsLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(1u));
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
+
+    armnn::IConnectableLayer* const biasLayer = network->AddConstantLayer(biases);
+    biasLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(2u));
+    biasLayer->GetOutputSlot(0).SetTensorInfo(biases.GetInfo());
+
     armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
     CHECK(deserializedNetwork);
 
     const std::vector<armnn::ConstTensor>& constants {weights, biases};
     LayerVerifierBaseWithDescriptorAndConstants<armnn::DepthwiseConvolution2dDescriptor> verifier(
-            layerName, {inputInfo}, {outputInfo}, descriptor, constants);
+        layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor, constants);
     deserializedNetwork->ExecuteStrategy(verifier);
 }
 
@@ -610,11 +615,8 @@ TEST_CASE("SerializeDepthwiseConvolution2dWithPerAxisParams")
 
     armnn::INetworkPtr network = armnn::INetwork::Create();
     armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
-    armnn::IConnectableLayer* const depthwiseConvLayer =
-        network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                weights,
-                                                armnn::Optional<armnn::ConstTensor>(biases),
-                                                layerName.c_str());
+    armnn::IConnectableLayer* const depthwiseConvLayer = network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                                                                 layerName.c_str());
     armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
 
     inputLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(0));
@@ -623,12 +625,75 @@ TEST_CASE("SerializeDepthwiseConvolution2dWithPerAxisParams")
     inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
     depthwiseConvLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
 
+    armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights);
+    weightsLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(1u));
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
+
+    armnn::IConnectableLayer* const biasLayer = network->AddConstantLayer(biases);
+    biasLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(2u));
+    biasLayer->GetOutputSlot(0).SetTensorInfo(biases.GetInfo());
+
     armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
     CHECK(deserializedNetwork);
 
     const std::vector<armnn::ConstTensor>& constants {weights, biases};
     LayerVerifierBaseWithDescriptorAndConstants<armnn::DepthwiseConvolution2dDescriptor> verifier(
-            layerName, {inputInfo}, {outputInfo}, descriptor, constants);
+            layerName, {inputInfo, kernelInfo, biasInfo}, {outputInfo}, descriptor, constants);
+    deserializedNetwork->ExecuteStrategy(verifier);
+}
+
+TEST_CASE("SerializeDepthwiseConvolution2dWeightsAndBiasesAsConstantLayers")
+{
+    const std::string layerName("depthwiseConvolution2d");
+    const armnn::TensorInfo inputInfo ({ 1, 5, 5, 1 }, armnn::DataType::Float32);
+    const armnn::TensorInfo outputInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32);
+
+    const armnn::TensorInfo weightsInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32, 0.0f, 0, true);
+    const armnn::TensorInfo biasesInfo ({ 1 }, armnn::DataType::Float32, 0.0f, 0, true);
+
+    std::vector<float> weightsData = GenerateRandomData<float>(weightsInfo.GetNumElements());
+    armnn::ConstTensor weights(weightsInfo, weightsData);
+
+    std::vector<float> biasesData = GenerateRandomData<float>(biasesInfo.GetNumElements());
+    armnn::ConstTensor biases(biasesInfo, biasesData);
+
+    armnn::DepthwiseConvolution2dDescriptor descriptor;
+    descriptor.m_PadLeft     = 1;
+    descriptor.m_PadRight    = 1;
+    descriptor.m_PadTop      = 1;
+    descriptor.m_PadBottom   = 1;
+    descriptor.m_StrideX     = 2;
+    descriptor.m_StrideY     = 2;
+    descriptor.m_DilationX   = 2;
+    descriptor.m_DilationY   = 2;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;
+
+    armnn::INetworkPtr network = armnn::INetwork::Create();
+    armnn::IConnectableLayer* const inputLayer  = network->AddInputLayer(0);
+    armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights");
+    armnn::IConnectableLayer* const biasesLayer = network->AddConstantLayer(biases, "Biases");
+    armnn::IConnectableLayer* const convLayer   = network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                                                          layerName.c_str());
+    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1));
+    biasesLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2));
+    convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+    biasesLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+    CHECK(deserializedNetwork);
+
+    const std::vector<armnn::ConstTensor>& constants {weights, biases};
+    LayerVerifierBaseWithDescriptorAndConstants<armnn::DepthwiseConvolution2dDescriptor> verifier(
+        layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor, constants);
+
     deserializedNetwork->ExecuteStrategy(verifier);
 }
 
diff --git a/src/armnnTestUtils/CreateWorkload.hpp b/src/armnnTestUtils/CreateWorkload.hpp
index d01919c09d..2590ae89b2 100644
--- a/src/armnnTestUtils/CreateWorkload.hpp
+++ b/src/armnnTestUtils/CreateWorkload.hpp
@@ -1146,23 +1146,30 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
     layerDesc.m_BiasEnabled = false;
     layerDesc.m_DataLayout  = dataLayout;
 
+    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0f; // float literals: no double narrowing
+    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0f;
+
+    TensorShape weightShape({1, 4, 4, 2});
+    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
+                             TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
+    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
+                              TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
+
     DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
 
-    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType)); // [ 1, H, W, I*M ]
+    // As optimization isn't run, the layer's member variables need to be updated manually.
+    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType)); // [ 1, H, W, I*M ]
     layer->m_Weight->Allocate();
 
     // Creates extra layers.
     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const weights = graph.AddLayer<ConstantLayer>("weights");
     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
 
-    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
-                TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
-    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
-                TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
-
     // Connects up.
-    Connect(input, layer, TensorInfo(inputShape, DataType));
-    Connect(layer, output, TensorInfo(outputShape, DataType));
+    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
+    Connect(weights, layer, TensorInfo(weightShape, DataType, inputsQScale, 0, true), 0, 1); // offset is integral
+    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
     CreateTensorHandles(graph, factory);
 
     // Makes the workload and checks it.
@@ -1178,9 +1185,8 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
     CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == false);
     CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
 
-    CHECK(queueDescriptor.m_Inputs.size() == 1);
+    CHECK(queueDescriptor.m_Inputs.size() == 2);
     CHECK(queueDescriptor.m_Outputs.size() == 1);
-    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType)));
 
     // Returns so we can do extra, backend-specific tests.
     return workload;
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 5f71ebcff6..7cb9f6a7bc 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -1227,7 +1227,6 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato
     CHECK_SUPPORTED_FUSED_ACTIVATION(options, subgraphIndex, operatorIndex);
 
     DepthwiseConvolution2dDescriptor desc;
-    desc.m_BiasEnabled = false;
     desc.m_StrideX = CHECKED_NON_NEGATIVE(options->stride_w);
     desc.m_StrideY = CHECKED_NON_NEGATIVE(options->stride_h);
     desc.m_DataLayout = armnn::DataLayout::NHWC;
@@ -1235,6 +1234,11 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato
 
     auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
     CHECK_VALID_SIZE(inputs.size(), 2, 3);
+    if (inputs.size() == 3)
+    {
+        desc.m_BiasEnabled = true;
+    }
+
     auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
     CHECK_VALID_SIZE(outputs.size(), 1);
     desc.m_DilationX = CHECKED_NON_NEGATIVE(options->dilation_w_factor);
@@ -1257,26 +1261,24 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato
                 desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, options->padding);
 
     // ArmNN uses the same filter tensor layout at TfLite [1, H, W, O] no need for any permutation
-    auto filterTensor = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo, inputTensorInfo.GetDataType());
-    armnn::IConnectableLayer* layer = nullptr;
+    auto filterTensor = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo);
     auto layerName = fmt::format("DepthwiseConv2D:{}:{}", subgraphIndex, operatorIndex);
 
-    if (inputs.size() == 3)
+    auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    // Add the first input and weights tensor to the registration list.
+    // The constant weights will be added by SetupConstantLayers.
+    std::vector<unsigned int> tensorIndexesToRegister = {inputTensorIndexes[0], inputTensorIndexes[1]};
+
+    armnn::IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, layerName.c_str());
+
+    if (desc.m_BiasEnabled)
     {
         desc.m_BiasEnabled = true;
         TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
-        auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo, inputTensorInfo.GetDataType());
-        layer = m_Network->AddDepthwiseConvolution2dLayer(desc,
-                                                          filterTensor.first,
-                                                          Optional<ConstTensor>(biasTensorAndData.first),
-                                                          layerName.c_str());
-    }
-    else
-    {
-        layer = m_Network->AddDepthwiseConvolution2dLayer(desc,
-                                                          filterTensor.first,
-                                                          EmptyOptional(),
-                                                          layerName.c_str());
+        auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo);
+
+        // Add the biases input to the registration list, a constant layer will be added by SetupConstantLayers.
+        tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]);
     }
     ARMNN_ASSERT(layer != nullptr);
 
@@ -1285,8 +1287,7 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato
 
     // register the input connection slots for the layer, connections are made after all layers have been created
     // only the tensors for the inputs are relevant, exclude the const tensors
-    auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
-    RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
+    RegisterInputSlots(subgraphIndex, operatorIndex, layer, tensorIndexesToRegister);
 
     layer = AddFusedActivationLayer(layer, 0, options->fused_activation_function);
     // register the output connection slots for the layer, connections are made after all layers have been created
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index 74ab789402..de3a34ee08 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -263,28 +263,13 @@ LayerType* FuseDepthwiseConvolution2dLayer(OptimizationViews& optimizationViews,
                                            ActivationDescriptor& activationDesc,
                                            std::string name)
 {
-    std::shared_ptr<ConstTensorHandle> weightHandle = baseLayer->m_Weight;
-    TensorInfo weightInfo = weightHandle->GetTensorInfo();
-
-    std::shared_ptr<ConstTensorHandle> biasHandle = baseLayer->m_Bias;
-    ConstTensor biasTensor;
-    if (!biasHandle)
-    {
-        biasTensor = ConstTensor();
-    }
-    else
-    {
-        biasTensor = ConstTensor(biasHandle->GetTensorInfo(), biasHandle->Map(true));
-    }
-
-    IConnectableLayer* replacement =
-        optimizationViews.GetINetwork()->
-            AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(),
-                                           ConstTensor(weightInfo, weightHandle->Map(true)),
-                                           Optional<ConstTensor>(biasTensor),
-                                           name.c_str());
+    IConnectableLayer* replacement = optimizationViews.GetINetwork()->
+        AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
     LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
 
+    replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
+    replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
+
     FuseLayer(optimizationViews,
               baseLayer,
               replacementLayer,
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index d89b5899ba..7a46741964 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1382,7 +1382,13 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
 {
     const std::string descriptorName{"DepthwiseConvolution2dQueueDescriptor"};
 
-    ValidateNumInputs(workloadInfo,  descriptorName, 1);
+    uint32_t numInputs = 2;
+    if (m_Parameters.m_BiasEnabled)
+    {
+        numInputs = 3;
+    }
+
+    ValidateNumInputs(workloadInfo,  descriptorName, numInputs);
     ValidateNumOutputs(workloadInfo, descriptorName, 1);
 
     const TensorInfo& inputTensorInfo  = workloadInfo.m_InputTensorInfos[0];
@@ -1391,9 +1397,7 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
     ValidateTensorNumDimensions(inputTensorInfo,  descriptorName, 4, "input");
     ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output");
 
-    ValidatePointer(m_Weight, descriptorName, "weight");
-
-    const TensorInfo& weightTensorInfo = m_Weight->GetTensorInfo();
+    const TensorInfo& weightTensorInfo = workloadInfo.m_InputTensorInfos[1];
     ValidateTensorNumDimensions(weightTensorInfo, descriptorName, 4, "weight");
 
     if (m_Parameters.m_DilationX < 1 || m_Parameters.m_DilationY < 1 )
@@ -1447,9 +1451,7 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
     Optional<TensorInfo> optionalBiasTensorInfo;
     if (m_Parameters.m_BiasEnabled)
     {
-        ValidatePointer(m_Bias, descriptorName, "bias");
-
-        optionalBiasTensorInfo = MakeOptional<TensorInfo>(m_Bias->GetTensorInfo());
+        optionalBiasTensorInfo = MakeOptional<TensorInfo>(workloadInfo.m_InputTensorInfos[2]);
         const TensorInfo& biasTensorInfo = optionalBiasTensorInfo.value();
 
         ValidateBiasTensorQuantization(biasTensorInfo, inputTensorInfo, weightTensorInfo, descriptorName);
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 5847e8cc21..f624ee6021 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -324,10 +324,13 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
         case LayerType::DepthwiseConvolution2d:
         {
             auto cLayer = PolymorphicDowncast<const DepthwiseConvolution2dLayer*>(&layer);
-            const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
-                                                       dataType);
-            const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType);
-            ARMNN_ASSERT(cLayer->m_Weight.get() != nullptr);
+            // Weights arrive on input slot 1; check it is connected before the connection is dereferenced below.
+            ARMNN_ASSERT(cLayer->GetInputSlot(1).GetConnection() != nullptr);
+            const TensorInfo& input   = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
+                                                         dataType);
+            const TensorInfo& output  = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType);
+            const TensorInfo& weights = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(),
+                                                         dataType);
 
             const DepthwiseConvolution2dDescriptor& descriptor = cLayer->GetParameters();
 
@@ -335,17 +338,16 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
             Optional<TensorInfo> biases;
             if (descriptor.m_BiasEnabled)
             {
-                biases =
-                    OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType));
+                biases = OverrideDataType(cLayer->GetInputSlot(2).GetConnection()->GetTensorInfo(),
+                                          GetBiasTypeFromWeightsType(dataType));
             }
 
-            result = layerSupportObject.IsDepthwiseConvolutionSupported(
-                                                     input,
-                                                     output,
-                                                     descriptor,
-                                                     OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType),
-                                                     biases,
-                                                     reason);
+            result = layerSupportObject.IsDepthwiseConvolutionSupported(input,
+                                                                        output,
+                                                                        descriptor,
+                                                                        weights,
+                                                                        biases,
+                                                                        reason);
             break;
         }
         case LayerType::Dequantize:
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 06d230b006..991f37d17e 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -21,6 +21,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
     DataTypeUtils.hpp
     DefaultAsyncExecuteTest.cpp
     DepthToSpaceEndToEndTestImpl.hpp
+    DepthwiseConvolution2dEndToEndTests.hpp
     DequantizeEndToEndTestImpl.hpp
     DetectionPostProcessEndToEndTestImpl.hpp
     DynamicBackendTests.cpp
diff --git a/src/backends/backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp b/src/backends/backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp
new file mode 100644
index 0000000000..1f9b60a4f2
--- /dev/null
+++ b/src/backends/backendsCommon/test/DepthwiseConvolution2dEndToEndTests.hpp
@@ -0,0 +1,183 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "EndToEndTestImpl.hpp"
+#include <armnnUtils/QuantizeHelper.hpp>
+
+#include <ResolveType.hpp>
+
+#include <CommonTestUtils.hpp>
+#include <armnnTestUtils/DataLayoutUtils.hpp>
+
+#include <map>
+#include <vector>
+
+namespace
+{
+
+armnn::INetworkPtr CreateDepthwiseConvolution2dNetwork(const armnn::DepthwiseConvolution2dDescriptor& descriptor,
+                                                       const armnn::TensorInfo& inputInfo,
+                                                       const armnn::TensorInfo& weightsInfo,
+                                                       const armnn::TensorInfo& biasInfo,
+                                                       const armnn::TensorInfo& outputInfo,
+                                                       const armnn::ConstTensor& weights,
+                                                       const armnn::ConstTensor& biases)
+{
+    using namespace armnn;
+    // Weights and biases enter the graph as Constant layers connected to the convolution layer's inputs.
+    INetworkPtr network(INetwork::Create());
+    IConnectableLayer* input = network->AddInputLayer(0, "input");
+    armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
+    armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(biases, "Bias");
+    IConnectableLayer* convolution2d = network->AddDepthwiseConvolution2dLayer(descriptor, "depthwiseConvolution2d");
+    IConnectableLayer* output = network->AddOutputLayer(0, "output");
+
+    Connect(input, convolution2d, inputInfo, 0, 0);          // data    -> input slot 0
+    Connect(weightsLayer, convolution2d, weightsInfo, 0, 1); // weights -> input slot 1
+    Connect(biasLayer, convolution2d, biasInfo, 0, 2);       // bias    -> input slot 2
+    Connect(convolution2d, output, outputInfo, 0, 0);
+
+    return network;
+}
+
+} // anonymous namespace
+
+template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType>
+void DepthwiseConvolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
+                                    armnn::DataLayout dataLayout)
+{
+    using namespace armnn;
+    using T  = ResolveType<ArmnnType>;
+    using BT = ResolveType<ArmnnBType>;
+    // Builds a depthwise convolution network with constant weights/bias inputs and runs EndToEndLayerTestImpl.
+    const float   qScale  = IsQuantizedType<T>() ? 0.25f : 1.0f;
+    const int32_t qOffset = IsQuantizedType<T>() ? 50    : 0;
+
+    unsigned int depthMultiplier = 2;
+
+    unsigned int inputHeight    = 8;
+    unsigned int inputWidth     = 16;
+    unsigned int inputChannels  = 2;
+    unsigned int inputBatchSize = 1;
+
+    unsigned int kernelHeight = 5;
+    unsigned int kernelWidth  = 3;
+    // NOTE(review): "+ 2" implies 2 rows of vertical padding, yet PadTop=1 and PadBottom=0 below - confirm.
+    unsigned int outputHeight    = inputHeight - kernelHeight + 1 + 2;
+    unsigned int outputWidth     = (inputWidth - kernelWidth + 1)/2;
+    unsigned int outputChannels  = inputChannels * depthMultiplier;
+    unsigned int outputBatchSize = inputBatchSize;
+    // NOTE(review): dims are listed in {N, C, H, W} order although NCHW triggers an NhwcToNchw permute - confirm.
+    TensorInfo inputInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, ArmnnType, qScale, qOffset, true);
+    TensorInfo outputInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, ArmnnType, qScale, qOffset);
+    TensorInfo weightsInfo({1, kernelHeight, kernelWidth, outputChannels}, ArmnnType, qScale, qOffset, true);
+    TensorInfo biasesInfo({outputChannels}, ArmnnBType, qScale * qScale, 0, true); // input scale * weights scale
+
+    std::vector<float> inputData =
+    {
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
+   };
+
+    std::vector<float> weightsData =
+    {
+        1.0f,  1.0f, 1.0f,
+        1.0f, -1.0f, 1.0f,
+        1.0f,  1.0f, 1.0f,
+        1.0f,  1.0f, 1.0f,
+        1.0f,  1.0f, 1.0f,
+
+        2.0f,  2.0f, 2.0f,
+        2.0f,  2.0f, 2.0f,
+        2.0f,  2.0f, 2.0f,
+        2.0f,  2.0f, 2.0f,
+        2.0f,  2.0f, 2.0f,
+
+        0.0f,  0.0f, 0.0f,
+        0.0f, -1.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+
+        0.0f,  0.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+        0.0f,  1.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+        0.0f,  0.0f, 0.0f
+    };
+
+    std::vector<float> biasesData = { 0.0f, 2.0f, 1.0f, -1.0f };
+
+    std::vector<float> expectedOutputData =
+    {
+        3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f,
+        5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.5f, 5.5f, 5.5f, 5.5f, 5.5f, 5.5f, 5.5f,
+        5.5f, 5.5f, 5.5f, 5.5f, 5.5f, 5.5f, 5.5f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
+        2.5f, 2.5f, 2.5f, 2.5f, 2.5f, 2.5f, 2.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
+        4.5f, 4.5f, 4.5f, 4.5f, 4.5f, 4.5f, 4.5f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
+        6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
+        1.0f, 3.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 3.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        3.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 3.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        3.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 3.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
+   };
+
+    DepthwiseConvolution2dDescriptor descriptor;
+    descriptor.m_PadLeft     = 0;
+    descriptor.m_PadRight    = 0;
+    descriptor.m_PadTop      = 1;
+    descriptor.m_PadBottom   = 0;
+    descriptor.m_StrideX     = 2;
+    descriptor.m_StrideY     = 1;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = dataLayout;
+
+    // Permute input and output from NHWC to NCHW when the NCHW data layout is requested.
+    if (dataLayout == DataLayout::NCHW)
+    {
+        PermuteTensorNhwcToNchw(inputInfo, inputData);
+        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+    }
+
+    // Quantize data
+    std::vector<T> qInputData          = armnnUtils::QuantizedVector<T>(inputData, qScale, qOffset);
+    std::vector<T> qWeightsData        = armnnUtils::QuantizedVector<T>(weightsData, qScale, qOffset);
+    std::vector<T> qExpectedOutputData = armnnUtils::QuantizedVector<T>(expectedOutputData, qScale, qOffset);
+
+    std::vector<BT> qBiasesData = armnnUtils::QuantizedVector<BT>(biasesData, qScale * qScale, 0);
+
+    ConstTensor weights(weightsInfo, qWeightsData);
+    ConstTensor biases(biasesInfo, qBiasesData);
+
+    INetworkPtr network = CreateDepthwiseConvolution2dNetwork(descriptor,
+                                                              inputInfo,
+                                                              weightsInfo,
+                                                              biasesInfo,
+                                                              outputInfo,
+                                                              weights,
+                                                              biases);
+
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),
+                                                { { 0, qInputData } },
+                                                { { 0, qExpectedOutputData } },
+                                                backends);
+}
diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
index f0f5b632de..1219ac5a33 100644
--- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
@@ -128,20 +128,16 @@ TEST_CASE("OptimizedViewsSubgraphLayerCountUsingGetINetwork")
     IConnectableLayer* const inputLayer = view.GetINetwork()->AddInputLayer(0, "input");
 
     DepthwiseConvolution2dDescriptor convDescriptor;
-    PreCompiledDescriptor substitutionLayerDescriptor(1, 1);
+    PreCompiledDescriptor substitutionLayerDescriptor(2, 1);
     CompiledBlobPtr blobPtr;
     BackendId backend = Compute::CpuRef;
 
     Layer* convLayer1 = PolymorphicDowncast<Layer*>(
         view.GetINetwork()->AddDepthwiseConvolution2dLayer(convDescriptor,
-                                                           ConstTensor(),
-                                                           Optional<ConstTensor>(),
                                                            "conv1"));
 
     Layer* convLayer2 = PolymorphicDowncast<Layer*>(
         view.GetINetwork()->AddDepthwiseConvolution2dLayer(convDescriptor,
-                                                           ConstTensor(),
-                                                           Optional<ConstTensor>(),
                                                            "conv2"));
 
     IConnectableLayer* const outputLayer = view.GetINetwork()->AddOutputLayer(0, "output");
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
index bd7cc40f27..4203fed23a 100644
--- a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
@@ -1736,19 +1736,38 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
     }
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
+    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
 
+    armnn::DepthwiseConvolution2dQueueDescriptor data;
+    armnn::WorkloadInfo info;
+
     armnn::ScopedTensorHandle weightsTensor(kernelDesc);
 
+    // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases:
+    // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight).
+    //    Needed in Neon and Cl Workload when permuting. Backend TensorHandle in (2) below will not work.
+    // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method.
+    //    Cannot PolymorphicDowncast from ScopedTensorHandle->RefTensorHandle.
+    //    Need to PolymorphicDowncast from ITensorHandle->RefTensorHandle.
     AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());
+    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data()); // required for ConstantTensor
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 
     armnn::ScopedTensorHandle biasTensor(biasDesc);
     if (biasEnabled)
     {
         AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
+
+        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
+        AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
+        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
     }
 
-    armnn::DepthwiseConvolution2dQueueDescriptor data;
     data.m_Weight = &weightsTensor;
     data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
     data.m_Parameters.m_StrideX = strideX;
@@ -1760,12 +1779,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
     data.m_Parameters.m_BiasEnabled = biasEnabled;
     data.m_Parameters.m_DataLayout = layout;
 
-    armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
-
     std::unique_ptr<armnn::IWorkload> workload
             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
+
     inputHandle->Allocate();
     outputHandle->Allocate();
 
@@ -1890,19 +1906,35 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
+    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
 
     armnn::DepthwiseConvolution2dQueueDescriptor data;
     armnn::WorkloadInfo info;
-    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedTensorHandle biasTensor(biasDesc);
 
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
-    AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
+    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
+    // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases:
+    // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons.
+    // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight).
+    // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method.
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); // required for QueueDescriptor
+    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
 
     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 
+    armnn::ScopedTensorHandle biasTensor(biasDesc);
+    if (biasEnabled)
+    {
+        AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
+
+        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
+        AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasV.data());
+        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
+    }
+
     data.m_Weight = &weightsTensor;
     data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
     data.m_Parameters.m_StrideX = 1;
@@ -1916,6 +1948,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
 
     std::unique_ptr<armnn::IWorkload> workload
             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
+
     inputHandle->Allocate();
     outputHandle->Allocate();
 
@@ -2086,19 +2119,35 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
+    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
 
     armnn::DepthwiseConvolution2dQueueDescriptor data;
     armnn::WorkloadInfo info;
-    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedTensorHandle biasTensor(biasDesc);
 
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
-    AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
+    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
+    // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases:
+    // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons.
+    // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight).
+    // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method.
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); // required for QueueDescriptor
+    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
 
     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 
+    armnn::ScopedTensorHandle biasTensor(biasDesc);
+    if (biasEnabled)
+    {
+        AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
+
+        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
+        AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasV.data());
+        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
+    }
+
     data.m_Weight = &weightsTensor;
     data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
     data.m_Parameters.m_StrideX = 2;
@@ -2112,6 +2161,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
 
     std::unique_ptr<armnn::IWorkload> workload
             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
+
     inputHandle->Allocate();
     outputHandle->Allocate();
 
@@ -2247,22 +2297,34 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
     }
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
+    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
 
     armnn::DepthwiseConvolution2dQueueDescriptor data;
     armnn::WorkloadInfo info;
+
     armnn::ScopedTensorHandle weightsTensor(kernelDesc);
-    armnn::ScopedTensorHandle biasTensor(biasDesc);
+    // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases:
+    // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons.
+    // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight).
+    // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method.
+    AllocateAndCopyDataToITensorHandle(&weightsTensor, originalKernel.data()); // required for QueueDescriptor
+    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), originalKernel.data()); // required for ConstantTensor
 
-    AllocateAndCopyDataToITensorHandle(&weightsTensor, originalKernel.data());
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 
-    if(biasEnabled)
+    armnn::ScopedTensorHandle biasTensor(biasDesc);
+    if (biasEnabled)
     {
         AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
-    }
 
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
+        AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
+        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
+    }
 
     data.m_Weight = &weightsTensor;
     data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
@@ -2279,6 +2341,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
 
     std::unique_ptr<armnn::IWorkload> workload
             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
+
     inputHandle->Allocate();
     outputHandle->Allocate();
 
@@ -2970,18 +3033,30 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
     std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
+    std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
 
     armnn::DepthwiseConvolution2dQueueDescriptor data;
     armnn::WorkloadInfo info;
+
     armnn::ScopedTensorHandle weightsTensor(kernelDesc);
     armnn::ScopedTensorHandle biasTensor(biasDesc);
 
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
+    AddInputToWorkload(data, info, biasDesc, biasHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases:
+    // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons.
+    // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight).
+    // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method.
+    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data());
     AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());
+    AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
     AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
 
-    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
-    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
     data.m_Weight = &weightsTensor;
     data.m_Bias = &biasTensor;
     data.m_Parameters.m_StrideX = strideX;
@@ -2994,11 +3069,15 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
     data.m_Parameters.m_DataLayout = layout.GetDataLayout();
 
     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> weightsHandleRef = refTensorHandleFactory.CreateTensorHandle(kernelDesc);
+    std::unique_ptr<armnn::ITensorHandle> biasHandleRef = refTensorHandleFactory.CreateTensorHandle(biasDesc);
     std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
 
     armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
     armnn::WorkloadInfo refInfo = info;
     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
+    SetWorkloadInput(refData, refInfo, 1, kernelDesc, weightsHandleRef.get());
+    SetWorkloadInput(refData, refInfo, 2, biasDesc, biasHandleRef.get());
     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
 
     std::unique_ptr<armnn::IWorkload> workload
@@ -3007,6 +3086,8 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
             = refWorkloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, refData, refInfo);
 
     outputHandleRef->Allocate();
+    weightsHandleRef->Allocate();
+    biasHandleRef->Allocate();
     inputHandleRef->Allocate();
 
     inputHandle->Allocate();
@@ -3014,6 +3095,8 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
 
     CopyDataToITensorHandle(inputHandle.get(), input.data());
     CopyDataToITensorHandle(inputHandleRef.get(), input.data());
+    CopyDataToITensorHandle(weightsHandleRef.get(), kernel.data());
+    CopyDataToITensorHandle(biasHandleRef.get(), bias.data());
 
     ExecuteWorkload(*workload, memoryManager);
 
@@ -3739,23 +3822,33 @@ LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
     descriptor.m_DataLayout  = layout;
 
     std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
+    std::unique_ptr<ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
     std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
 
+    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
     WorkloadInfo workloadInfo;
     ScopedTensorHandle weightTensor(kernelInfo);
     ScopedTensorHandle biasTensor(biasInfo);
 
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+    AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+    AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
+
+    // AllocateAndCopyDataToITensorHandle() is required twice for the weights AND biases:
+    // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons.
+    // 1) ScopedTensorHandle (weightTensor) required for QueueDescriptor (queueDescriptor.m_Weight).
+    // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method.
+    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data());
     AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
+    AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasData.data());
     AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
 
-    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
     queueDescriptor.m_Parameters = descriptor;
     queueDescriptor.m_Weight     = &weightTensor;
     queueDescriptor.m_Bias       = &biasTensor;
 
-    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
-    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
-
     std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d,
                                                                          queueDescriptor,
                                                                          workloadInfo);
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 018adec781..ed6f221511 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -373,14 +373,14 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
 
                                 if (baseLayer->GetParameters().m_BiasEnabled)
                                 {
-                                    biases = baseLayer->m_Bias->GetTensorInfo();
+                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                 }
 
                                 arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                         baseLayer->GetParameters(),
-                                        baseLayer->m_Weight->GetTensorInfo(),
+                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                         biases,
                                         &activationDesc);
 
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 7089f23efa..7a258c38e0 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -225,14 +225,14 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
 
                                 if (baseLayer->GetParameters().m_BiasEnabled)
                                 {
-                                    biases = baseLayer->m_Bias->GetTensorInfo();
+                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                 }
 
                                 arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                         baseLayer->GetParameters(),
-                                        baseLayer->m_Weight->GetTensorInfo(),
+                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                         biases,
                                         &activationDesc);
 
diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp
index 9445cb1c75..fcae77cdaa 100644
--- a/src/backends/neon/NeonTensorHandle.hpp
+++ b/src/backends/neon/NeonTensorHandle.hpp
@@ -241,6 +241,7 @@ private:
                 break;
             case arm_compute::DataType::QSYMM8:
             case arm_compute::DataType::QASYMM8_SIGNED:
+            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                  this->GetTensor());
                 break;
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 496b11db91..29df20f834 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -329,9 +329,9 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dUint8, DepthwiseConvolution2
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false, DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dUint8,
-                     DepthwiseConvolution2dUint8Test,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2dUint8Test,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dQSymm16, DepthwiseConvolution2dInt16Test, true, DataLayout::NCHW)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dNhwc, DepthwiseConvolution2dTest, true, DataLayout::NHWC)
@@ -339,148 +339,176 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dUint8Nhwc, DepthwiseConvolut
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dNhwc, DepthwiseConvolution2dTest, false, DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dUint8Nhwc,
-                     DepthwiseConvolution2dUint8Test,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2dUint8Test,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthNhwc, DepthwiseConvolution2dDepthNhwcTest, false)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleDepthwiseConvolution2d3x3Dilation3x3Nhwc,
-                     SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest)
+                              SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3Nhwc,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3BFloat16,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3NhwcBFloat16,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3Int8,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3NhwcInt8,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3Uint8,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3NhwcUint8,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3Int16,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d3x3Dilation3x3NhwcInt16,
-                     DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
+                              false,
+                              DataLayout::NHWC)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3Nhwc,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::Float32, DataType::Float32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3BFloat16,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3NhwcBFloat16,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::BFloat16, DataType::BFloat16>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3Int8,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3NhwcInt8,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmS8, DataType::Signed32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3Uint8,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3NhwcUint8,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QAsymmU8, DataType::Signed32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3Int16,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
-                     false,
-                     DataLayout::NCHW)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d2x3x3Dilation3x3NhwcInt16,
-                     DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
-                     false,
-                     DataLayout::NHWC)
+                              DepthwiseConvolution2d2x3x3Dilation3x3Test<DataType::QSymmS16, DataType::Signed32>,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dMult4,
-                     DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>,
-                     false,
-                     armnn::DataLayout::NCHW)
+                              DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>,
+                              false,
+                              armnn::DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dMult2,
-                     DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>,
-                     false,
-                     armnn::DataLayout::NCHW)
+                              DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>,
+                              false,
+                              armnn::DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dMult4BFloat16,
-                     DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>,
-                     false,
-                     armnn::DataLayout::NCHW)
+                              DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>,
+                              false,
+                              armnn::DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dMult2BFloat16,
-                     DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>,
-                     false,
-                     armnn::DataLayout::NCHW)
+                              DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>,
+                              false,
+                              armnn::DataLayout::NCHW)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1,
-                     DepthwiseConvolution2dDepthMul1Test, true, DataLayout::NCHW)
+                              DepthwiseConvolution2dDepthMul1Test,
+                              true,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1Uint8,
-                     DepthwiseConvolution2dDepthMul1Uint8Test, true, DataLayout::NCHW)
+                              DepthwiseConvolution2dDepthMul1Uint8Test,
+                              true,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1Int16,
-                     DepthwiseConvolution2dDepthMul1Int16Test, true, DataLayout::NCHW)
+                              DepthwiseConvolution2dDepthMul1Int16Test,
+                              true,
+                              DataLayout::NCHW)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dDepthMul1,
-                     DepthwiseConvolution2dDepthMul1Test, false, DataLayout::NCHW)
+                              DepthwiseConvolution2dDepthMul1Test,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dDepthMul1Uint8,
-                     DepthwiseConvolution2dDepthMul1Uint8Test, false, DataLayout::NCHW)
+                              DepthwiseConvolution2dDepthMul1Uint8Test,
+                              false,
+                              DataLayout::NCHW)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1Nhwc,
-                     DepthwiseConvolution2dDepthMul1Test, true, DataLayout::NHWC)
+                              DepthwiseConvolution2dDepthMul1Test,
+                              true,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1Uint8Nhwc,
-                     DepthwiseConvolution2dDepthMul1Uint8Test, true, DataLayout::NHWC)
+                              DepthwiseConvolution2dDepthMul1Uint8Test,
+                              true,
+                              DataLayout::NHWC)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dDepthMul1Nhwc,
-                     DepthwiseConvolution2dDepthMul1Test, false, DataLayout::NHWC)
+                              DepthwiseConvolution2dDepthMul1Test,
+                              false,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dDepthMul1Uint8Nhwc,
-                     DepthwiseConvolution2dDepthMul1Uint8Test, false, DataLayout::NHWC)
+                              DepthwiseConvolution2dDepthMul1Uint8Test,
+                              false,
+                              DataLayout::NHWC)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dAsymmetric,
-                     DepthwiseConvolution2dAsymmetricTest, true, DataLayout::NCHW)
+                              DepthwiseConvolution2dAsymmetricTest,
+                              true,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dAsymmetric,
-                     DepthwiseConvolution2dAsymmetricTest, false, DataLayout::NCHW)
+                              DepthwiseConvolution2dAsymmetricTest,
+                              false,
+                              DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dAsymmetricNhwc,
-                     DepthwiseConvolution2dAsymmetricTest, true, DataLayout::NHWC)
+                              DepthwiseConvolution2dAsymmetricTest,
+                              true,
+                              DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dAsymmetricNhwc,
-                     DepthwiseConvolution2dAsymmetricTest, false, DataLayout::NHWC)
+                              DepthwiseConvolution2dAsymmetricTest,
+                              false,
+                              DataLayout::NHWC)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul64, DepthwiseConvolution2dDepthMul64Test);
 
-ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dPerAxisQuantTestNchw, DepthwiseConvolution2dPerAxisQuantTest,
-                     DataLayout::NCHW);
-ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dPerAxisQuantTestNhwc, DepthwiseConvolution2dPerAxisQuantTest,
-                     DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dPerAxisQuantTestNchw,
+                              DepthwiseConvolution2dPerAxisQuantTest,
+                              DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dPerAxisQuantTestNhwc,
+                              DepthwiseConvolution2dPerAxisQuantTest,
+                              DataLayout::NHWC);
 
 // [ Pooling 2D
 //MaxPooling
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
index ad5edde7e6..c1c3916292 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
@@ -19,16 +19,41 @@ RefDepthwiseConvolution2dWorkload::RefDepthwiseConvolution2dWorkload(
         const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
         : RefBaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
 {
-    m_Weight = std::make_unique<ScopedTensorHandle>(*(descriptor.m_Weight));
-    const TensorInfo& rFilterInfo = m_Weight->GetTensorInfo();
-    m_FilterShape = rFilterInfo.GetShape();
-    m_FilterDecoder = MakeDecoder<float>(rFilterInfo, m_Weight->Map(true));
+    WorkloadInfo detailsInfo;
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[1]);
 
     if (descriptor.m_Parameters.m_BiasEnabled)
     {
-        m_Bias = std::make_unique<ScopedTensorHandle>(*(descriptor.m_Bias));
-        const TensorInfo& biasInfo = m_Bias->GetTensorInfo();
-        m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias->Map(true));
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[2]);
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("RefDepthwiseConvolution2dWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+}
+
+void RefDepthwiseConvolution2dWorkload::PostAllocationConfigure()
+{
+    PostAllocationConfigure(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefDepthwiseConvolution2dWorkload::PostAllocationConfigure(std::vector<ITensorHandle*> inputs,
+                                                                std::vector<ITensorHandle*> outputs)
+{
+    IgnoreUnused(outputs);
+
+    const TensorInfo& rFilterInfo = GetTensorInfo(inputs[1]);
+    m_FilterShape = rFilterInfo.GetShape();
+    m_FilterDecoder = MakeDecoder<float>(rFilterInfo);
+
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        const TensorInfo& biasInfo = GetTensorInfo(inputs[2]);
+        m_BiasDecoder = MakeDecoder<float>(biasInfo);
     }
 }
 
@@ -39,6 +64,8 @@ void RefDepthwiseConvolution2dWorkload::Execute() const
 
 void RefDepthwiseConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
 {
+    PostAllocationConfigure(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+
     Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
 }
 
@@ -54,6 +81,12 @@ void RefDepthwiseConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inpu
     const TensorShape& inputShape = GetTensorInfo(inputs[0]).GetShape();
     const TensorShape& outputShape = GetTensorInfo(outputs[0]).GetShape();
 
+    m_FilterDecoder->Reset(inputs[1]->Map());
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        m_BiasDecoder->Reset(inputs[2]->Map());
+    }
+
     Convolve(inputShape, *inputDecoder, outputShape, *OutputEncoder,
              m_FilterShape, *m_FilterDecoder, m_Data.m_Parameters.m_BiasEnabled, m_BiasDecoder.get(),
              m_Data.m_Parameters.m_DataLayout, m_Data.m_Parameters.m_PadTop, m_Data.m_Parameters.m_PadLeft,
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
index 5d4b483fa7..1c7de29b37 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
@@ -17,11 +17,13 @@ public:
     explicit RefDepthwiseConvolution2dWorkload(const DepthwiseConvolution2dQueueDescriptor &descriptor,
                                                const WorkloadInfo &info);
 
+    void PostAllocationConfigure() override;
 
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)  override;
 
 private:
+    void PostAllocationConfigure(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs);
     void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
 
     std::unique_ptr <ScopedTensorHandle> m_Weight;
-- 
cgit v1.2.1