3 files changed, 34 insertions, 28 deletions
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
index 95d4690d4f..c4edc2022f 100644
--- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
+++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
@@ -24,7 +24,7 @@ DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolut
 {
 }
 
-std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph&                  graph,
+std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
                                                                        const IWorkloadFactory& factory) const
 {
     // on this level constant data should not be released..
@@ -59,34 +59,40 @@ std::vector<TensorShape>
 DepthwiseConvolution2dLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
 {
     BOOST_ASSERT(inputShapes.size() == 2);
-    const TensorShape& inputShape = inputShapes[0];
-    const TensorShape filterShape = inputShapes[1];
+    const TensorShape& inputShape  = inputShapes[0];
+    const TensorShape& filterShape = inputShapes[1];
 
     BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");
 
     DataLayoutIndexed dataLayoutIndex(m_Param.m_DataLayout);
 
-    unsigned int inWidth = inputShape[dataLayoutIndex.GetWidthIndex()];
-    unsigned int inHeight = inputShape[dataLayoutIndex.GetHeightIndex()];
-    unsigned int inBatchSize = inputShape[0];
+    unsigned int inputBatchSize = inputShape[0];
+    unsigned int inputHeight    = inputShape[dataLayoutIndex.GetHeightIndex()];
+    unsigned int inputWidth     = inputShape[dataLayoutIndex.GetWidthIndex()];
+    unsigned int inputChannels  = inputShape[dataLayoutIndex.GetChannelsIndex()];
 
-    unsigned int filterWidth = filterShape[dataLayoutIndex.GetWidthIndex()];
-    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth);
-    unsigned int outWidth =  1 + (readWidth / m_Param.m_StrideX);
+    // Expected filter shape: [ M, I, H, W ] - This shape does NOT depend on the data layout
+    // Namely: [ depth multiplier, input channels, filter height, filter width ]
+    // Output channels = input channels * depthMultiplier
 
-    unsigned int filterHeight = filterShape[dataLayoutIndex.GetHeightIndex()];
-    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight);
-    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);
     unsigned int depthMultiplier = filterShape[0];
 
-    unsigned int outChannels = filterShape[dataLayoutIndex.GetChannelsIndex()] * depthMultiplier;
-    unsigned int outBatchSize = inBatchSize;
+    unsigned int filterHeight = filterShape[2];
+    unsigned int readHeight   = (inputHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
+    unsigned int outputHeight = 1 + (readHeight / m_Param.m_StrideY);
+
+    unsigned int filterWidth = filterShape[3];
+    unsigned int readWidth   = (inputWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
+    unsigned int outputWidth = 1 + (readWidth / m_Param.m_StrideX);
+
+    unsigned int outputChannels  = inputChannels * depthMultiplier;
+    unsigned int outputBatchSize = inputBatchSize;
 
     TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ?
-        TensorShape( { outBatchSize, outHeight, outWidth, outChannels } ) :
-        TensorShape( { outBatchSize, outChannels, outHeight, outWidth });
+                              TensorShape{ outputBatchSize, outputHeight, outputWidth, outputChannels } :
+                              TensorShape{ outputBatchSize, outputChannels, outputHeight, outputWidth };
 
-    return std::vector<TensorShape>({ tensorShape });
+    return std::vector<TensorShape>{ tensorShape };
 }
 
 void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 3dc18b9e18..f52f6055ca 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -414,18 +414,18 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
 {
     // Creates the layer we're testing.
     DepthwiseConvolution2dDescriptor layerDesc;
-    layerDesc.m_PadLeft         = 1;
-    layerDesc.m_PadRight        = 2;
-    layerDesc.m_PadTop          = 1;
-    layerDesc.m_PadBottom       = 2;
-    layerDesc.m_StrideX         = 1;
-    layerDesc.m_StrideY         = 1;
-    layerDesc.m_BiasEnabled     = false;
-    layerDesc.m_DataLayout = dataLayout;
+    layerDesc.m_PadLeft     = 1;
+    layerDesc.m_PadRight    = 2;
+    layerDesc.m_PadTop      = 1;
+    layerDesc.m_PadBottom   = 2;
+    layerDesc.m_StrideX     = 1;
+    layerDesc.m_StrideY     = 1;
+    layerDesc.m_BiasEnabled = false;
+    layerDesc.m_DataLayout  = dataLayout;
 
     DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
 
-    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType));
+    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 2, 4, 4}, DataType)); // [ M, I, H, W ]
     layer->m_Weight->Allocate();
 
     // Creates extra layers.
@@ -457,7 +457,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
 
     BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
-    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType)));
+    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 2, 4, 4}, DataType)));
 
     // Returns so we can do extra, backend-specific tests.
     return workload;
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index 29d1702c64..80addb4bfd 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -898,7 +898,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dValidateTensorShapesFromInputsNhwc)
 {
     Graph graph;
     const unsigned int inputShape[] = { 1, 3, 3, 2 };
-    const unsigned int weightsShape[] = { 1, 3, 3, 2 };
+    const unsigned int weightsShape[] = { 1, 2, 3, 3 };
     const unsigned int outputShape[] = { 1, 1, 1, 2 };
     CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape, DataLayout::NHWC);