1 files changed, 41 insertions, 24 deletions
diff --git a/OutputShapeUtils.cpp b/OutputShapeUtils.cpp
index 6a9bf90c..285e25f4 100644
--- a/OutputShapeUtils.cpp
+++ b/OutputShapeUtils.cpp
@@ -43,26 +43,15 @@ TensorShape CalculateMaxShape(const TensorShape& inShape0, const TensorShape& in
     return outputShape;
 }
 
-} // namespace annonymous
-
-
-namespace armnn_driver
-{
-
-using namespace armnn;
-
-bool IsDynamicOutput(const TensorInfo& outputInfo)
-{
-    return outputInfo.GetNumElements() == 0u;
-}
-
-TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
-                                          const TensorShape& kernelShape,
-                                          const Convolution2dDescriptor& descriptor)
+template<typename ConvolutionDescriptor> // NOTE(review): depthwise kernels assumed [M, I, H, W] - confirm vs ArmNN
+TensorShape InferConvolution2dOutputShapeImpl(const TensorShape& inputShape,
+                                              const TensorShape& kernelShape,
+                                              const ConvolutionDescriptor& descriptor,
+                                              bool isDepthwiseConvolution) // true selects depthwise kernel indexing
 {
     if (inputShape.GetNumDimensions() != 4)
     {
-        throw InvalidArgumentException("Input shape for Convolution2d must be 4D");
+        throw InvalidArgumentException("Input shape must be 4D");
     }
 
     armnnUtils::DataLayoutIndexed dataLayoutIndex(descriptor.m_DataLayout);
@@ -74,30 +63,58 @@ TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
     const unsigned int wInput = inputShape[wIndex];
     const unsigned int hInput = inputShape[hIndex];
 
-    const unsigned int wKernel  = kernelShape[wIndex];
+    const unsigned int wKernel  = isDepthwiseConvolution ? kernelShape[3] : kernelShape[wIndex]; // depthwise W is dim 3
     const unsigned int wDilated = wKernel + (descriptor.m_DilationX - 1) * (wKernel - 1);
 
     const unsigned int wRead   = (wInput + descriptor.m_PadLeft + descriptor.m_PadRight) - wDilated;
     const unsigned int wOutput = 1 + (wRead / descriptor.m_StrideX);
 
-    const unsigned int hKernel  = kernelShape[hIndex];
+    const unsigned int hKernel  = isDepthwiseConvolution ? kernelShape[2] : kernelShape[hIndex]; // depthwise H is dim 2
     const unsigned int hDilated = hKernel + (descriptor.m_DilationY - 1) * (hKernel - 1);
 
     const unsigned int hRead   = (hInput + descriptor.m_PadTop + descriptor.m_PadBottom) - hDilated;
     const unsigned int hOutput = 1 + (hRead / descriptor.m_StrideY);
 
-    const unsigned int batches  = inputShape[0];
-    const unsigned int channels = kernelShape[0];
-
     TensorShape outputShape(4);
-    outputShape[0]      = batches;
-    outputShape[cIndex] = channels;
+    outputShape[0]      = inputShape[0];  // batch size carried over from the input
+    outputShape[cIndex] = kernelShape[0]; // channel count taken from the kernel's first dimension
     outputShape[wIndex] = wOutput;
     outputShape[hIndex] = hOutput;
 
+    if (isDepthwiseConvolution)
+    {
+        outputShape[cIndex] *= inputShape[cIndex]; // depthwise: kernelShape[0] scales the input channel count
+    }
+
     return outputShape;
 }
 
+} // anonymous namespace
+
+namespace armnn_driver
+{
+
+using namespace armnn;
+
+bool IsDynamicOutput(const TensorInfo& outputInfo)
+{
+    return outputInfo.GetNumElements() == 0u; // an output reporting zero elements is classed as dynamic
+}
+
+TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
+                                          const TensorShape& kernelShape,
+                                          const Convolution2dDescriptor& descriptor)
+{
+    return InferConvolution2dOutputShapeImpl(inputShape, kernelShape, descriptor, false); // non-depthwise path
+}
+
+TensorShape InferDepthwiseConvolution2dOutputShape(const TensorShape& inputShape,
+                                                   const TensorShape& kernelShape,
+                                                   const DepthwiseConvolution2dDescriptor& descriptor)
+{
+    return InferConvolution2dOutputShapeImpl(inputShape, kernelShape, descriptor, true); // depthwise path
+}
+
 TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
                                     const armnn::TensorShape& input1Shape)
 {