From 9fd373954d64fbae72d1726bbdfc57a18a3a2f6d Mon Sep 17 00:00:00 2001
From: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Date: Mon, 15 Jul 2019 18:04:32 +0100
Subject: IVGCVSW-3455 Support dynamic output shape in
 hal_1_2::HalPolicy::ConvertDepthwiseConv2d

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Iba64a674d772a76ca071553cb423ed870fae9bfd
---
 1.2/HalPolicy.cpp    | 26 +++++++++++++++++++--
 OutputShapeUtils.cpp | 65 +++++++++++++++++++++++++++++++++-------------------
 OutputShapeUtils.hpp |  4 ++++
 3 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index 69cc4713..0c57636c 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -340,7 +340,6 @@ bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model&
     }
 
     const armnn::TensorInfo& inputInfo  = input.GetTensorInfo();
-    const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
 
     // ArmNN does not currently support non-fixed weights or bias
     // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
@@ -447,6 +446,22 @@ bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model&
     desc.m_BiasEnabled = true;
     armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
 
+    armnn::TensorInfo outputInfo = GetTensorInfoForOperand(*output);
+    if (IsDynamicOutput(outputInfo))
+    {
+        try
+        {
+            ALOGD("Output shape not set, will infer from inputs");
+            outputInfo.SetShape(InferDepthwiseConvolution2dOutputShape(inputInfo.GetShape(),
+                                                                       weights.GetInfo().GetShape(),
+                                                                       desc));
+        }
+        catch (armnn::Exception& e)
+        {
+            return Fail("%s: Could not infer dynamic output shape: %s", __func__, e.what());
+        }
+    }
+
     bool isSupported = false;
     FORWARD_LAYER_SUPPORT_FUNC(__func__,
                                IsDepthwiseConvolutionSupported,
@@ -457,6 +472,7 @@ bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model&
                                desc,
                                weights.GetInfo(),
                                biases);
+
     if (!isSupported)
     {
         return false;
@@ -464,6 +480,7 @@ bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model&
 
     armnn::IConnectableLayer* startLayer =
         data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+
     if (!startLayer)
     {
         return Fail("%s: AddDepthwiseConvolution2dLayer failed", __func__);
@@ -477,7 +494,12 @@ bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model&
 
     input.Connect(startLayer->GetInputSlot(0));
 
-    return SetupAndTrackLayerOutputSlot<hal_1_2::HalPolicy>(operation, 0, *endLayer, model, data);
+    return SetupAndTrackLayerOutputSlot<hal_1_2::HalPolicy>(operation,
+                                                            0,
+                                                            *endLayer,
+                                                            model,
+                                                            data,
+                                                            armnn::Optional<armnn::TensorInfo>(outputInfo));
 }
 
 bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data)
diff --git a/OutputShapeUtils.cpp b/OutputShapeUtils.cpp
index 6a9bf90c..285e25f4 100644
--- a/OutputShapeUtils.cpp
+++ b/OutputShapeUtils.cpp
@@ -43,26 +43,15 @@ TensorShape CalculateMaxShape(const TensorShape& inShape0, const TensorShape& in
     return outputShape;
 }
 
-} // namespace annonymous
-
-
-namespace armnn_driver
-{
-
-using namespace armnn;
-
-bool IsDynamicOutput(const TensorInfo& outputInfo)
-{
-    return outputInfo.GetNumElements() == 0u;
-}
-
-TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
-                                          const TensorShape& kernelShape,
-                                          const Convolution2dDescriptor& descriptor)
+template<typename ConvolutionDescriptor>
+TensorShape InferConvolution2dOutputShapeImpl(const TensorShape& inputShape,
+                                              const TensorShape& kernelShape,
+                                              const ConvolutionDescriptor& descriptor,
+                                              bool isDepthwiseConvolution)
 {
     if (inputShape.GetNumDimensions() != 4)
     {
-        throw InvalidArgumentException("Input shape for Convolution2d must be 4D");
+        throw InvalidArgumentException("Input shape must be 4D");
     }
 
     armnnUtils::DataLayoutIndexed dataLayoutIndex(descriptor.m_DataLayout);
@@ -74,30 +63,58 @@ TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
     const unsigned int wInput = inputShape[wIndex];
     const unsigned int hInput = inputShape[hIndex];
 
-    const unsigned int wKernel  = kernelShape[wIndex];
+    const unsigned int wKernel  = isDepthwiseConvolution ? kernelShape[2] : kernelShape[wIndex];
     const unsigned int wDilated = wKernel + (descriptor.m_DilationX - 1) * (wKernel - 1);
 
     const unsigned int wRead   = (wInput + descriptor.m_PadLeft + descriptor.m_PadRight) - wDilated;
     const unsigned int wOutput = 1 + (wRead / descriptor.m_StrideX);
 
-    const unsigned int hKernel  = kernelShape[hIndex];
+    const unsigned int hKernel  = isDepthwiseConvolution ? kernelShape[3] : kernelShape[hIndex];
     const unsigned int hDilated = hKernel + (descriptor.m_DilationY - 1) * (hKernel - 1);
 
     const unsigned int hRead   = (hInput + descriptor.m_PadTop + descriptor.m_PadBottom) - hDilated;
     const unsigned int hOutput = 1 + (hRead / descriptor.m_StrideY);
 
-    const unsigned int batches  = inputShape[0];
-    const unsigned int channels = kernelShape[0];
-
     TensorShape outputShape(4);
-    outputShape[0]      = batches;
-    outputShape[cIndex] = channels;
+    outputShape[0]      = inputShape[0];
+    outputShape[cIndex] = kernelShape[0];
     outputShape[wIndex] = wOutput;
     outputShape[hIndex] = hOutput;
 
+    if (isDepthwiseConvolution)
+    {
+        outputShape[cIndex] *= inputShape[cIndex];
+    }
+
     return outputShape;
 }
 
+} // anonymous namespace
+
+namespace armnn_driver
+{
+
+using namespace armnn;
+
+bool IsDynamicOutput(const TensorInfo& outputInfo)
+{
+    return outputInfo.GetNumElements() == 0u; // zero elements => callers treat the output shape as not set
+}
+
+TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
+                                          const TensorShape& kernelShape,
+                                          const Convolution2dDescriptor& descriptor)
+{ // Forwards to the shared helper, which throws InvalidArgumentException when inputShape is not 4D
+    return InferConvolution2dOutputShapeImpl(inputShape, kernelShape, descriptor, false); // false: not depthwise
+}
+
+TensorShape InferDepthwiseConvolution2dOutputShape(const TensorShape& inputShape,
+                                                   const TensorShape& kernelShape,
+                                                   const DepthwiseConvolution2dDescriptor& descriptor)
+{ // NOTE(review): helper reads kernel width from kernelShape[2], height from [3] - confirm vs. weights layout
+    return InferConvolution2dOutputShapeImpl(inputShape, kernelShape, descriptor, true); // true: depthwise path
+}
+
 TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
                                     const armnn::TensorShape& input1Shape)
 {
diff --git a/OutputShapeUtils.hpp b/OutputShapeUtils.hpp
index 58686954..bcb43475 100644
--- a/OutputShapeUtils.hpp
+++ b/OutputShapeUtils.hpp
@@ -16,6 +16,10 @@ armnn::TensorShape InferConvolution2dOutputShape(const armnn::TensorShape& input
                                                  const armnn::TensorShape& kernelShape,
                                                  const armnn::Convolution2dDescriptor& descriptor);
 
+armnn::TensorShape InferDepthwiseConvolution2dOutputShape(const armnn::TensorShape& inputShape,
+                                                          const armnn::TensorShape& kernelShape,
+                                                          const armnn::DepthwiseConvolution2dDescriptor& descriptor);
+
 armnn::TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
                                            const armnn::TensorShape& input1Shape);
 
-- 
cgit v1.2.1