From dc8ed9d75e54e914a970e137900930fa64a0782b Mon Sep 17 00:00:00 2001
From: Samuel Yap <samuel.yap@arm.com>
Date: Mon, 8 Aug 2022 14:07:42 +0100
Subject: IVGCVSW-7105: BatchMatMul Optional Parameter Support

  * Added transpose parameters to pre-transpose each input tensor's slices
  * Added adjoint parameters to pre-adjoint each input tensor's slices
  * Small refactoring (BatchMatMulDescriptor static helpers and BatchMatMulImpl constructor)
  * Updated input validation and output shape inference for parameters
  * Additional layer unit tests for parameters added
  * Version numbers incremented

Signed-off-by: Samuel Yap <samuel.yap@arm.com>
Change-Id: Ibe5242a8a5bf604c13de0dc65844fd6c421cc667
---
 src/armnn/Descriptors.cpp             | 115 +++++++++++++++++-----------------
 src/armnn/layers/BatchMatMulLayer.cpp |  27 +++++---
 2 files changed, 77 insertions(+), 65 deletions(-)

(limited to 'src/armnn')
diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp
index f9576271d5..226d121edc 100644
--- a/src/armnn/Descriptors.cpp
+++ b/src/armnn/Descriptors.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include "armnn/Descriptors.hpp"
@@ -461,80 +461,79 @@ BatchMatMulDescriptor::GetAxesToMul(
     const TensorShape& tensorXShape,
     const TensorShape& tensorYShape)
 {
-    // May refactor to just work on one input per call - makes it less confusing and also
-    // allows more flexibility (i.e. in Layer output shape inference)
-
-    auto xNumDims = tensorXShape.GetNumDimensions();
-    auto yNumDims = tensorYShape.GetNumDimensions();
-
-    std::pair<unsigned int, unsigned int> xAxes = { xNumDims-2, xNumDims-1 };
-    std::pair<unsigned int, unsigned int> yAxes = { yNumDims-2, yNumDims-1 };
-
-    if(desc.m_DataLayoutX.has_value())
-    {
-        switch(desc.m_DataLayoutX.value())
-        {
-            case DataLayout::NDHWC:
-            case DataLayout::NHWC:
-                xAxes.first -= 1;
-                xAxes.second -= 1;
-                break;
-            case DataLayout::NCDHW:
-            case DataLayout::NCHW:
-            default:
-                break;
-        }
-    }
-
-    if(desc.m_DataLayoutY.has_value())
-    {
-        switch(desc.m_DataLayoutY.value())
-        {
-            case DataLayout::NDHWC:
-            case DataLayout::NHWC:
-                yAxes.first -= 1;
-                yAxes.second -= 1;
-                break;
-            case DataLayout::NCDHW:
-            case DataLayout::NCHW:
-            default:
-                break;
-        }
-    }
-
-    return { xAxes, yAxes};
+    return { GetAxesToMul(desc.m_DataLayoutX, tensorXShape),
+             GetAxesToMul(desc.m_DataLayoutY, tensorYShape) };
 }
-
 std::pair<std::vector<unsigned int>, std::vector<unsigned int>> BatchMatMulDescriptor::GetAxesNotMul(
     const BatchMatMulDescriptor& desc,
     const TensorShape& inputXShape,
     const TensorShape& inputYShape)
 {
-    // May refactor to just work on one input per call - makes it less confusing and also
-    // allows more flexibility (i.e. in Layer output shape inference)
-    auto axesToMul = BatchMatMulDescriptor::GetAxesToMul(desc, inputXShape, inputYShape);
+    return { GetAxesNotMul(desc.m_DataLayoutX, inputXShape),
+             GetAxesNotMul(desc.m_DataLayoutY, inputYShape) };
+}
 
-    std::vector<unsigned int> axesXNotMul;
-    std::vector<unsigned int> axesYNotMul;
+std::pair<unsigned int, unsigned int> BatchMatMulDescriptor::GetAxesToMul(
+    DataLayout dataLayout,
+    const TensorShape& tensorShape)
+{
+    auto numDims = tensorShape.GetNumDimensions();
+    std::pair<unsigned int, unsigned int> axes = { numDims-2, numDims-1 };
+    switch(dataLayout)
+    {
+        case DataLayout::NDHWC:
+        case DataLayout::NHWC:
+            axes.first -= 1;
+            axes.second -= 1;
+            break;
+        case DataLayout::NCDHW:
+        case DataLayout::NCHW:
+        default:
+            break;
+    }
+    return axes;
+}
 
-    for(unsigned int i = 0; i < inputXShape.GetNumDimensions(); i++)
+std::vector<unsigned int> BatchMatMulDescriptor::GetAxesNotMul(
+    DataLayout dataLayout,
+    const TensorShape& tensorShape)
+{
+    auto axesToMul = BatchMatMulDescriptor::GetAxesToMul(dataLayout, tensorShape);
+    std::vector<unsigned int> axesNotMul;
+    for(unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
     {
-        if(i == axesToMul.first.first || i == axesToMul.first.second)
+        if(i == axesToMul.first || i == axesToMul.second)
         {
             continue;
         }
-        axesXNotMul.push_back(i);
+        axesNotMul.push_back(i);
     }
-    for(unsigned int i = 0; i < inputYShape.GetNumDimensions(); i++)
+    return axesNotMul;
+}
+
+PermutationVector BatchMatMulDescriptor::GetPermuteVec(
+    DataLayout dataLayout,
+    const TensorShape& tensorShape)
+{
+    std::vector<unsigned int> vec;
+    auto axesToMul = BatchMatMulDescriptor::GetAxesToMul(dataLayout, tensorShape);
+    for(unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
     {
-        if(i == axesToMul.second.first || i == axesToMul.second.second)
+        if(i == axesToMul.first)
         {
-            continue;
+            vec.push_back(i+1);
+        }
+        else if(i == axesToMul.second)
+        {
+            vec.push_back(i-1);
+        }
+        else
+        {
+            vec.push_back(i);
         }
-        axesYNotMul.push_back(i);
     }
-
-    return { axesXNotMul, axesYNotMul };
+    return PermutationVector(vec.data(),
+                             static_cast<unsigned int>(vec.size()));
 }
 
 }
diff --git a/src/armnn/layers/BatchMatMulLayer.cpp b/src/armnn/layers/BatchMatMulLayer.cpp
index 501de2d091..acd089aef8 100644
--- a/src/armnn/layers/BatchMatMulLayer.cpp
+++ b/src/armnn/layers/BatchMatMulLayer.cpp
@@ -5,6 +5,7 @@
 #include "BatchMatMulLayer.hpp"
 
 #include <armnn/backends/WorkloadFactory.hpp>
+#include <armnnUtils/Permute.hpp>
 #include "layers/LayerCloneBase.hpp"
 
 namespace armnn
@@ -36,12 +37,24 @@ std::vector<TensorShape> BatchMatMulLayer::InferOutputShapes(const std::vector<T
     TensorShape inputXShape = inputShapes[0];
     TensorShape inputYShape = inputShapes[1];
 
-    // Note: Take into account what pre-adjoint or pre-transposing will do to the inferred output shape
+    // Adjoint will not affect the resultant shape, as you would be permuting two axes of equal size
+    if(m_Param.m_TransposeX)
+    {
+        auto permuteVec = BatchMatMulDescriptor::GetPermuteVec(m_Param.m_DataLayoutX,
+                                                               inputXShape);
+        inputXShape = armnnUtils::Permuted(inputXShape, permuteVec);
+    }
+    if(m_Param.m_TransposeY)
+    {
+        auto permuteVec = BatchMatMulDescriptor::GetPermuteVec(m_Param.m_DataLayoutY,
+                                                               inputYShape);
+        inputYShape = armnnUtils::Permuted(inputYShape, permuteVec);
+    }
 
     TensorShape& longerInput = inputXShape.GetNumDimensions() >= inputYShape.GetNumDimensions()?
-                               inputXShape:inputYShape;
+                               inputXShape : inputYShape;
     TensorShape& shorterInput = inputXShape.GetNumDimensions() >= inputYShape.GetNumDimensions()?
-                                inputYShape:inputXShape;
+                                inputYShape : inputXShape;
 
     unsigned int inputNumDimsOffset = longerInput.GetNumDimensions() - shorterInput.GetNumDimensions();
 
@@ -49,10 +62,10 @@ std::vector<TensorShape> BatchMatMulLayer::InferOutputShapes(const std::vector<T
 
     std::vector<unsigned int> tensorDimensions(outputNumDimensions, 0);
 
-    auto axesToMul = BatchMatMulDescriptor::GetAxesToMul(m_Param, inputXShape, inputYShape);
-    const auto& longerAxesToMul = (axesToMul.first.first >= axesToMul.second.first &&
-                             axesToMul.first.second >= axesToMul.second.second) ?
-                                 axesToMul.first : axesToMul.second;
+    const auto& longerInputDataLayout = inputXShape.GetNumDimensions() >= inputYShape.GetNumDimensions()?
+                                        m_Param.m_DataLayoutX : m_Param.m_DataLayoutY;
+    auto longerAxesToMul = BatchMatMulDescriptor::GetAxesToMul(longerInputDataLayout,
+                                                               longerInput);
 
     for (unsigned int i = 0; i < outputNumDimensions; ++i)
     {
-- 
cgit v1.2.1