author    Ryan OShea <ryan.oshea3@arm.com>    2022-09-21 16:09:41 +0100
committer TeresaARM <teresa.charlinreyes@arm.com>    2022-09-22 10:55:33 +0000
commit    49ed0df12338b1e99674edeee4200acf8c05750e (patch)
tree      85f0806dde1d8f24c74a986d732e91904da5899a
parent    9636a9b109fcbc811ec876ba9ca6512b7fbe2ba0 (diff)
download  armnn-49ed0df12338b1e99674edeee4200acf8c05750e.tar.gz
IVGCVSW-6498 Add Support for Batch MatMul to TfLite Delegate
* Creates delegate/src/BatchMatMul.hpp
* Add VisitBatchMatMul function
* Add BatchMatMul to switch in armnn_delegate
* Creates delegate/src/test/BatchMatMulTest.cpp
* Creates delegate/src/test/BatchMatMulTestHelper.hpp
* Add Int8 and Fp32 unit tests on ref backend
* Add BatchMatMul to delegate supported ops

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: I50e61314cf063f986c8a0f7d508847a96953735e
-rw-r--r--  delegate/CMakeLists.txt                     |   3
-rw-r--r--  delegate/src/BatchMatMul.hpp                |  99
-rw-r--r--  delegate/src/armnn_delegate.cpp             |   7
-rw-r--r--  delegate/src/test/BatchMatMulTest.cpp       | 657
-rw-r--r--  delegate/src/test/BatchMatMulTestHelper.hpp | 206
-rw-r--r--  docs/05_03_delegate.dox                     |   2
6 files changed, 974 insertions, 0 deletions
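
As background for the review, the sketch below shows (informally, following the same pattern as the new test helper) how an application would route a model's BATCH_MATMUL nodes through the delegate once this change lands. The model path and backend choice are placeholders, not part of the patch:

// Sketch only: load a model, attach the ArmNN delegate, run inference.
// "model.tflite" is a placeholder; any graph containing BATCH_MATMUL now qualifies.
#include <armnn_delegate.hpp>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>
#include <memory>
#include <vector>

int main()
{
    auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(*model, tflite::ops::builtin::BuiltinOpResolver())(&interpreter);
    interpreter->AllocateTensors();

    // BATCH_MATMUL nodes are offloaded to the chosen backend (reference backend here).
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    armnnDelegate::DelegateOptions delegateOptions(backends);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    interpreter->ModifyGraphWithDelegate(theArmnnDelegate.get());

    interpreter->Invoke();
    return 0;
}
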
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 01dde4ebce..641e8c7157 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -20,6 +20,7 @@ list(APPEND armnnDelegate_sources
src/DelegateOptions.cpp
src/Activation.hpp
src/ArgMinMax.hpp
+ src/BatchMatMul.hpp
src/BatchSpace.hpp
src/Comparison.hpp
src/Convolution.hpp
@@ -138,6 +139,8 @@ if(BUILD_UNIT_TESTS)
src/test/ArgMinMaxTest.cpp
src/test/ArgMinMaxTestHelper.hpp
src/test/ArmnnDelegateTest.cpp
+ src/test/BatchMatMulTest.cpp
+ src/test/BatchMatMulTestHelper.hpp
src/test/BatchSpaceTest.cpp
src/test/BatchSpaceTestHelper.hpp
src/test/CastTest.cpp
diff --git a/delegate/src/BatchMatMul.hpp b/delegate/src/BatchMatMul.hpp
new file mode 100644
index 0000000000..391301e4d7
--- /dev/null
+++ b/delegate/src/BatchMatMul.hpp
@@ -0,0 +1,99 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DelegateUtils.hpp"
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <vector>
+
+namespace armnnDelegate
+{
+ TfLiteStatus VisitBatchMatMulOperator(DelegateData& delegateData,
+ TfLiteContext* tfLiteContext,
+ TfLiteNode* tfLiteNode,
+ int nodeIndex,
+ int32_t operatorCode)
+ {
+ TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));
+ TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+ const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+ const TfLiteTensor& kTfLiteLHSInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+ const TfLiteTensor& kTfLiteRHSInputTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+
+ if (!IsValid(tfLiteContext, kTfLiteLHSInputTensor, operatorCode, nodeIndex))
+ {
+ return kTfLiteError;
+ }
+ if (!IsValid(tfLiteContext, kTfLiteRHSInputTensor, operatorCode, nodeIndex))
+ {
+ return kTfLiteError;
+ }
+
+ if (IsDynamicTensor(kTfLiteLHSInputTensor) || IsDynamicTensor(kTfLiteRHSInputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+ operatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const TfLiteTensor& kTfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+ if (IsDynamicTensor(kTfLiteOutputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+ operatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const armnn::TensorInfo& armnnLHSInputTensorInfo = GetTensorInfoForTfLiteTensor(kTfLiteLHSInputTensor);
+ const armnn::TensorInfo& armnnRHSInputTensorInfo = GetTensorInfoForTfLiteTensor(kTfLiteRHSInputTensor);
+ const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(kTfLiteOutputTensor, true);
+
+ armnn::BatchMatMulDescriptor descriptor;
+ auto* params = reinterpret_cast<TfLiteBatchMatMulParams *>(tfLiteNode->builtin_data);
+
+ // The TensorFlow parameters are called adj_x and adj_y (adjoint), but behind the scenes they only
+ // transpose the last two dimensions of each input; they do not compute a true adjoint.
+ descriptor.m_TransposeX = params->adj_x;
+ descriptor.m_TransposeY = params->adj_y;
+
+ // Check if supported
+ bool isSupported = false;
+ auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC("BATCH_MATMUL",
+ tfLiteContext,
+ IsBatchMatMulSupported,
+ delegateData.m_Backends,
+ isSupported,
+ armnnLHSInputTensorInfo,
+ armnnRHSInputTensorInfo,
+ outputTensorInfo,
+ descriptor);
+ };
+
+ if (!delegateData.m_Network)
+ {
+ validateFunc(outputTensorInfo, isSupported);
+ return isSupported ? kTfLiteOk : kTfLiteError;
+ }
+
+ armnn::IConnectableLayer* layer = delegateData.m_Network->AddBatchMatMulLayer(descriptor);
+ ARMNN_ASSERT(layer != nullptr);
+
+ armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+ outputSlot.SetTensorInfo(outputTensorInfo);
+ Connect(layer, tfLiteNode, delegateData);
+
+ return kTfLiteOk;
+ }
+} // namespace armnnDelegate
\ No newline at end of file
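
A quick aside on the adj_x/adj_y naming addressed in the comment above: for A = [[1, 2], [3, 4]] the delegate only ever forms the transpose [[1, 3], [2, 4]], never the classical adjoint (adjugate) [[4, -2], [-3, 1]]. A minimal sketch of the mapping, using a hypothetical helper name (the descriptor fields and parameter names are the ones used in the patch):

// Sketch only: how the TfLite adjoint flags map onto the ArmNN descriptor.
// For A = {{1, 2}, {3, 4}}, adj_x = true feeds the layer A^T = {{1, 3}, {2, 4}},
// i.e. a transpose of the last two dimensions, not the adjugate {{4, -2}, {-3, 1}}.
armnn::BatchMatMulDescriptor MakeBatchMatMulDescriptor(const TfLiteBatchMatMulParams* params)
{
    armnn::BatchMatMulDescriptor descriptor;
    descriptor.m_TransposeX = params->adj_x; // transpose LHS last two dims
    descriptor.m_TransposeY = params->adj_y; // transpose RHS last two dims
    return descriptor;
}
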
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index c041dd1714..21c66fe706 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -9,6 +9,7 @@
#include "Activation.hpp"
#include "ArgMinMax.hpp"
+#include "BatchMatMul.hpp"
#include "BatchSpace.hpp"
#include "Comparison.hpp"
#include "Convolution.hpp"
@@ -566,6 +567,12 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
tfLiteNode,
nodeIndex,
kTfLiteBuiltinAveragePool2d);
+ case kTfLiteBuiltinBatchMatmul:
+ return VisitBatchMatMulOperator(delegateData,
+ tfLiteContext,
+ tfLiteNode,
+ nodeIndex,
+ kTfLiteBuiltinBatchMatmul);
case kTfLiteBuiltinBatchToSpaceNd:
return VisitBatchToSpaceNdOperator(delegateData,
tfLiteContext,
diff --git a/delegate/src/test/BatchMatMulTest.cpp b/delegate/src/test/BatchMatMulTest.cpp
new file mode 100644
index 0000000000..5469bc845c
--- /dev/null
+++ b/delegate/src/test/BatchMatMulTest.cpp
@@ -0,0 +1,657 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchMatMulTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+ void BatchMatMul2DFp32SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2, 2 };
+ std::vector<int32_t> RHSInputShape { 2, 2 };
+ std::vector<int32_t> outputShape { 2, 2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+ void BatchMatMul2DInt8SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2, 2 };
+ std::vector<int32_t> RHSInputShape { 2, 2 };
+ std::vector<int32_t> outputShape { 2, 2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DFp32SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 1,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DInt8SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 1,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul4DFp32SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,1,2,2 };
+ std::vector<int32_t> outputShape { 1,1,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul4DInt8SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1,2,2};
+ std::vector<int32_t> RHSInputShape { 1,1,2,2 };
+ std::vector<int32_t> outputShape { 1,1,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DFp32BatchTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8,
+
+ 13, 14,
+ 15, 16 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50,
+
+ 267, 286,
+ 323, 346 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DInt8BatchTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8,
+
+ 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50,
+
+ 39, 58,
+ 47, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DFp32BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<float> RHSInputValues = { 13, 14,
+ 15, 16 };
+
+ std::vector<float> expectedOutputValues = { 43, 46,
+ 99, 106,
+
+ 267, 286,
+ 323, 346 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DInt8BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<int8_t> RHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> expectedOutputValues = { 7, 10,
+ 15, 22,
+
+ 39, 58,
+ 47, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3D2DFp32BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<float> RHSInputValues = { 13, 14,
+ 15, 16 };
+
+ std::vector<float> expectedOutputValues = { 43, 46,
+ 99, 106,
+
+ 267, 286,
+ 323, 346 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3D2DInt8BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<int8_t> RHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> expectedOutputValues = { 7, 10,
+ 15, 22,
+
+ 39, 58,
+ 47, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul2DFp32TinyTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1 };
+ std::vector<int32_t> RHSInputShape { 1,1 };
+ std::vector<int32_t> outputShape { 1,1 };
+
+ std::vector<float> LHSInputValues = { 3 };
+
+ std::vector<float> RHSInputValues = { 5 };
+
+ std::vector<float> expectedOutputValues = { 15 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+ void BatchMatMul2DInt8TinyTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1 };
+ std::vector<int32_t> RHSInputShape { 1,1 };
+ std::vector<int32_t> outputShape { 1,1 };
+
+ std::vector<int8_t> LHSInputValues = { 3 };
+
+ std::vector<int8_t> RHSInputValues = { 5 };
+
+ std::vector<int8_t> expectedOutputValues = { 15 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMulNonSquareFp32Test(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,5,3 };
+ std::vector<int32_t> RHSInputShape { 2,3,4 };
+ std::vector<int32_t> outputShape { 2,5,4 };
+
+ std::vector<float> LHSInputValues = { 8, 8, 4,
+ 6, 1, 3,
+ 8, 8, 3,
+ 8, 9, 8,
+ 5, 4, 4,
+
+ 1, 8, 5,
+ 7, 1, 1,
+ 8, 7, 9,
+ 3, 2, 7,
+ 8, 5, 3 };
+
+ std::vector<float> RHSInputValues = { 6, 2, 3, 2,
+ 6, 2, 2, 8,
+ 3, 7, 8, 1,
+
+ 7, 2, 9, 5,
+ 2, 3, 1, 3,
+ 2, 7, 7, 5 };
+
+ std::vector<float> expectedOutputValues = { 108, 60, 72, 84,
+ 51, 35, 44, 23,
+ 105, 53, 64, 83,
+ 126, 90, 106, 96,
+ 66, 46, 55, 46,
+
+ 33, 61, 52, 54,
+ 53, 24, 71, 43,
+ 88, 100, 142, 106,
+ 39, 61, 78, 56,
+ 72, 52, 98, 70 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMulNonSquareInt8Test(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,5,3 };
+ std::vector<int32_t> RHSInputShape { 2,3,4 };
+ std::vector<int32_t> outputShape { 2,5,4 };
+
+ std::vector<int8_t> LHSInputValues = { 8, 8, 4,
+ 6, 1, 3,
+ 8, 8, 3,
+ 8, 9, 8,
+ 5, 4, 4,
+
+ 1, 8, 5,
+ 7, 1, 1,
+ 8, 7, 9,
+ 3, 2, 7,
+ 8, 5, 3 };
+
+ std::vector<int8_t> RHSInputValues = { 6, 2, 3, 2,
+ 6, 2, 2, 8,
+ 3, 7, 8, 1,
+
+ 7, 2, 3, 5,
+ 2, 3, 1, 3,
+ 2, 7, 7, 5 };
+
+ std::vector<int8_t> expectedOutputValues = { 108, 60, 72, 84,
+ 51, 35, 44, 23,
+ 105, 53, 64, 83,
+ 126, 90, 106, 96,
+ 66, 46, 55, 46,
+
+ 33, 61, 46, 54,
+ 53, 24, 29, 43,
+ 88, 100, 94, 106,
+ 39, 61, 60, 56,
+ 72, 52, 50, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul2DFp32SimpleAdjointTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 3,3 };
+ std::vector<int32_t> RHSInputShape { 3,3 };
+ std::vector<int32_t> outputShape { 3,3 };
+
+ std::vector<float> LHSInputValues = { 3, 1, 1,
+ 1, 3, -1,
+ 2, 4, 1 };
+
+ std::vector<float> RHSInputValues = { 1, 0, 0,
+ 0, 1, 0,
+ 0, 0, 1 };
+
+ std::vector<float> expectedOutputValues = { 3, 1, 2,
+ 1, 3, 4,
+ 1, -1, 1 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ true,
+ false);
+ }
+
+ void BatchMatMul2DInt8SimpleAdjointTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 3,3 };
+ std::vector<int32_t> RHSInputShape { 3,3 };
+ std::vector<int32_t> outputShape { 3,3 };
+
+ std::vector<int8_t> LHSInputValues = { 3, 1, 1,
+ 1, 3, -1,
+ 2, 4, 1 };
+
+ std::vector<int8_t> RHSInputValues = { 1, 0, 0,
+ 0, 1, 0,
+ 0, 0, 1 };
+
+ std::vector<int8_t> expectedOutputValues = { 3, 1, 2,
+ 1, 3, 4,
+ 1, -1, 1 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ true,
+ false);
+ }
+
+ TEST_SUITE("BATCH_MATMUL_CpuRefTests")
+ {
+ TEST_CASE("BATCH_MATMUL_Fp32_CpuRefTests")
+ {
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+ BatchMatMul2DFp32SimpleTest (backends);
+ BatchMatMul3DFp32SimpleTest (backends);
+ BatchMatMul4DFp32SimpleTest (backends);
+ BatchMatMul3DFp32BatchTest (backends);
+ BatchMatMul3DFp32BroadcastTest (backends);
+ BatchMatMul3D2DFp32BroadcastTest (backends);
+ BatchMatMul2DFp32TinyTest (backends);
+ BatchMatMulNonSquareFp32Test (backends);
+ BatchMatMul2DFp32SimpleAdjointTest(backends);
+ }
+
+ TEST_CASE("BATCH_MATMUL_Int8_CpuRefTests")
+ {
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+ BatchMatMul2DInt8SimpleTest (backends);
+ BatchMatMul3DInt8SimpleTest (backends);
+ BatchMatMul4DInt8SimpleTest (backends);
+ BatchMatMul3DInt8BatchTest (backends);
+ BatchMatMul3DInt8BroadcastTest (backends);
+ BatchMatMul3D2DInt8BroadcastTest (backends);
+ BatchMatMul2DInt8TinyTest (backends);
+ BatchMatMulNonSquareInt8Test (backends);
+ BatchMatMul2DInt8SimpleAdjointTest(backends);
+ }
+ }
+
+}
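
For reviewers cross-checking the broadcast cases by hand (this snippet is a standalone sanity check, not part of the patch): the 2x2 RHS in BatchMatMul3D2DFp32BroadcastTest is reused for both LHS batches, and plain loops reproduce the expected values:

// Standalone sanity check reproducing the arithmetic behind
// BatchMatMul3D2DFp32BroadcastTest: the 2x2 RHS is broadcast across both LHS batches.
#include <array>
#include <cassert>

int main()
{
    std::array<std::array<float, 4>, 2> lhs = {{ { 1, 2, 3, 4 }, { 9, 10, 11, 12 } }};
    std::array<float, 4> rhs = { 13, 14, 15, 16 };               // reused for both batches
    std::array<std::array<float, 4>, 2> expected = {{ { 43, 46, 99, 106 }, { 267, 286, 323, 346 } }};

    for (int b = 0; b < 2; ++b)
    {
        for (int i = 0; i < 2; ++i)
        {
            for (int j = 0; j < 2; ++j)
            {
                float acc = 0.0f;
                for (int k = 0; k < 2; ++k)
                {
                    acc += lhs[b][i * 2 + k] * rhs[k * 2 + j];   // row-major 2x2 indexing
                }
                assert(acc == expected[b][i * 2 + j]);
            }
        }
    }
    return 0;
}
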
diff --git a/delegate/src/test/BatchMatMulTestHelper.hpp b/delegate/src/test/BatchMatMulTestHelper.hpp
new file mode 100644
index 0000000000..42c1ed6a1e
--- /dev/null
+++ b/delegate/src/test/BatchMatMulTestHelper.hpp
@@ -0,0 +1,206 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+ std::vector<char> CreateBatchMatMulTfLiteModel(
+ tflite::BuiltinOperator bmmOperatorCode,
+ tflite::TensorType tensorType,
+ const std::vector <int32_t>& LHSInputTensorShape,
+ const std::vector <int32_t>& RHSInputTensorShape,
+ const std::vector <int32_t>& outputTensorShape,
+ bool adjX = false,
+ bool adjY = false,
+ float quantScale = 1.0f,
+ int quantOffset = 0)
+ {
+ using namespace tflite;
+ flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+ std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+ buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+ auto quantizationParameters =
+ CreateQuantizationParameters(flatBufferBuilder,
+ 0,
+ 0,
+ flatBufferBuilder.CreateVector<float>({ quantScale }),
+ flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+ std::array<flatbuffers::Offset<Tensor>, 3> tensors;
+ tensors[0] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(LHSInputTensorShape.data(),
+ LHSInputTensorShape.size()),
+ tensorType,
+ 0,
+ flatBufferBuilder.CreateString("LHSInput"),
+ quantizationParameters);
+
+ tensors[1] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(RHSInputTensorShape.data(),
+ RHSInputTensorShape.size()),
+ tensorType,
+ 0,
+ flatBufferBuilder.CreateString("RHSInput"),
+ quantizationParameters);
+
+ tensors[2] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+ outputTensorShape.size()),
+ tensorType,
+ 0,
+ flatBufferBuilder.CreateString("output"),
+ quantizationParameters);
+
+ // create operator
+ tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_BatchMatMulOptions;
+ flatbuffers::Offset<void> operatorBuiltinOptions = CreateBatchMatMulOptions(flatBufferBuilder,
+ adjX,
+ adjY).Union();
+
+ const std::vector<int32_t> operatorInputs{{0, 1}};
+ const std::vector<int32_t> operatorOutputs{2};
+ flatbuffers::Offset <Operator> bmmOperator =
+ CreateOperator(flatBufferBuilder,
+ 0,
+ flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+ flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(),
+ operatorOutputs.size()),
+ operatorBuiltinOptionsType,
+ operatorBuiltinOptions);
+
+ const std::vector<int> subgraphInputs{{0, 1}};
+ const std::vector<int> subgraphOutputs{2};
+ flatbuffers::Offset <SubGraph> subgraph =
+ CreateSubGraph(flatBufferBuilder,
+ flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+ flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+ flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(),
+ subgraphOutputs.size()),
+ flatBufferBuilder.CreateVector(&bmmOperator, 1));
+
+ flatbuffers::Offset <flatbuffers::String> modelDescription =
+ flatBufferBuilder.CreateString("ArmnnDelegate: BatchMatMul Operator Model");
+ flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, bmmOperatorCode);
+
+ flatbuffers::Offset <Model> flatbufferModel =
+ CreateModel(flatBufferBuilder,
+ TFLITE_SCHEMA_VERSION,
+ flatBufferBuilder.CreateVector(&operatorCode, 1),
+ flatBufferBuilder.CreateVector(&subgraph, 1),
+ modelDescription,
+ flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+ flatBufferBuilder.Finish(flatbufferModel);
+
+ return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+ flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+ }
+
+ template <typename T>
+ void BatchMatMulTest(tflite::BuiltinOperator bmmOperatorCode,
+ tflite::TensorType tensorType,
+ std::vector<armnn::BackendId>& backends,
+ std::vector<int32_t>& LHSInputShape,
+ std::vector<int32_t>& RHSInputShape,
+ std::vector<int32_t>& outputShape,
+ std::vector<T>& LHSInputValues,
+ std::vector<T>& RHSInputValues,
+ std::vector<T>& expectedOutputValues,
+ bool adjX = false,
+ bool adjY = false,
+ float quantScale = 1.0f,
+ int quantOffset = 0)
+ {
+ using namespace tflite;
+ std::vector<char> modelBuffer = CreateBatchMatMulTfLiteModel(bmmOperatorCode,
+ tensorType,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ adjX,
+ adjY,
+ quantScale,
+ quantOffset);
+
+ const Model* tfLiteModel = GetModel(modelBuffer.data());
+ CHECK(tfLiteModel != nullptr);
+ // Create TfLite Interpreters
+ std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+ (&armnnDelegateInterpreter) == kTfLiteOk);
+ CHECK(armnnDelegateInterpreter != nullptr);
+ CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+ std::unique_ptr<Interpreter> tfLiteInterpreter;
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+ (&tfLiteInterpreter) == kTfLiteOk);
+ CHECK(tfLiteInterpreter != nullptr);
+ CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+ // Create the ArmNN Delegate
+ armnnDelegate::DelegateOptions delegateOptions(backends);
+ std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+ theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+ armnnDelegate::TfLiteArmnnDelegateDelete);
+ CHECK(theArmnnDelegate != nullptr);
+ // Modify armnnDelegateInterpreter to use armnnDelegate
+ CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+ // Set input data
+ auto tfLiteDelegateLHSInputId = tfLiteInterpreter->inputs()[0];
+ auto tfLiteDelegateLHSInputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateLHSInputId);
+ auto tfLiteDelegateRHSInputId = tfLiteInterpreter->inputs()[1];
+ auto tfLiteDelegateRHSInputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateRHSInputId);
+ for (unsigned int i = 0; i < LHSInputValues.size(); ++i)
+ {
+ tfLiteDelegateLHSInputData[i] = LHSInputValues[i];
+ }
+ for (unsigned int i = 0; i < RHSInputValues.size(); ++i)
+ {
+ tfLiteDelegateRHSInputData[i] = RHSInputValues[i];
+ }
+
+ auto armnnDelegateLHSInputId = armnnDelegateInterpreter->inputs()[0];
+ auto armnnDelegateLHSInputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateLHSInputId);
+ auto armnnDelegateRHSInputId = armnnDelegateInterpreter->inputs()[1];
+ auto armnnDelegateRHSInputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateRHSInputId);
+ for (unsigned int i = 0; i < LHSInputValues.size(); ++i)
+ {
+ armnnDelegateLHSInputData[i] = LHSInputValues[i];
+ }
+ for (unsigned int i = 0; i < RHSInputValues.size(); ++i)
+ {
+ armnnDelegateRHSInputData[i] = RHSInputValues[i];
+ }
+ // Run EnqueueWorkload
+ CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+ CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+ armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter,
+ outputShape, expectedOutputValues);
+ }
+
+} // anonymous namespace
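
Should another backend (for example GpuAcc) gain BatchMatMul support later, the helper above can be reused verbatim. A hedged sketch of such a future test case follows; the suite name and GpuAcc availability are assumptions, and the values mirror the 2D Fp32 simple test:

// Illustrative only: a possible future suite reusing BatchMatMulTestHelper.hpp.
TEST_SUITE("BATCH_MATMUL_GpuAccTests")
{
    TEST_CASE("BATCH_MATMUL_Fp32_GpuAccTests")
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
        std::vector<int32_t> shape { 2, 2 };
        std::vector<float> lhs = { 1, 2, 3, 4 };
        std::vector<float> rhs = { 5, 6, 7, 8 };
        std::vector<float> expected = { 19, 22, 43, 50 };

        BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
                               ::tflite::TensorType_FLOAT32,
                               backends, shape, shape, shape,
                               lhs, rhs, expected,
                               false,  // adjX
                               false); // adjY
    }
}
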
+
+
+
+
diff --git a/docs/05_03_delegate.dox b/docs/05_03_delegate.dox
index ee80268469..9d926275a8 100644
--- a/docs/05_03_delegate.dox
+++ b/docs/05_03_delegate.dox
@@ -45,6 +45,8 @@ The Arm NN SDK TensorFlow Lite delegate currently supports the following operato
- AVERAGE_POOL_3D
+- BATCH_MATMUL
+
- BATCH_TO_SPACE_ND
- CAST