author    Ryan OShea <ryan.oshea3@arm.com>    2022-09-21 16:09:41 +0100
committer TeresaARM <teresa.charlinreyes@arm.com>    2022-09-22 10:55:33 +0000
commit    49ed0df12338b1e99674edeee4200acf8c05750e (patch)
tree      85f0806dde1d8f24c74a986d732e91904da5899a
parent    9636a9b109fcbc811ec876ba9ca6512b7fbe2ba0 (diff)
download  armnn-49ed0df12338b1e99674edeee4200acf8c05750e.tar.gz
IVGCVSW-6498 Add Support for Batch MatMul to TfLite Delegate
* Creates delegate/src/BatchMatMul.hpp
* Add VisitBatchMatMul function
* Add BatchMatMul to switch in armnn_delegate
* Creates delegate/src/test/BatchMatMulTest.cpp
* Creates delegate/src/test/BatchMatMulTestHelper.hpp
* Add Int8 and Fp32 unit tests on ref backend
* Add BatchMatMul to delegate supported ops

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: I50e61314cf063f986c8a0f7d508847a96953735e
-rw-r--r--  delegate/CMakeLists.txt                     |   3
-rw-r--r--  delegate/src/BatchMatMul.hpp                |  99
-rw-r--r--  delegate/src/armnn_delegate.cpp             |   7
-rw-r--r--  delegate/src/test/BatchMatMulTest.cpp       | 657
-rw-r--r--  delegate/src/test/BatchMatMulTestHelper.hpp | 206
-rw-r--r--  docs/05_03_delegate.dox                     |   2
6 files changed, 974 insertions, 0 deletions
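
As background for the review, the sketch below shows (informally, following the same pattern as the new test helper) how an application would route a model's BATCH_MATMUL nodes through the delegate once this change lands. The model path and backend choice are placeholders, not part of the patch:

// Sketch only: load a model, attach the ArmNN delegate, run inference.
// "model.tflite" is a placeholder; any graph containing BATCH_MATMUL now qualifies.
#include <armnn_delegate.hpp>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>
#include <memory>
#include <vector>

int main()
{
    auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(*model, tflite::ops::builtin::BuiltinOpResolver())(&interpreter);
    interpreter->AllocateTensors();

    // BATCH_MATMUL nodes are offloaded to the chosen backend (reference backend here).
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    armnnDelegate::DelegateOptions delegateOptions(backends);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    interpreter->ModifyGraphWithDelegate(theArmnnDelegate.get());

    interpreter->Invoke();
    return 0;
}
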
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 01dde4ebce..641e8c7157 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -20,6 +20,7 @@ list(APPEND armnnDelegate_sources
src/DelegateOptions.cpp
src/Activation.hpp
src/ArgMinMax.hpp
+ src/BatchMatMul.hpp
src/BatchSpace.hpp
src/Comparison.hpp
src/Convolution.hpp
@@ -138,6 +139,8 @@ if(BUILD_UNIT_TESTS)
src/test/ArgMinMaxTest.cpp
src/test/ArgMinMaxTestHelper.hpp
src/test/ArmnnDelegateTest.cpp
+ src/test/BatchMatMulTest.cpp
+ src/test/BatchMatMulTestHelper.hpp
src/test/BatchSpaceTest.cpp
src/test/BatchSpaceTestHelper.hpp
src/test/CastTest.cpp
diff --git a/delegate/src/BatchMatMul.hpp b/delegate/src/BatchMatMul.hpp
new file mode 100644
index 0000000000..391301e4d7
--- /dev/null
+++ b/delegate/src/BatchMatMul.hpp
@@ -0,0 +1,99 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DelegateUtils.hpp"
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <vector>
+
+namespace armnnDelegate
+{
+ TfLiteStatus VisitBatchMatMulOperator(DelegateData& delegateData,
+ TfLiteContext* tfLiteContext,
+ TfLiteNode* tfLiteNode,
+ int nodeIndex,
+ int32_t operatorCode)
+ {
+ TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));
+ TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+ const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+ const TfLiteTensor& kTfLiteLHSInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+ const TfLiteTensor& kTfLiteRHSInputTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+
+ if (!IsValid(tfLiteContext, kTfLiteLHSInputTensor, operatorCode, nodeIndex))
+ {
+ return kTfLiteError;
+ }
+ if (!IsValid(tfLiteContext, kTfLiteRHSInputTensor, operatorCode, nodeIndex))
+ {
+ return kTfLiteError;
+ }
+
+ if (IsDynamicTensor(kTfLiteLHSInputTensor) || IsDynamicTensor(kTfLiteRHSInputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+ operatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const TfLiteTensor& kTfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+ if (IsDynamicTensor(kTfLiteOutputTensor))
+ {
+ TF_LITE_MAYBE_KERNEL_LOG(
+ tfLiteContext,
+ "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+ operatorCode, nodeIndex);
+ return kTfLiteError;
+ }
+
+ const armnn::TensorInfo& armnnLHSInputTensorInfo = GetTensorInfoForTfLiteTensor(kTfLiteLHSInputTensor);
+ const armnn::TensorInfo& armnnRHSInputTensorInfo = GetTensorInfoForTfLiteTensor(kTfLiteRHSInputTensor);
+ const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(kTfLiteOutputTensor, true);
+
+ armnn::BatchMatMulDescriptor descriptor;
+ auto* params = reinterpret_cast<TfLiteBatchMatMulParams *>(tfLiteNode->builtin_data);
+
+ // The TensorFlow parameters are called adj_x and adj_y (adjoint), but behind the scenes they only
+ // transpose the last two dimensions of each input; they do not compute a true adjoint.
+ descriptor.m_TransposeX = params->adj_x;
+ descriptor.m_TransposeY = params->adj_y;
+
+ // Check if supported
+ bool isSupported = false;
+ auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC("BATCH_MATMUL",
+ tfLiteContext,
+ IsBatchMatMulSupported,
+ delegateData.m_Backends,
+ isSupported,
+ armnnLHSInputTensorInfo,
+ armnnRHSInputTensorInfo,
+ outputTensorInfo,
+ descriptor);
+ };
+
+ if (!delegateData.m_Network)
+ {
+ validateFunc(outputTensorInfo, isSupported);
+ return isSupported ? kTfLiteOk : kTfLiteError;
+ }
+
+ armnn::IConnectableLayer* layer = delegateData.m_Network->AddBatchMatMulLayer(descriptor);
+ ARMNN_ASSERT(layer != nullptr);
+
+ armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+ outputSlot.SetTensorInfo(outputTensorInfo);
+ Connect(layer, tfLiteNode, delegateData);
+
+ return kTfLiteOk;
+ }
+} // namespace armnnDelegate
\ No newline at end of file
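
A quick aside on the adj_x/adj_y naming addressed in the comment above: for A = [[1, 2], [3, 4]] the delegate only ever forms the transpose [[1, 3], [2, 4]], never the classical adjoint (adjugate) [[4, -2], [-3, 1]]. A minimal sketch of the mapping, using a hypothetical helper name (the descriptor fields and parameter names are the ones used in the patch):

// Sketch only: how the TfLite adjoint flags map onto the ArmNN descriptor.
// For A = {{1, 2}, {3, 4}}, adj_x = true feeds the layer A^T = {{1, 3}, {2, 4}},
// i.e. a transpose of the last two dimensions, not the adjugate {{4, -2}, {-3, 1}}.
armnn::BatchMatMulDescriptor MakeBatchMatMulDescriptor(const TfLiteBatchMatMulParams* params)
{
    armnn::BatchMatMulDescriptor descriptor;
    descriptor.m_TransposeX = params->adj_x; // transpose LHS last two dims
    descriptor.m_TransposeY = params->adj_y; // transpose RHS last two dims
    return descriptor;
}
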
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index c041dd1714..21c66fe706 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -9,6 +9,7 @@
#include "Activation.hpp"
#include "ArgMinMax.hpp"
+#include "BatchMatMul.hpp"
#include "BatchSpace.hpp"
#include "Comparison.hpp"
#include "Convolution.hpp"
@@ -566,6 +567,12 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
tfLiteNode,
nodeIndex,
kTfLiteBuiltinAveragePool2d);
+ case kTfLiteBuiltinBatchMatmul:
+ return VisitBatchMatMulOperator(delegateData,
+ tfLiteContext,
+ tfLiteNode,
+ nodeIndex,
+ kTfLiteBuiltinBatchMatmul);
case kTfLiteBuiltinBatchToSpaceNd:
return VisitBatchToSpaceNdOperator(delegateData,
tfLiteContext,
diff --git a/delegate/src/test/BatchMatMulTest.cpp b/delegate/src/test/BatchMatMulTest.cpp
new file mode 100644
index 0000000000..5469bc845c
--- /dev/null
+++ b/delegate/src/test/BatchMatMulTest.cpp
@@ -0,0 +1,657 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchMatMulTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+ void BatchMatMul2DFp32SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2, 2 };
+ std::vector<int32_t> RHSInputShape { 2, 2 };
+ std::vector<int32_t> outputShape { 2, 2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+ void BatchMatMul2DInt8SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2, 2 };
+ std::vector<int32_t> RHSInputShape { 2, 2 };
+ std::vector<int32_t> outputShape { 2, 2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DFp32SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 1,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DInt8SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 1,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul4DFp32SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,1,2,2 };
+ std::vector<int32_t> outputShape { 1,1,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul4DInt8SimpleTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1,2,2};
+ std::vector<int32_t> RHSInputShape { 1,1,2,2 };
+ std::vector<int32_t> outputShape { 1,1,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DFp32BatchTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<float> RHSInputValues = { 5, 6,
+ 7, 8,
+
+ 13, 14,
+ 15, 16 };
+
+ std::vector<float> expectedOutputValues = { 19, 22,
+ 43, 50,
+
+ 267, 286,
+ 323, 346 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DInt8BatchTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<int8_t> RHSInputValues = { 5, 6,
+ 7, 8,
+
+ 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> expectedOutputValues = { 19, 22,
+ 43, 50,
+
+ 39, 58,
+ 47, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DFp32BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<float> RHSInputValues = { 13, 14,
+ 15, 16 };
+
+ std::vector<float> expectedOutputValues = { 43, 46,
+ 99, 106,
+
+ 267, 286,
+ 323, 346 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3DInt8BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 1,2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<int8_t> RHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> expectedOutputValues = { 7, 10,
+ 15, 22,
+
+ 39, 58,
+ 47, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3D2DFp32BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<float> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<float> RHSInputValues = { 13, 14,
+ 15, 16 };
+
+ std::vector<float> expectedOutputValues = { 43, 46,
+ 99, 106,
+
+ 267, 286,
+ 323, 346 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul3D2DInt8BroadcastTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,2,2 };
+ std::vector<int32_t> RHSInputShape { 2,2 };
+ std::vector<int32_t> outputShape { 2,2,2 };
+
+ std::vector<int8_t> LHSInputValues = { 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12 };
+
+ std::vector<int8_t> RHSInputValues = { 1, 2,
+ 3, 4 };
+
+ std::vector<int8_t> expectedOutputValues = { 7, 10,
+ 15, 22,
+
+ 39, 58,
+ 47, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul2DFp32TinyTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1 };
+ std::vector<int32_t> RHSInputShape { 1,1 };
+ std::vector<int32_t> outputShape { 1,1 };
+
+ std::vector<float> LHSInputValues = { 3 };
+
+ std::vector<float> RHSInputValues = { 5 };
+
+ std::vector<float> expectedOutputValues = { 15 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+ void BatchMatMul2DInt8TinyTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 1,1 };
+ std::vector<int32_t> RHSInputShape { 1,1 };
+ std::vector<int32_t> outputShape { 1,1 };
+
+ std::vector<int8_t> LHSInputValues = { 3 };
+
+ std::vector<int8_t> RHSInputValues = { 5 };
+
+ std::vector<int8_t> expectedOutputValues = { 15 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMulNonSquareFp32Test(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,5,3 };
+ std::vector<int32_t> RHSInputShape { 2,3,4 };
+ std::vector<int32_t> outputShape { 2,5,4 };
+
+ std::vector<float> LHSInputValues = { 8, 8, 4,
+ 6, 1, 3,
+ 8, 8, 3,
+ 8, 9, 8,
+ 5, 4, 4,
+
+ 1, 8, 5,
+ 7, 1, 1,
+ 8, 7, 9,
+ 3, 2, 7,
+ 8, 5, 3 };
+
+ std::vector<float> RHSInputValues = { 6, 2, 3, 2,
+ 6, 2, 2, 8,
+ 3, 7, 8, 1,
+
+ 7, 2, 9, 5,
+ 2, 3, 1, 3,
+ 2, 7, 7, 5 };
+
+ std::vector<float> expectedOutputValues = { 108, 60, 72, 84,
+ 51, 35, 44, 23,
+ 105, 53, 64, 83,
+ 126, 90, 106, 96,
+ 66, 46, 55, 46,
+
+ 33, 61, 52, 54,
+ 53, 24, 71, 43,
+ 88, 100, 142, 106,
+ 39, 61, 78, 56,
+ 72, 52, 98, 70 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMulNonSquareInt8Test(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 2,5,3 };
+ std::vector<int32_t> RHSInputShape { 2,3,4 };
+ std::vector<int32_t> outputShape { 2,5,4 };
+
+ std::vector<int8_t> LHSInputValues = { 8, 8, 4,
+ 6, 1, 3,
+ 8, 8, 3,
+ 8, 9, 8,
+ 5, 4, 4,
+
+ 1, 8, 5,
+ 7, 1, 1,
+ 8, 7, 9,
+ 3, 2, 7,
+ 8, 5, 3 };
+
+ std::vector<int8_t> RHSInputValues = { 6, 2, 3, 2,
+ 6, 2, 2, 8,
+ 3, 7, 8, 1,
+
+ 7, 2, 3, 5,
+ 2, 3, 1, 3,
+ 2, 7, 7, 5 };
+
+ std::vector<int8_t> expectedOutputValues = { 108, 60, 72, 84,
+ 51, 35, 44, 23,
+ 105, 53, 64, 83,
+ 126, 90, 106, 96,
+ 66, 46, 55, 46,
+
+ 33, 61, 46, 54,
+ 53, 24, 29, 43,
+ 88, 100, 94, 106,
+ 39, 61, 60, 56,
+ 72, 52, 50, 70 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ false,
+ false);
+ }
+
+ void BatchMatMul2DFp32SimpleAdjointTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 3,3 };
+ std::vector<int32_t> RHSInputShape { 3,3 };
+ std::vector<int32_t> outputShape { 3,3 };
+
+ std::vector<float> LHSInputValues = { 3, 1, 1,
+ 1, 3, -1,
+ 2, 4, 1 };
+
+ std::vector<float> RHSInputValues = { 1, 0, 0,
+ 0, 1, 0,
+ 0, 0, 1 };
+
+ std::vector<float> expectedOutputValues = { 3, 1, 2,
+ 1, 3, 4,
+ 1, -1, 1 };
+
+ BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_FLOAT32,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ true,
+ false);
+ }
+
+ void BatchMatMul2DInt8SimpleAdjointTest(std::vector<armnn::BackendId>& backends)
+ {
+ // Set input data
+ std::vector<int32_t> LHSInputShape { 3,3 };
+ std::vector<int32_t> RHSInputShape { 3,3 };
+ std::vector<int32_t> outputShape { 3,3 };
+
+ std::vector<int8_t> LHSInputValues = { 3, 1, 1,
+ 1, 3, -1,
+ 2, 4, 1 };
+
+ std::vector<int8_t> RHSInputValues = { 1, 0, 0,
+ 0, 1, 0,
+ 0, 0, 1 };
+
+ std::vector<int8_t> expectedOutputValues = { 3, 1, 2,
+ 1, 3, 4,
+ 1, -1, 1 };
+
+ BatchMatMulTest<int8_t>(tflite::BuiltinOperator_BATCH_MATMUL,
+ ::tflite::TensorType_INT8,
+ backends,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ LHSInputValues,
+ RHSInputValues,
+ expectedOutputValues,
+ true,
+ false);
+ }
+
+ TEST_SUITE("BATCH_MATMUL_CpuRefTests")
+ {
+ TEST_CASE("BATCH_MATMUL_Fp32_CpuRefTests")
+ {
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+ BatchMatMul2DFp32SimpleTest (backends);
+ BatchMatMul3DFp32SimpleTest (backends);
+ BatchMatMul4DFp32SimpleTest (backends);
+ BatchMatMul3DFp32BatchTest (backends);
+ BatchMatMul3DFp32BroadcastTest (backends);
+ BatchMatMul3D2DFp32BroadcastTest (backends);
+ BatchMatMul2DFp32TinyTest (backends);
+ BatchMatMulNonSquareFp32Test (backends);
+ BatchMatMul2DFp32SimpleAdjointTest(backends);
+ }
+
+ TEST_CASE("BATCH_MATMUL_Int8_CpuRefTests")
+ {
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+ BatchMatMul2DInt8SimpleTest (backends);
+ BatchMatMul3DInt8SimpleTest (backends);
+ BatchMatMul4DInt8SimpleTest (backends);
+ BatchMatMul3DInt8BatchTest (backends);
+ BatchMatMul3DInt8BroadcastTest (backends);
+ BatchMatMul3D2DInt8BroadcastTest (backends);
+ BatchMatMul2DInt8TinyTest (backends);
+ BatchMatMulNonSquareInt8Test (backends);
+ BatchMatMul2DInt8SimpleAdjointTest(backends);
+ }
+ }
+
+}
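
For reviewers cross-checking the broadcast cases by hand (this snippet is a standalone sanity check, not part of the patch): the 2x2 RHS in BatchMatMul3D2DFp32BroadcastTest is reused for both LHS batches, and plain loops reproduce the expected values:

// Standalone sanity check reproducing the arithmetic behind
// BatchMatMul3D2DFp32BroadcastTest: the 2x2 RHS is broadcast across both LHS batches.
#include <array>
#include <cassert>

int main()
{
    std::array<std::array<float, 4>, 2> lhs = {{ { 1, 2, 3, 4 }, { 9, 10, 11, 12 } }};
    std::array<float, 4> rhs = { 13, 14, 15, 16 };               // reused for both batches
    std::array<std::array<float, 4>, 2> expected = {{ { 43, 46, 99, 106 }, { 267, 286, 323, 346 } }};

    for (int b = 0; b < 2; ++b)
    {
        for (int i = 0; i < 2; ++i)
        {
            for (int j = 0; j < 2; ++j)
            {
                float acc = 0.0f;
                for (int k = 0; k < 2; ++k)
                {
                    acc += lhs[b][i * 2 + k] * rhs[k * 2 + j];   // row-major 2x2 indexing
                }
                assert(acc == expected[b][i * 2 + j]);
            }
        }
    }
    return 0;
}
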
diff --git a/delegate/src/test/BatchMatMulTestHelper.hpp b/delegate/src/test/BatchMatMulTestHelper.hpp
new file mode 100644
index 0000000000..42c1ed6a1e
--- /dev/null
+++ b/delegate/src/test/BatchMatMulTestHelper.hpp
@@ -0,0 +1,206 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+ std::vector<char> CreateBatchMatMulTfLiteModel(
+ tflite::BuiltinOperator bmmOperatorCode,
+ tflite::TensorType tensorType,
+ const std::vector <int32_t>& LHSInputTensorShape,
+ const std::vector <int32_t>& RHSInputTensorShape,
+ const std::vector <int32_t>& outputTensorShape,
+ bool adjX = false,
+ bool adjY = false,
+ float quantScale = 1.0f,
+ int quantOffset = 0)
+ {
+ using namespace tflite;
+ flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+ std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+ buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+ auto quantizationParameters =
+ CreateQuantizationParameters(flatBufferBuilder,
+ 0,
+ 0,
+ flatBufferBuilder.CreateVector<float>({ quantScale }),
+ flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+ std::array<flatbuffers::Offset<Tensor>, 3> tensors;
+ tensors[0] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(LHSInputTensorShape.data(),
+ LHSInputTensorShape.size()),
+ tensorType,
+ 0,
+ flatBufferBuilder.CreateString("LHSInput"),
+ quantizationParameters);
+
+ tensors[1] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(RHSInputTensorShape.data(),
+ RHSInputTensorShape.size()),
+ tensorType,
+ 0,
+ flatBufferBuilder.CreateString("RHSInput"),
+ quantizationParameters);
+
+ tensors[2] = CreateTensor(flatBufferBuilder,
+ flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+ outputTensorShape.size()),
+ tensorType,
+ 0,
+ flatBufferBuilder.CreateString("output"),
+ quantizationParameters);
+
+ // create operator
+ tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_BatchMatMulOptions;
+ flatbuffers::Offset<void> operatorBuiltinOptions = CreateBatchMatMulOptions(flatBufferBuilder,
+ adjX,
+ adjY).Union();
+
+ const std::vector<int32_t> operatorInputs{{0, 1}};
+ const std::vector<int32_t> operatorOutputs{2};
+ flatbuffers::Offset <Operator> bmmOperator =
+ CreateOperator(flatBufferBuilder,
+ 0,
+ flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+ flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(),
+ operatorOutputs.size()),
+ operatorBuiltinOptionsType,
+ operatorBuiltinOptions);
+
+ const std::vector<int> subgraphInputs{{0, 1}};
+ const std::vector<int> subgraphOutputs{2};
+ flatbuffers::Offset <SubGraph> subgraph =
+ CreateSubGraph(flatBufferBuilder,
+ flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+ flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+ flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(),
+ subgraphOutputs.size()),
+ flatBufferBuilder.CreateVector(&bmmOperator, 1));
+
+ flatbuffers::Offset <flatbuffers::String> modelDescription =
+ flatBufferBuilder.CreateString("ArmnnDelegate: BatchMatMul Operator Model");
+ flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, bmmOperatorCode);
+
+ flatbuffers::Offset <Model> flatbufferModel =
+ CreateModel(flatBufferBuilder,
+ TFLITE_SCHEMA_VERSION,
+ flatBufferBuilder.CreateVector(&operatorCode, 1),
+ flatBufferBuilder.CreateVector(&subgraph, 1),
+ modelDescription,
+ flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+ flatBufferBuilder.Finish(flatbufferModel);
+
+ return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+ flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+ }
+
+ template <typename T>
+ void BatchMatMulTest(tflite::BuiltinOperator bmmOperatorCode,
+ tflite::TensorType tensorType,
+ std::vector<armnn::BackendId>& backends,
+ std::vector<int32_t>& LHSInputShape,
+ std::vector<int32_t>& RHSInputShape,
+ std::vector<int32_t>& outputShape,
+ std::vector<T>& LHSInputValues,
+ std::vector<T>& RHSInputValues,
+ std::vector<T>& expectedOutputValues,
+ bool adjX = false,
+ bool adjY = false,
+ float quantScale = 1.0f,
+ int quantOffset = 0)
+ {
+ using namespace tflite;
+ std::vector<char> modelBuffer = CreateBatchMatMulTfLiteModel(bmmOperatorCode,
+ tensorType,
+ LHSInputShape,
+ RHSInputShape,
+ outputShape,
+ adjX,
+ adjY,
+ quantScale,
+ quantOffset);
+
+ const Model* tfLiteModel = GetModel(modelBuffer.data());
+ CHECK(tfLiteModel != nullptr);
+ // Create TfLite Interpreters
+ std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+ (&armnnDelegateInterpreter) == kTfLiteOk);
+ CHECK(armnnDelegateInterpreter != nullptr);
+ CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+ std::unique_ptr<Interpreter> tfLiteInterpreter;
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+ (&tfLiteInterpreter) == kTfLiteOk);
+ CHECK(tfLiteInterpreter != nullptr);
+ CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+ // Create the ArmNN Delegate
+ armnnDelegate::DelegateOptions delegateOptions(backends);
+ std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+ theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+ armnnDelegate::TfLiteArmnnDelegateDelete);
+ CHECK(theArmnnDelegate != nullptr);
+ // Modify armnnDelegateInterpreter to use armnnDelegate
+ CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+ // Set input data
+ auto tfLiteDelegateLHSInputId = tfLiteInterpreter->inputs()[0];
+ auto tfLiteDelegateLHSInputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateLHSInputId);
+ auto tfLiteDelegateRHSInputId = tfLiteInterpreter->inputs()[1];
+ auto tfLiteDelegateRHSInputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateRHSInputId);
+ for (unsigned int i = 0; i < LHSInputValues.size(); ++i)
+ {
+ tfLiteDelegateLHSInputData[i] = LHSInputValues[i];
+ }
+ for (unsigned int i = 0; i < RHSInputValues.size(); ++i)
+ {
+ tfLiteDelegateRHSInputData[i] = RHSInputValues[i];
+ }
+
+ auto armnnDelegateLHSInputId = armnnDelegateInterpreter->inputs()[0];
+ auto armnnDelegateLHSInputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateLHSInputId);
+ auto armnnDelegateRHSInputId = armnnDelegateInterpreter->inputs()[1];
+ auto armnnDelegateRHSInputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateRHSInputId);
+ for (unsigned int i = 0; i < LHSInputValues.size(); ++i)
+ {
+ armnnDelegateLHSInputData[i] = LHSInputValues[i];
+ }
+ for (unsigned int i = 0; i < RHSInputValues.size(); ++i)
+ {
+ armnnDelegateRHSInputData[i] = RHSInputValues[i];
+ }
+ // Run EnqueueWorkload
+ CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+ CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+ armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter,
+ outputShape, expectedOutputValues);
+ }
+
+} // anonymous namespace
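
Should another backend (for example GpuAcc) gain BatchMatMul support later, the helper above can be reused verbatim. A hedged sketch of such a future test case follows; the suite name and GpuAcc availability are assumptions, and the values mirror the 2D Fp32 simple test:

// Illustrative only: a possible future suite reusing BatchMatMulTestHelper.hpp.
TEST_SUITE("BATCH_MATMUL_GpuAccTests")
{
    TEST_CASE("BATCH_MATMUL_Fp32_GpuAccTests")
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
        std::vector<int32_t> shape { 2, 2 };
        std::vector<float> lhs = { 1, 2, 3, 4 };
        std::vector<float> rhs = { 5, 6, 7, 8 };
        std::vector<float> expected = { 19, 22, 43, 50 };

        BatchMatMulTest<float>(tflite::BuiltinOperator_BATCH_MATMUL,
                               ::tflite::TensorType_FLOAT32,
                               backends, shape, shape, shape,
                               lhs, rhs, expected,
                               false,  // adjX
                               false); // adjY
    }
}
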
+
+
+
+
diff --git a/docs/05_03_delegate.dox b/docs/05_03_delegate.dox
index ee80268469..9d926275a8 100644
--- a/docs/05_03_delegate.dox
+++ b/docs/05_03_delegate.dox
@@ -45,6 +45,8 @@ The Arm NN SDK TensorFlow Lite delegate currently supports the following operato
- AVERAGE_POOL_3D
+- BATCH_MATMUL
+
- BATCH_TO_SPACE_ND
- CAST