aboutsummaryrefslogtreecommitdiff
path: root/src/backends/backendsCommon
diff options
context:
space:
mode:
authorSamuel Yap <samuel.yap@arm.com>2022-07-06 15:36:03 +0100
committerNikhil Raj <nikhil.raj@arm.com>2022-07-27 15:58:31 +0100
commit6b47809e7d6c55d20a05d863ce2f09159f381f85 (patch)
treec33e5820f89e359c80d8773288e8adb075735039 /src/backends/backendsCommon
parent919ec71ea7f44bb2d284eb88cda511c2424358b2 (diff)
downloadarmnn-6b47809e7d6c55d20a05d863ce2f09159f381f85.tar.gz
IVGCVSW-7109: Add Batch MatMul front end support - Reference
* Descriptors added for BatchMatMul * Layer definition added * Input validation added (will likely change when opt. param support comes in) * Ref workload implementation for BatchMatMul added (will also change with opt. param support) * Ref layer tests made for BatchMatMul * CMake and other build files updated Signed-off-by: Samuel Yap <samuel.yap@arm.com> Change-Id: Ic885301da543ee0fbe7922b85e7f9658c4efc617
Diffstat (limited to 'src/backends/backendsCommon')
-rw-r--r--src/backends/backendsCommon/LayerSupportRules.hpp8
-rw-r--r--src/backends/backendsCommon/WorkloadData.cpp227
-rw-r--r--src/backends/backendsCommon/WorkloadFactory.cpp16
-rw-r--r--src/backends/backendsCommon/common.mk1
-rw-r--r--src/backends/backendsCommon/test/CMakeLists.txt2
-rw-r--r--src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp2
-rw-r--r--src/backends/backendsCommon/test/LayerTests.hpp1
-rw-r--r--src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp1010
-rw-r--r--src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp85
9 files changed, 1352 insertions, 0 deletions
diff --git a/src/backends/backendsCommon/LayerSupportRules.hpp b/src/backends/backendsCommon/LayerSupportRules.hpp
index e616ecf022..a83fd62867 100644
--- a/src/backends/backendsCommon/LayerSupportRules.hpp
+++ b/src/backends/backendsCommon/LayerSupportRules.hpp
@@ -186,4 +186,12 @@ struct TensorNumDimensionsAreCorrect : public Rule
}
};
+struct TensorNumDimensionsAreGreaterOrEqualTo : public Rule
+{
+ TensorNumDimensionsAreGreaterOrEqualTo(const TensorInfo& info, unsigned int numDimensionsToCompare)
+ {
+ m_Res = info.GetNumDimensions() >= numDimensionsToCompare;
+ }
+};
+
} //namespace armnn \ No newline at end of file
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 606821b5e5..9a4c60f551 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -4143,5 +4143,232 @@ void UnidirectionalSequenceLstmQueueDescriptor::Validate(const WorkloadInfo& wor
}
}
+void BatchMatMulQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+ const std::string descriptorName{"BatchMatMulDescriptor"};
+
+ ValidateNumInputs(workloadInfo, descriptorName, 2);
+ ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+ // Inputs must be: both 2D+
+ // For inputs X and Y whose dimensions to be multiplied are (M,N) and (I,J) respectively,
+ // axes N and I must be the same size
+
+ const auto& inputTensorXInfo = workloadInfo.m_InputTensorInfos[0];
+ const auto& inputTensorYInfo = workloadInfo.m_InputTensorInfos[1];
+ const auto& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+ std::vector<DataType> supportedTypes =
+ {
+ DataType::BFloat16,
+ DataType::Float16,
+ DataType::Float32,
+ DataType::QAsymmS8,
+ DataType::QAsymmU8,
+ DataType::QSymmS16
+ };
+
+ ValidateDataTypes(inputTensorXInfo, supportedTypes, descriptorName);
+ ValidateDataTypes(inputTensorYInfo, supportedTypes, descriptorName);
+ ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName);
+
+ if ((inputTensorXInfo.GetNumDimensions() < 2) ||
+ (inputTensorYInfo.GetNumDimensions() < 2))
+ {
+ throw InvalidArgumentException(descriptorName + ": Input tensors are not 2D or greater.");
+ }
+
+ if(m_Parameters.m_DataLayoutX.has_value())
+ {
+ switch(m_Parameters.m_DataLayoutX.value())
+ {
+ case DataLayout::NCHW:
+ case DataLayout::NHWC:
+ if(inputTensorXInfo.GetNumDimensions() != 4)
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Input tensor X does not have the correct "
+ "number of dimensions for the Data Layout that it has been assigned.");
+ }
+ break;
+ case DataLayout::NCDHW:
+ case DataLayout::NDHWC:
+ if(inputTensorXInfo.GetNumDimensions() != 5)
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Input tensor X does not have the correct "
+ "number of dimensions for the Data Layout that it has been assigned.");
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if(m_Parameters.m_DataLayoutY.has_value())
+ {
+ switch(m_Parameters.m_DataLayoutY.value())
+ {
+ case DataLayout::NCHW:
+ case DataLayout::NHWC:
+ if(inputTensorYInfo.GetNumDimensions() != 4)
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Input tensor Y does not have the correct "
+ "number of dimensions for the Data Layout that it has been assigned.");
+ }
+ break;
+ case DataLayout::NCDHW:
+ case DataLayout::NDHWC:
+ if(inputTensorYInfo.GetNumDimensions() != 5)
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Input tensor Y does not have the correct "
+ "number of dimensions for the Data Layout that it has been assigned.");
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ auto axesToMul = BatchMatMulDescriptor::GetAxesToMul(m_Parameters,
+ inputTensorXInfo.GetShape(),
+ inputTensorYInfo.GetShape());
+
+ if(inputTensorXInfo.GetShape()[axesToMul.first.second]
+ != inputTensorYInfo.GetShape()[axesToMul.second.first])
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": The final axis of input tensor X must be the same size as "
+ "the second last axis of input tensor Y.");
+ }
+
+ auto axesNotMul = BatchMatMulDescriptor::GetAxesNotMul(m_Parameters,
+ inputTensorXInfo.GetShape(),
+ inputTensorYInfo.GetShape());
+
+ { // Separate scope so we don't pollute the rest of the scope with our temp variables
+ // e.g. NHWC isnt compatible with NCHW as of now
+ DataLayout xLayout;
+ DataLayout yLayout;
+
+ if(m_Parameters.m_DataLayoutX == EmptyOptional())
+ {
+ xLayout = DataLayout::NCHW; // Not equivalent - I'm just concerned with the last 2 axes
+ }
+ else
+ {
+ xLayout = m_Parameters.m_DataLayoutX.value();
+ }
+
+ if(m_Parameters.m_DataLayoutY == EmptyOptional())
+ {
+ yLayout = DataLayout::NCHW;
+ }
+ else
+ {
+ yLayout = m_Parameters.m_DataLayoutY.value();
+ }
+
+ if(xLayout == DataLayout::NCHW || xLayout == DataLayout::NCDHW)
+ {
+ if(yLayout == DataLayout::NHWC || yLayout == DataLayout::NDHWC)
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid input tensor data layout combination.");
+ }
+ }
+ if(yLayout == DataLayout::NCHW || yLayout == DataLayout::NCDHW)
+ {
+ if(xLayout == DataLayout::NHWC || xLayout == DataLayout::NDHWC)
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid input tensor data layout combination.");
+ }
+ }
+ }
+
+ // Simulate aligning the ends of the matrix dims and prepending 1's to the beginning of the shorter one
+ unsigned int outputTensorDimSize = std::max(inputTensorXInfo.GetNumDimensions(),
+ inputTensorYInfo.GetNumDimensions());
+ if(outputTensorDimSize-2 > 0)
+ {
+ TensorInfo tiXNotMul = TensorInfo(TensorShape(outputTensorDimSize-2),
+ DataType::Float32);
+ TensorInfo tiYNotMul = TensorInfo(TensorShape(outputTensorDimSize-2),
+ DataType::Float32);
+ TensorInfo tiOutNotMul = TensorInfo(TensorShape(outputTensorDimSize-2),
+ DataType::Float32);
+
+ auto doAxisExtension = [&](std::vector<unsigned int> axisIndices, TensorInfo& ti)
+ {
+ auto sizeDiff = (outputTensorDimSize-2) - axisIndices.size();
+
+ for(unsigned int i = 0; i < sizeDiff; i++)
+ {
+ axisIndices.insert(axisIndices.begin(), 1);
+ }
+
+ for(unsigned int i = 0; i < ti.GetNumDimensions(); i++)
+ {
+ ti.GetShape()[i] = inputTensorXInfo.GetShape()[i];
+ }
+ };
+
+ doAxisExtension(axesNotMul.first, tiXNotMul);
+ doAxisExtension(axesNotMul.second, tiYNotMul);
+
+ for(unsigned int i = 0; i < tiOutNotMul.GetNumDimensions(); i++)
+ {
+ tiOutNotMul.GetShape()[i] = std::max(tiXNotMul.GetShape()[i],
+ tiYNotMul.GetShape()[i]);
+ }
+
+ ValidateBroadcastTensorShapesMatch(tiXNotMul,
+ tiYNotMul,
+ tiOutNotMul,
+ descriptorName,
+ "input_X",
+ "input_Y");
+ }
+
+ // Also check descriptor parameter validity
+ // This will eventually be moved to the start of the function as explained below
+ if ((!m_Parameters.m_TransposeX.empty() && !m_Parameters.m_AdjointX.empty()) ||
+ (!m_Parameters.m_TransposeY.empty() && !m_Parameters.m_AdjointY.empty()))
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid descriptor parameters - Transpose and Adjoint "
+ "vectors cannot both be true for a given input tensor.");
+ }
+
+ if(m_Parameters.m_TransposeX.size() != 0 && m_Parameters.m_TransposeX.size() != inputTensorXInfo.GetNumDimensions())
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid descriptor parameter - Transpose X vector must be "
+ "the same size as tensor input X's dimensionality.");
+ }
+ if(m_Parameters.m_AdjointX.size() != 0 && m_Parameters.m_AdjointX.size() != inputTensorXInfo.GetNumDimensions())
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid descriptor parameter - Adjoint X vector must be "
+ "the same size as tensor input X's dimensionality.");
+ }
+ if(m_Parameters.m_TransposeY.size() != 0 && m_Parameters.m_TransposeY.size() != inputTensorYInfo.GetNumDimensions())
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid descriptor parameter - Transpose Y vector must be "
+ "the same size as tensor input Y's dimensionality.");
+ }
+ if(m_Parameters.m_AdjointY.size() != 0 && m_Parameters.m_AdjointY.size() != inputTensorXInfo.GetNumDimensions())
+ {
+ throw InvalidArgumentException(descriptorName +
+ ": Invalid descriptor parameter - Adjoint Y vector must be "
+ "the same size as tensor input Y's dimensionality.");
+ }
+ // Note: for adjoint/transpose, you'll need to do the validation atop the resultant permutation.
+}
+
} // namespace armnn \ No newline at end of file
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 3660e6e721..70006e4f79 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -133,6 +133,22 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
reason);
break;
}
+ case LayerType::BatchMatMul:
+ {
+ auto cLayer = PolymorphicDowncast<const BatchMatMulLayer*>(&layer);
+ const BatchMatMulDescriptor& descriptor = cLayer->GetParameters();
+
+ const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
+ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+ result = layerSupportObject.IsBatchMatMulSupported(
+ OverrideDataType(input0, dataType),
+ OverrideDataType(input1, dataType),
+ OverrideDataType(output, dataType),
+ descriptor,
+ reason);
+ break;
+ }
case LayerType::BatchNormalization:
{
auto cLayer = PolymorphicDowncast<const BatchNormalizationLayer*>(&layer);
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 86de7e331b..007cca57fa 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -46,6 +46,7 @@ COMMON_TEST_SOURCES := \
test/layerTests/ActivationTestImpl.cpp \
test/layerTests/AdditionTestImpl.cpp \
test/layerTests/ArgMinMaxTestImpl.cpp \
+ test/layerTests/BatchMatMulTestImpl.cpp \
test/layerTests/BatchNormalizationTestImpl.cpp \
test/layerTests/CastTestImpl.cpp \
test/layerTests/ChannelShuffleTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 8beb7c4169..c5b97ebf4c 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -68,6 +68,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
layerTests/AdditionTestImpl.hpp
layerTests/ArgMinMaxTestImpl.cpp
layerTests/ArgMinMaxTestImpl.hpp
+ layerTests/BatchMatMulTestImpl.cpp
+ layerTests/BatchMatMulTestImpl.hpp
layerTests/BatchNormalizationTestImpl.cpp
layerTests/BatchNormalizationTestImpl.hpp
layerTests/BatchToSpaceNdTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index ba8cfd5f68..5fdcd9c57a 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -614,6 +614,8 @@ DECLARE_LAYER_POLICY_1_PARAM(Addition)
DECLARE_LAYER_POLICY_2_PARAM(ArgMinMax)
+DECLARE_LAYER_POLICY_2_PARAM(BatchMatMul)
+
DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization)
DECLARE_LAYER_POLICY_2_PARAM(BatchToSpaceNd)
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 8d73027783..25435b24ec 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -9,6 +9,7 @@
#include <backendsCommon/test/layerTests/ActivationTestImpl.hpp>
#include <backendsCommon/test/layerTests/AdditionTestImpl.hpp>
#include <backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp>
+#include <backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp>
#include <backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp>
#include <backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp>
#include <backendsCommon/test/layerTests/CastTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp
new file mode 100644
index 0000000000..41add6e6da
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp
@@ -0,0 +1,1010 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchMatMulTestImpl.hpp"
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <armnn/backends/Workload.hpp>
+#include <armnn/backends/WorkloadData.hpp>
+#include <armnn/backends/WorkloadFactory.hpp>
+
+#include <armnnTestUtils/WorkloadTestUtils.hpp>
+#include <armnnUtils/QuantizeHelper.hpp>
+#include <armnnTestUtils/TensorCopyUtils.hpp>
+#include <armnn/Optional.hpp>
+
+
+template<armnn::DataType ArmnnType, typename T, std::size_t NumDims>
+LayerTestResult<T, NumDims> BatchMatMulTestImpl(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory,
+ armnn::BatchMatMulDescriptor descriptor,
+ const std::vector<T>& inputX,
+ const std::vector<T>& inputY,
+ const std::vector<T>& outputExpected,
+ const armnn::TensorInfo& inputXInfo,
+ const armnn::TensorInfo& inputYInfo,
+ const armnn::TensorInfo& outputInfo)
+{
+ std::vector<T> outputActual(outputInfo.GetNumElements());
+
+ std::unique_ptr<armnn::ITensorHandle> inputXHandle = tensorHandleFactory.CreateTensorHandle(inputXInfo);
+ std::unique_ptr<armnn::ITensorHandle> inputYHandle = tensorHandleFactory.CreateTensorHandle(inputYInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
+
+ armnn::BatchMatMulQueueDescriptor queueDescriptor;
+ queueDescriptor.m_Parameters = descriptor;
+ armnn::WorkloadInfo workloadInfo;
+
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputXInfo, inputXHandle.get());
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputYInfo, inputYHandle.get());
+ AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+ auto workload = workloadFactory.CreateWorkload(armnn::LayerType::BatchMatMul, queueDescriptor, workloadInfo);
+
+ inputXHandle->Allocate();
+ inputYHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputXHandle.get(), inputX.data());
+ CopyDataToITensorHandle(inputYHandle.get(), inputY.data());
+
+ workload->PostAllocationConfigure();
+ ExecuteWorkload(*workload, memoryManager);
+
+ CopyDataFromITensorHandle(outputActual.data(), outputHandle.get());
+
+ return LayerTestResult<T, NumDims>(outputActual,
+ outputExpected,
+ outputHandle->GetShape(),
+ outputInfo.GetShape());
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> BatchMatMul2DSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5, 6,
+ 7, 8
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 19, 22,
+ 43, 50
+ }, qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 2>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({1,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({1,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({1,2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5, 6,
+ 7, 8
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 19, 22,
+ 43, 50
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> BatchMatMulNCHWSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(
+ armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NCHW),
+ armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NCHW));
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({1,1,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({1,1,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({1,1,2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5, 6,
+ 7, 8
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 19, 22,
+ 43, 50
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 4>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> BatchMatMulNHWCSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(
+ armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NHWC),
+ armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NHWC));
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({1,2,2,1}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({1,2,2,1}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({1,2,2,1}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5, 6,
+ 7, 8
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 19, 22,
+ 43, 50
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 4>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DBatchTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({2,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({2,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({2,2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5, 6,
+ 7, 8,
+
+ 13, 14,
+ 15, 16
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 19, 22,
+ 43, 50,
+
+ 267, 286,
+ 323, 346
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DBroadcastTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({2,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({1,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({2,2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 13, 14,
+ 15, 16
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 43, 46,
+ 99, 106,
+
+ 267, 286,
+ 323, 346
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3D2DBroadcastTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({2,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({2,2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 2,
+ 3, 4,
+
+ 9, 10,
+ 11, 12
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 13, 14,
+ 15, 16
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 43, 46,
+ 99, 106,
+
+ 267, 286,
+ 323, 346
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 5> BatchMatMulNDHWCNHWCTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(
+ armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NDHWC),
+ armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NHWC));
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({1,1,2,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({1,2,2,2}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({1,1,2,2,2}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 1, 20,
+ 3, 22,
+
+ 2, 21,
+ 4, 23
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5, 24,
+ 7, 26,
+
+ 6, 25,
+ 8, 27
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 23, 1030,
+ 31, 1114,
+
+ 34, 1079,
+ 46, 1167
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 5>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> BatchMatMul2DTinyTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({1,1}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({1,1}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({1,1}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 3
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 5
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 15
+ }, qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 2>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DNonSquareTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+ auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+ float qScale = 0.0f;
+ int32_t qOffset = 0;
+
+ switch(ArmnnType)
+ {
+ case armnn::DataType::QAsymmS8:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ qScale = 1.0f;
+ break;
+ default:
+ break;
+ }
+
+ armnn::TensorInfo inputXInfo({2,5,3}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo inputYInfo({2,3,4}, ArmnnType, qScale, qOffset);
+ armnn::TensorInfo outputInfo({2,5,4}, ArmnnType, qScale, qOffset);
+
+ std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+ 8, 8, 4,
+ 6, 1, 3,
+ 8, 8, 3,
+ 8, 9, 8,
+ 5, 4, 4,
+
+ 1, 8, 5,
+ 7, 1, 1,
+ 8, 7, 9,
+ 3, 2, 7,
+ 8, 5, 3
+ }, qScale, qOffset);
+
+ std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+ 6, 2, 3, 2,
+ 6, 2, 2, 8,
+ 3, 7, 8, 1,
+
+ 7, 2, 9, 5,
+ 2, 3, 1, 3,
+ 2, 7, 7, 5
+ }, qScale, qOffset);
+
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+ 108, 60, 72, 84,
+ 51, 35, 44, 23,
+ 105, 53, 64, 83,
+ 126, 90, 106, 96,
+ 66, 46, 55, 46,
+
+ 33, 61, 52, 54,
+ 53, 24, 71, 43,
+ 88, 100, 142, 106,
+ 39, 61, 78, 56,
+ 72, 52, 98, 70
+ },qScale, qOffset);
+
+ return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+ memoryManager,
+ tensorHandleFactory,
+ descriptor,
+ inputX,
+ inputY,
+ outputExpected,
+ inputXInfo,
+ inputYInfo,
+ outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::BFloat16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::Float16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::QAsymmS8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::QAsymmU8>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::QSymmS16>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory); \ No newline at end of file
diff --git a/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp
new file mode 100644
index 0000000000..9e2139667b
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp
@@ -0,0 +1,85 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnnTestUtils/LayerTestResult.hpp>
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>, std::size_t NumDims>
+LayerTestResult<T, NumDims> BatchMatMulTestImpl(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory,
+ armnn::BatchMatMulDescriptor descriptor,
+ const std::vector<T>& inputX,
+ const std::vector<T>& inputY,
+ const std::vector<T>& outputExpected,
+ const armnn::TensorInfo& inputXInfo,
+ const armnn::TensorInfo& inputYInfo,
+ const armnn::TensorInfo& outputInfo);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> BatchMatMul2DSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchMatMulNCHWSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchMatMulNHWCSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DBatchTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DBroadcastTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3D2DBroadcastTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> BatchMatMulNDHWCNHWCTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> BatchMatMul2DTinyTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DNonSquareTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory); \ No newline at end of file