From 6b47809e7d6c55d20a05d863ce2f09159f381f85 Mon Sep 17 00:00:00 2001
From: Samuel Yap
Date: Wed, 6 Jul 2022 15:36:03 +0100
Subject: IVGCVSW-7109: Add Batch MatMul front end support - Reference

* Descriptors added for BatchMatMul
* Layer definition added
* Input validation added (will likely change when opt. param support comes in)
* Ref workload implementation for BatchMatMul added (will also change with opt. param support)
* Ref layer tests made for BatchMatMul
* CMake and other build files updated

Signed-off-by: Samuel Yap
Change-Id: Ic885301da543ee0fbe7922b85e7f9658c4efc617
---
 src/backends/backendsCommon/LayerSupportRules.hpp  |    8 +
 src/backends/backendsCommon/WorkloadData.cpp       |  227 +++++
 src/backends/backendsCommon/WorkloadFactory.cpp    |   16 +
 src/backends/backendsCommon/common.mk              |    1 +
 src/backends/backendsCommon/test/CMakeLists.txt    |    2 +
 .../test/IsLayerSupportedTestImpl.hpp              |    2 +
 src/backends/backendsCommon/test/LayerTests.hpp    |    1 +
 .../test/layerTests/BatchMatMulTestImpl.cpp        | 1010 ++++++++++++++++++++
 .../test/layerTests/BatchMatMulTestImpl.hpp        |   85 ++
 9 files changed, 1352 insertions(+)
 create mode 100644 src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp
 create mode 100644 src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp

(limited to 'src/backends/backendsCommon')

diff --git a/src/backends/backendsCommon/LayerSupportRules.hpp b/src/backends/backendsCommon/LayerSupportRules.hpp
index e616ecf022..a83fd62867 100644
--- a/src/backends/backendsCommon/LayerSupportRules.hpp
+++ b/src/backends/backendsCommon/LayerSupportRules.hpp
@@ -186,4 +186,12 @@ struct TensorNumDimensionsAreCorrect : public Rule
     }
 };
 
+struct TensorNumDimensionsAreGreaterOrEqualTo : public Rule
+{
+    TensorNumDimensionsAreGreaterOrEqualTo(const TensorInfo& info, unsigned int numDimensionsToCompare)
+    {
+        m_Res = info.GetNumDimensions() >= numDimensionsToCompare;
+    }
+};
+
 } //namespace armnn
\ No newline at end of file
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 606821b5e5..9a4c60f551 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -4143,5 +4143,232 @@ void UnidirectionalSequenceLstmQueueDescriptor::Validate(const WorkloadInfo& wor
     }
 }
 
+void BatchMatMulQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    const std::string descriptorName{"BatchMatMulDescriptor"};
+
+    ValidateNumInputs(workloadInfo, descriptorName, 2);
+    ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+    // Inputs must both be at least 2D.
+    // For inputs X and Y whose dimensions to be multiplied are (M,N) and (I,J) respectively,
+    // axes N and I must be the same size.
+
+    const auto& inputTensorXInfo = workloadInfo.m_InputTensorInfos[0];
+    const auto& inputTensorYInfo = workloadInfo.m_InputTensorInfos[1];
+    const auto& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+    std::vector<DataType> supportedTypes =
+    {
+        DataType::BFloat16,
+        DataType::Float16,
+        DataType::Float32,
+        DataType::QAsymmS8,
+        DataType::QAsymmU8,
+        DataType::QSymmS16
+    };
+
+    ValidateDataTypes(inputTensorXInfo, supportedTypes, descriptorName);
+    ValidateDataTypes(inputTensorYInfo, supportedTypes, descriptorName);
+    ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName);
+
+    if ((inputTensorXInfo.GetNumDimensions() < 2) ||
+        (inputTensorYInfo.GetNumDimensions() < 2))
+    {
+        throw InvalidArgumentException(descriptorName + ": Input tensors are not 2D or greater.");
+    }
+
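+    // Illustrative example of the size rule (shapes chosen purely for clarity):
+    // X of shape (2,3,4) and Y of shape (2,4,5) are compatible, since N == I == 4,
+    // and multiply to an output of shape (2,3,5). If Y were (2,3,5) instead, the
+    // axis-size check further below would throw.
+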
+    if(m_Parameters.m_DataLayoutX.has_value())
+    {
+        switch(m_Parameters.m_DataLayoutX.value())
+        {
+            case DataLayout::NCHW:
+            case DataLayout::NHWC:
+                if(inputTensorXInfo.GetNumDimensions() != 4)
+                {
+                    throw InvalidArgumentException(descriptorName +
+                        ": Input tensor X does not have the correct "
+                        "number of dimensions for the Data Layout that it has been assigned.");
+                }
+                break;
+            case DataLayout::NCDHW:
+            case DataLayout::NDHWC:
+                if(inputTensorXInfo.GetNumDimensions() != 5)
+                {
+                    throw InvalidArgumentException(descriptorName +
+                        ": Input tensor X does not have the correct "
+                        "number of dimensions for the Data Layout that it has been assigned.");
+                }
+                break;
+            default:
+                break;
+        }
+    }
+
+    if(m_Parameters.m_DataLayoutY.has_value())
+    {
+        switch(m_Parameters.m_DataLayoutY.value())
+        {
+            case DataLayout::NCHW:
+            case DataLayout::NHWC:
+                if(inputTensorYInfo.GetNumDimensions() != 4)
+                {
+                    throw InvalidArgumentException(descriptorName +
+                        ": Input tensor Y does not have the correct "
+                        "number of dimensions for the Data Layout that it has been assigned.");
+                }
+                break;
+            case DataLayout::NCDHW:
+            case DataLayout::NDHWC:
+                if(inputTensorYInfo.GetNumDimensions() != 5)
+                {
+                    throw InvalidArgumentException(descriptorName +
+                        ": Input tensor Y does not have the correct "
+                        "number of dimensions for the Data Layout that it has been assigned.");
+                }
+                break;
+            default:
+                break;
+        }
+    }
+
+    auto axesToMul = BatchMatMulDescriptor::GetAxesToMul(m_Parameters,
+                                                         inputTensorXInfo.GetShape(),
+                                                         inputTensorYInfo.GetShape());
+
+    if(inputTensorXInfo.GetShape()[axesToMul.first.second]
+       != inputTensorYInfo.GetShape()[axesToMul.second.first])
+    {
+        throw InvalidArgumentException(descriptorName +
+            ": The final axis of input tensor X must be the same size as "
+            "the second last axis of input tensor Y.");
+    }
+
+    auto axesNotMul = BatchMatMulDescriptor::GetAxesNotMul(m_Parameters,
+                                                           inputTensorXInfo.GetShape(),
+                                                           inputTensorYInfo.GetShape());
+
+    { // Separate scope so we don't pollute the rest of the scope with our temp variables
+        // e.g. NHWC isn't compatible with NCHW as of now
+        DataLayout xLayout;
+        DataLayout yLayout;
+
+        if(m_Parameters.m_DataLayoutX == EmptyOptional())
+        {
+            xLayout = DataLayout::NCHW; // Not equivalent - I'm just concerned with the last 2 axes
+        }
+        else
+        {
+            xLayout = m_Parameters.m_DataLayoutX.value();
+        }
+
+        if(m_Parameters.m_DataLayoutY == EmptyOptional())
+        {
+            yLayout = DataLayout::NCHW;
+        }
+        else
+        {
+            yLayout = m_Parameters.m_DataLayoutY.value();
+        }
+
+        if(xLayout == DataLayout::NCHW || xLayout == DataLayout::NCDHW)
+        {
+            if(yLayout == DataLayout::NHWC || yLayout == DataLayout::NDHWC)
+            {
+                throw InvalidArgumentException(descriptorName +
+                    ": Invalid input tensor data layout combination.");
+            }
+        }
+        if(yLayout == DataLayout::NCHW || yLayout == DataLayout::NCDHW)
+        {
+            if(xLayout == DataLayout::NHWC || xLayout == DataLayout::NDHWC)
+            {
+                throw InvalidArgumentException(descriptorName +
+                    ": Invalid input tensor data layout combination.");
+            }
+        }
+    }
+
+    // Simulate aligning the ends of the matrix dims and prepending 1's to the beginning of the shorter one
+    unsigned int outputTensorDimSize = std::max(inputTensorXInfo.GetNumDimensions(),
+                                                inputTensorYInfo.GetNumDimensions());
+    if(outputTensorDimSize-2 > 0)
+    {
+        TensorInfo tiXNotMul = TensorInfo(TensorShape(outputTensorDimSize-2),
+                                          DataType::Float32);
+        TensorInfo tiYNotMul = TensorInfo(TensorShape(outputTensorDimSize-2),
+                                          DataType::Float32);
+        TensorInfo tiOutNotMul = TensorInfo(TensorShape(outputTensorDimSize-2),
+                                            DataType::Float32);
+
+        // Take the non-multiplied dims from the corresponding input tensor; prepended
+        // (broadcast) axes get size 1. The source TensorInfo is passed in so that X and Y
+        // each read from their own shape.
+        auto doAxisExtension = [&](const std::vector<unsigned int>& axisIndices,
+                                   const TensorInfo& inputInfo,
+                                   TensorInfo& ti)
+        {
+            auto sizeDiff = (outputTensorDimSize-2) - axisIndices.size();
+
+            for(unsigned int i = 0; i < ti.GetNumDimensions(); i++)
+            {
+                ti.GetShape()[i] = (i < sizeDiff) ?
+                                   1 :
+                                   inputInfo.GetShape()[axisIndices[i - sizeDiff]];
+            }
+        };
+
+        doAxisExtension(axesNotMul.first, inputTensorXInfo, tiXNotMul);
+        doAxisExtension(axesNotMul.second, inputTensorYInfo, tiYNotMul);
+
+        for(unsigned int i = 0; i < tiOutNotMul.GetNumDimensions(); i++)
+        {
+            tiOutNotMul.GetShape()[i] = std::max(tiXNotMul.GetShape()[i],
+                                                 tiYNotMul.GetShape()[i]);
+        }
+
+        ValidateBroadcastTensorShapesMatch(tiXNotMul,
+                                           tiYNotMul,
+                                           tiOutNotMul,
+                                           descriptorName,
+                                           "input_X",
+                                           "input_Y");
+    }
+
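+    // Worked example of the alignment above (shapes assumed for illustration only):
+    // X of shape (5,6,3,4) has non-multiplied axes (5,6); Y of shape (6,4,5) has (6),
+    // which is padded to (1,6) and broadcasts against (5,6), giving batch dims (5,6).
+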
+    // Also check descriptor parameter validity
+    // This will eventually be moved to the start of the function as explained below
+    if ((!m_Parameters.m_TransposeX.empty() && !m_Parameters.m_AdjointX.empty()) ||
+        (!m_Parameters.m_TransposeY.empty() && !m_Parameters.m_AdjointY.empty()))
+    {
+        throw InvalidArgumentException(descriptorName +
+            ": Invalid descriptor parameters - Transpose and Adjoint "
+            "vectors cannot both be set for a given input tensor.");
+    }
+
+    if(m_Parameters.m_TransposeX.size() != 0 && m_Parameters.m_TransposeX.size() != inputTensorXInfo.GetNumDimensions())
+    {
+        throw InvalidArgumentException(descriptorName +
+            ": Invalid descriptor parameter - Transpose X vector must be "
+            "the same size as tensor input X's dimensionality.");
+    }
+    if(m_Parameters.m_AdjointX.size() != 0 && m_Parameters.m_AdjointX.size() != inputTensorXInfo.GetNumDimensions())
+    {
+        throw InvalidArgumentException(descriptorName +
+            ": Invalid descriptor parameter - Adjoint X vector must be "
+            "the same size as tensor input X's dimensionality.");
+    }
+    if(m_Parameters.m_TransposeY.size() != 0 && m_Parameters.m_TransposeY.size() != inputTensorYInfo.GetNumDimensions())
+    {
+        throw InvalidArgumentException(descriptorName +
+            ": Invalid descriptor parameter - Transpose Y vector must be "
+            "the same size as tensor input Y's dimensionality.");
+    }
+    if(m_Parameters.m_AdjointY.size() != 0 && m_Parameters.m_AdjointY.size() != inputTensorYInfo.GetNumDimensions())
+    {
+        throw InvalidArgumentException(descriptorName +
+            ": Invalid descriptor parameter - Adjoint Y vector must be "
+            "the same size as tensor input Y's dimensionality.");
+    }
+    // Note: for adjoint/transpose, the validation will need to be done on top of the resultant permutation.
+}
+
 } // namespace armnn
\ No newline at end of file
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 3660e6e721..70006e4f79 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -133,6 +133,22 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
                                                            reason);
             break;
         }
+        case LayerType::BatchMatMul:
+        {
+            auto cLayer = PolymorphicDowncast<const BatchMatMulLayer*>(&layer);
+            const BatchMatMulDescriptor& descriptor = cLayer->GetParameters();
+
+            const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+            result = layerSupportObject.IsBatchMatMulSupported(
+                         OverrideDataType(input0, dataType),
+                         OverrideDataType(input1, dataType),
+                         OverrideDataType(output, dataType),
+                         descriptor,
+                         reason);
+            break;
+        }
         case LayerType::BatchNormalization:
         {
             auto cLayer = PolymorphicDowncast<const BatchNormalizationLayer*>(&layer);
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 86de7e331b..007cca57fa 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -46,6 +46,7 @@ COMMON_TEST_SOURCES := \
     test/layerTests/ActivationTestImpl.cpp \
     test/layerTests/AdditionTestImpl.cpp \
     test/layerTests/ArgMinMaxTestImpl.cpp \
+    test/layerTests/BatchMatMulTestImpl.cpp \
     test/layerTests/BatchNormalizationTestImpl.cpp \
     test/layerTests/CastTestImpl.cpp \
     test/layerTests/ChannelShuffleTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 8beb7c4169..c5b97ebf4c 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -68,6 +68,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
     layerTests/AdditionTestImpl.hpp
     layerTests/ArgMinMaxTestImpl.cpp
     layerTests/ArgMinMaxTestImpl.hpp
+    layerTests/BatchMatMulTestImpl.cpp
+    layerTests/BatchMatMulTestImpl.hpp
     layerTests/BatchNormalizationTestImpl.cpp
     layerTests/BatchNormalizationTestImpl.hpp
     layerTests/BatchToSpaceNdTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index ba8cfd5f68..5fdcd9c57a 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -614,6 +614,8 @@ DECLARE_LAYER_POLICY_1_PARAM(Addition)
 
 DECLARE_LAYER_POLICY_2_PARAM(ArgMinMax)
 
+DECLARE_LAYER_POLICY_2_PARAM(BatchMatMul)
+
 DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization)
 
 DECLARE_LAYER_POLICY_2_PARAM(BatchToSpaceNd)
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 8d73027783..25435b24ec 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -9,6 +9,7 @@
 #include <backendsCommon/test/layerTests/ActivationTestImpl.hpp>
 #include <backendsCommon/test/layerTests/AdditionTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp>
+#include <backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp>
 #include <backendsCommon/test/layerTests/BatchNormalizationTestImpl.hpp>
 #include <backendsCommon/test/layerTests/BatchToSpaceNdTestImpl.hpp>
 #include <backendsCommon/test/layerTests/CastTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp
new file mode 100644
index 0000000000..41add6e6da
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.cpp
@@ -0,0 +1,1010 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchMatMulTestImpl.hpp"
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <armnn/backends/Workload.hpp>
+#include <armnn/backends/WorkloadData.hpp>
+#include <armnn/backends/WorkloadFactory.hpp>
+
+#include <armnnTestUtils/WorkloadTestUtils.hpp>
+#include <armnnTestUtils/TensorCopyUtils.hpp>
+#include <armnnUtils/QuantizeHelper.hpp>
+#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
+
+
+template<armnn::DataType ArmnnType, typename T, std::size_t NumDims>
+LayerTestResult<T, NumDims> BatchMatMulTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    armnn::BatchMatMulDescriptor descriptor,
+    const std::vector<T>& inputX,
+    const std::vector<T>& inputY,
+    const std::vector<T>& outputExpected,
+    const armnn::TensorInfo& inputXInfo,
+    const armnn::TensorInfo& inputYInfo,
+    const armnn::TensorInfo& outputInfo)
+{
+    std::vector<T> outputActual(outputInfo.GetNumElements());
+
+    std::unique_ptr<armnn::ITensorHandle> inputXHandle = tensorHandleFactory.CreateTensorHandle(inputXInfo);
+    std::unique_ptr<armnn::ITensorHandle> inputYHandle = tensorHandleFactory.CreateTensorHandle(inputYInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
+
+    armnn::BatchMatMulQueueDescriptor queueDescriptor;
+    queueDescriptor.m_Parameters = descriptor;
+    armnn::WorkloadInfo workloadInfo;
+
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputXInfo, inputXHandle.get());
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputYInfo, inputYHandle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+    auto workload = workloadFactory.CreateWorkload(armnn::LayerType::BatchMatMul, queueDescriptor, workloadInfo);
+
+    inputXHandle->Allocate();
+    inputYHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputXHandle.get(), inputX.data());
+    CopyDataToITensorHandle(inputYHandle.get(), inputY.data());
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(outputActual.data(), outputHandle.get());
+
+    return LayerTestResult<T, NumDims>(outputActual,
+                                       outputExpected,
+                                       outputHandle->GetShape(),
+                                       outputInfo.GetShape());
+}
+
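+// Each typed test below builds a descriptor plus quantised input/expected data and
+// delegates to the generic runner above. Backend test suites typically register these
+// with a macro such as ARMNN_AUTO_TEST_CASE_WITH_THF, e.g. (illustrative registration,
+// not part of this file):
+//     ARMNN_AUTO_TEST_CASE_WITH_THF(BatchMatMul2DSimpleFloat32,
+//                                   BatchMatMul2DSimpleTest<armnn::DataType::Float32>)
+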
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> BatchMatMul2DSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5, 6,
+        7, 8
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        19, 22,
+        43, 50
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 2>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 2>
+BatchMatMul2DSimpleTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({1,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({1,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({1,2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5, 6,
+        7, 8
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        19, 22,
+        43, 50
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DSimpleTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> BatchMatMulNCHWSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(
+        armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NCHW),
+        armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NCHW));
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({1,1,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({1,1,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({1,1,2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5, 6,
+        7, 8
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        19, 22,
+        43, 50
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 4>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
+BatchMatMulNCHWSimpleTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
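+// The NHWC variant below places the same 2x2 matrices in the H,W axes with C == 1
+// ({1,2,2,1}), whereas the NCHW test above used {1,1,2,2}: same data and expected
+// results, different physical placement of the matrix dimensions.
+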
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> BatchMatMulNHWCSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(
+        armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NHWC),
+        armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NHWC));
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({1,2,2,1}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({1,2,2,1}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({1,2,2,1}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5, 6,
+        7, 8
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        19, 22,
+        43, 50
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 4>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
+BatchMatMulNHWCSimpleTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DBatchTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({2,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({2,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({2,2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4,
+
+        9, 10,
+        11, 12
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5, 6,
+        7, 8,
+
+        13, 14,
+        15, 16
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        19, 22,
+        43, 50,
+
+        267, 286,
+        323, 346
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DBatchTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DBroadcastTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({2,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({1,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({2,2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4,
+
+        9, 10,
+        11, 12
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        13, 14,
+        15, 16
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        43, 46,
+        99, 106,
+
+        267, 286,
+        323, 346
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DBroadcastTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
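+// In the 3D x 2D case below, the rank-2 Y has no batch axis, so it is implicitly
+// treated as batch size 1 and the same Y matrix is reused for every batch of X
+// (e.g. [[1,2],[3,4]] x [[13,14],[15,16]] = [[43,46],[99,106]] for the first batch).
+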
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3D2DBroadcastTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({2,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({2,2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 2,
+        3, 4,
+
+        9, 10,
+        11, 12
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        13, 14,
+        15, 16
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        43, 46,
+        99, 106,
+
+        267, 286,
+        323, 346
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3D2DBroadcastTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
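+// Mixed-layout case below: X is NDHWC while Y is NHWC, so the matmul runs over the
+// H,W axes and the trailing C axis acts as a batch-like channel dimension. With
+// channels last, the literals interleave the two channels element by element -
+// e.g. channel 0 of X is [[1,3],[2,4]] and channel 1 is [[20,22],[21,23]].
+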
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 5> BatchMatMulNDHWCNHWCTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(
+        armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NDHWC),
+        armnn::Optional<armnn::DataLayout>(armnn::DataLayout::NHWC));
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({1,1,2,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({1,2,2,2}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({1,1,2,2,2}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        1, 20,
+        3, 22,
+
+        2, 21,
+        4, 23
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5, 24,
+        7, 26,
+
+        6, 25,
+        8, 27
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        23, 1030,
+        31, 1114,
+
+        34, 1079,
+        46, 1167
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 5>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 5>
+BatchMatMulNDHWCNHWCTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> BatchMatMul2DTinyTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({1,1}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({1,1}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({1,1}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        3
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        5
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        15
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 2>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 2>
+BatchMatMul2DTinyTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 3> BatchMatMul3DNonSquareTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    auto descriptor = armnn::BatchMatMulDescriptor(); // Arbitrary layout with no transpose/adjointing
+
+    float qScale = 0.0f;
+    int32_t qOffset = 0;
+
+    switch(ArmnnType)
+    {
+        case armnn::DataType::QAsymmS8:
+        case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS16:
+            qScale = 1.0f;
+            break;
+        default:
+            break;
+    }
+
+    armnn::TensorInfo inputXInfo({2,5,3}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo inputYInfo({2,3,4}, ArmnnType, qScale, qOffset);
+    armnn::TensorInfo outputInfo({2,5,4}, ArmnnType, qScale, qOffset);
+
+    std::vector<T> inputX = armnnUtils::QuantizedVector<T>({
+        8, 8, 4,
+        6, 1, 3,
+        8, 8, 3,
+        8, 9, 8,
+        5, 4, 4,
+
+        1, 8, 5,
+        7, 1, 1,
+        8, 7, 9,
+        3, 2, 7,
+        8, 5, 3
+    }, qScale, qOffset);
+
+    std::vector<T> inputY = armnnUtils::QuantizedVector<T>({
+        6, 2, 3, 2,
+        6, 2, 2, 8,
+        3, 7, 8, 1,
+
+        7, 2, 9, 5,
+        2, 3, 1, 3,
+        2, 7, 7, 5
+    }, qScale, qOffset);
+
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
+        108, 60, 72, 84,
+        51, 35, 44, 23,
+        105, 53, 64, 83,
+        126, 90, 106, 96,
+        66, 46, 55, 46,
+
+        33, 61, 52, 54,
+        53, 24, 71, 43,
+        88, 100, 142, 106,
+        39, 61, 78, 56,
+        72, 52, 98, 70
+    }, qScale, qOffset);
+
+    return BatchMatMulTestImpl<ArmnnType, T, 3>(workloadFactory,
+                                                memoryManager,
+                                                tensorHandleFactory,
+                                                descriptor,
+                                                inputX,
+                                                inputY,
+                                                outputExpected,
+                                                inputXInfo,
+                                                inputYInfo,
+                                                outputInfo);
+}
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 3>
+BatchMatMul3DNonSquareTest<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp
new file mode 100644
index 0000000000..9e2139667b
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/BatchMatMulTestImpl.hpp
@@ -0,0 +1,85 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnnTestUtils/LayerTestResult.hpp>
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>, std::size_t NumDims>
+LayerTestResult<T, NumDims> BatchMatMulTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    armnn::BatchMatMulDescriptor descriptor,
+    const std::vector<T>& inputX,
+    const std::vector<T>& inputY,
+    const std::vector<T>& outputExpected,
+    const armnn::TensorInfo& inputXInfo,
+    const armnn::TensorInfo& inputYInfo,
+    const armnn::TensorInfo& outputInfo);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> BatchMatMul2DSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchMatMulNCHWSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> BatchMatMulNHWCSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DBatchTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3>
BatchMatMul3DBroadcastTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3D2DBroadcastTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> BatchMatMulNDHWCNHWCTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 2> BatchMatMul2DTinyTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 3> BatchMatMul3DNonSquareTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
\ No newline at end of file
-- 
cgit v1.2.1