From 0e3fe10bfe1b4f006f6e0c5c2fae8fb5515c7544 Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Mon, 23 Jan 2023 19:32:06 +0000
Subject: IVGCVSW-7455 Workaround to allow CLBatchMatMul to parse some 4D models

* Added the ability to reduce dimension sizes when calling
  BuildArmComputeTensorInfo or BuildArmComputeTensorShape; this will
  attempt to remove leading 1s in order to squeeze the number of
  dimensions while retaining the overall tensor size.
* Changed ClBatchMatMulWorkload to attempt to squeeze the number of
  dimensions to 3, as the CL GEMM kernel can only support up to 3
  dimensions.

Signed-off-by: Mike Kelly
Change-Id: I6b3d0886c5b97fdb686838fc3dc292833ddc4643
---
 delegate/src/test/BatchMatMulTest.cpp            |  3 +-
 include/armnnUtils/TensorUtils.hpp               |  7 ++-
 src/armnnUtils/TensorUtils.cpp                   | 36 ++++++++++-
 src/armnnUtils/test/TensorUtilsTest.cpp          | 58 +++++++++++++++++-
 src/backends/aclCommon/ArmComputeTensorUtils.cpp | 69 +++++++++++++++++++++-
 src/backends/aclCommon/ArmComputeTensorUtils.hpp | 24 +++++++-
 src/backends/cl/test/ClLayerTests.cpp            |  5 +-
 .../cl/workloads/ClBatchMatMulWorkload.cpp       | 47 ++++++++++-----
 8 files changed, 227 insertions(+), 22 deletions(-)

diff --git a/delegate/src/test/BatchMatMulTest.cpp b/delegate/src/test/BatchMatMulTest.cpp
index d13d8dcf43..06ad2c3be2 100644
--- a/delegate/src/test/BatchMatMulTest.cpp
+++ b/delegate/src/test/BatchMatMulTest.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -677,6 +677,7 @@ namespace armnnDelegate
     std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
     BatchMatMul2DFp32SimpleTest(backends);
     BatchMatMul3DFp32SimpleTest(backends);
+    BatchMatMul4DFp32SimpleTest(backends);
     BatchMatMul3DFp32BatchTest(backends);
     BatchMatMul3DFp32BroadcastTest(backends);
     BatchMatMul3D2DFp32BroadcastTest(backends);
diff --git a/include/armnnUtils/TensorUtils.hpp b/include/armnnUtils/TensorUtils.hpp
index 2d6ec2fea4..a2aa9b0a98 100644
--- a/include/armnnUtils/TensorUtils.hpp
+++ b/include/armnnUtils/TensorUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2019,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -41,6 +42,10 @@ armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches,
 std::pair<float, float> FindMinMax(armnn::ITensorHandle* tensorHandle);
 
+armnn::TensorShape ReduceDims(const armnn::TensorShape& tensorShape, unsigned int dimensions);
+
+armnn::TensorInfo ReduceDims(const armnn::TensorInfo& tensorInfo, unsigned int dimensions);
+
 armnn::TensorShape ExpandDims(const armnn::TensorShape& tensorShape, int axis);
 
 std::vector<unsigned int> SqueezeDims(const armnn::TensorShape& tensorShape);
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index 9e3d719211..03109e0cee 100644
--- a/src/armnnUtils/TensorUtils.cpp
+++ b/src/armnnUtils/TensorUtils.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -103,6 +103,40 @@ std::pair<float, float> FindMinMax(ITensorHandle* tensorHandle)
     return std::make_pair(min, max);
 }
 
+TensorShape ReduceDims(const TensorShape& tensorShape, unsigned int dimensions)
+{
+    if (tensorShape.GetNumDimensions() <= dimensions)
+    {
+        return tensorShape;
+    }
+    std::vector<unsigned int> newShape;
+
+    unsigned int dimsToSkip = tensorShape.GetNumDimensions() - dimensions;
+    unsigned int dimsSkipped = 0;
+    bool insertRemainder = false;
+
+    for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
+    {
+        if (tensorShape[i] == 1 && dimsSkipped < dimsToSkip && !insertRemainder)
+        {
+            ++dimsSkipped;
+            continue;
+        }
+        newShape.push_back(tensorShape[i]);
+        // Once we insert the first dimension we can't skip any more
+        insertRemainder = true;
+    }
+    return TensorShape(static_cast<unsigned int>(newShape.size()), newShape.data());
+}
+
+TensorInfo ReduceDims(const TensorInfo& tensorInfo, unsigned int dimensions)
+{
+    TensorInfo strippedTensor(tensorInfo);
+    TensorShape strippedShape = ReduceDims(tensorInfo.GetShape(), dimensions);
+    strippedTensor.SetShape(strippedShape);
+    return strippedTensor;
+}
+
 TensorShape ExpandDims(const TensorShape& tensorShape, int axis)
 {
     unsigned int outputDim = tensorShape.GetNumDimensions() + 1;
diff --git a/src/armnnUtils/test/TensorUtilsTest.cpp b/src/armnnUtils/test/TensorUtilsTest.cpp
index 16349c554e..a69a0098ce 100644
--- a/src/armnnUtils/test/TensorUtilsTest.cpp
+++ b/src/armnnUtils/test/TensorUtilsTest.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2019,2021-2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019,2021-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -126,6 +126,62 @@ TEST_CASE("ExpandDimsInvalidAxisTest")
     CHECK_THROWS_AS(ExpandDims(inputShape, 4), armnn::InvalidArgumentException);
 }
 
+TEST_CASE("ReduceDimsShapeAll1s")
+{
+    armnn::TensorShape inputShape({ 1, 1, 1 });
+
+    // Reduce dimensions from 3 to 2; one leading 1 is removed
+    armnn::TensorShape outputShape = ReduceDims(inputShape, 2);
+    CHECK(outputShape.GetNumDimensions() == 2);
+    CHECK(outputShape[0] == 1);
+    CHECK(outputShape[1] == 1);
+}
+
+TEST_CASE("ReduceDimsShapeNotEnough1s")
+{
+    armnn::TensorShape inputShape({ 1, 2, 1 });
+
+    // Request 1 dimension, but only the leading 1 can be removed, leaving 2 dimensions
+    armnn::TensorShape outputShape = ReduceDims(inputShape, 1);
+    CHECK(outputShape.GetNumDimensions() == 2);
+    CHECK(outputShape[0] == 2);
+    CHECK(outputShape[1] == 1);
+}
+
+TEST_CASE("ReduceDimsInfoAll1s")
+{
+    armnn::TensorInfo inputInfo({ 1, 1, 1 }, DataType::Float32);
+
+    // Reduce dimensions from 3 to 2; one leading 1 is removed
+    armnn::TensorInfo outputInfo = ReduceDims(inputInfo, 2);
+    CHECK(outputInfo.GetShape().GetNumDimensions() == 2);
+    CHECK(outputInfo.GetShape()[0] == 1);
+    CHECK(outputInfo.GetShape()[1] == 1);
+}
+
+TEST_CASE("ReduceDimsInfoNotEnough1s")
+{
+    armnn::TensorInfo inputInfo({ 1, 2, 1 }, DataType::Float32);
+
+    // Request 1 dimension, but only the leading 1 can be removed, leaving 2 dimensions
+    armnn::TensorInfo outputInfo = ReduceDims(inputInfo, 1);
+    CHECK(outputInfo.GetNumDimensions() == 2);
+    CHECK(outputInfo.GetShape()[0] == 2);
+    CHECK(outputInfo.GetShape()[1] == 1);
+}
+
+TEST_CASE("ReduceDimsShapeDimensionGreaterThanSize")
+{
+    armnn::TensorShape inputShape({ 1, 1, 1 });
+
+    // Requested dimension count exceeds the current rank, so the shape is unchanged
+    armnn::TensorShape outputShape = ReduceDims(inputShape, 4);
+    CHECK(outputShape.GetNumDimensions() == 3);
+    CHECK(outputShape[0] == 1);
+    CHECK(outputShape[1] == 1);
+    CHECK(outputShape[2] == 1);
+}
+
 TEST_CASE("ExpandDimsInvalidNegativeAxisTest")
 {
     armnn::TensorShape inputShape({ 2, 3, 4 });
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index 38c7f70da5..e6c5a9b41c 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include
@@ -146,6 +146,51 @@ arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& te
     return shape;
 }
 
+std::vector<unsigned int> ReduceDimsForACL(const armnn::TensorShape tensorShape, unsigned int dimensions)
+{
+    std::vector<unsigned int> newShape;
+
+    unsigned int dimsToSkip = 0;
+
+    if (tensorShape.GetNumDimensions() > dimensions)
+    {
+        dimsToSkip = tensorShape.GetNumDimensions() - dimensions;
+    }
+    unsigned int dimsSkipped = 0;
+    bool insertRemainder = false;
+
+    for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
+    {
+        if (tensorShape[i] == 1 && dimsSkipped < dimsToSkip && !insertRemainder)
+        {
+            ++dimsSkipped;
+            continue;
+        }
+        newShape.insert(newShape.begin(), tensorShape[i]);
+        // Once we insert the first dimension we can't skip any more
+        insertRemainder = true;
+    }
+    return newShape;
+}
+
+arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape, unsigned int dimensions)
+{
+    arm_compute::TensorShape shape;
+    std::vector<unsigned int> strippedShape = ReduceDimsForACL(tensorShape, dimensions);
+
+    for (unsigned int i = 0; i < strippedShape.size(); i++)
+    {
+        shape.set(i, strippedShape[i], false);
+    }
+
+    // prevent arm_compute issue where tensor is flattened to nothing
+    if (shape.num_dimensions() == 0)
+    {
+        shape.set_num_dimensions(1);
+    }
+    return shape;
+}
+
 // Utility function used to build a TensorInfo object, that can be used to initialise
 // ARM Compute Tensor and CLTensor allocators.
 // Note: this utility ignores the value of armnn::TensorInfo.IsConstant(). ACL tensors
@@ -174,6 +219,28 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso
     return aclTensorInfo;
 }
 
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, unsigned int dimensions)
+{
+    bool multiScales = tensorInfo.HasMultipleQuantizationScales();
+    const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape(), dimensions);
+    const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);
+
+    const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
+        arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
+        arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());
+
+    return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
+}
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
+                                                  armnn::DataLayout dataLayout, unsigned int dimensions)
+{
+    arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo, dimensions);
+    aclTensorInfo.set_data_layout(ConvertDataLayout(dataLayout));
+
+    return aclTensorInfo;
+}
+
+
 arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
 {
     switch(dataLayout)
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index 6ddecf2aaa..1f07fa949c 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -36,16 +36,38 @@ arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensi
 /// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape.
 arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape);
 
+/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape. This will
+/// attempt to reduce the number of leading 1s until the dimension length is equal to the dimensions passed in.
+arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape, unsigned int dimensions);
+
 /// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
 /// armnn::ITensorInfo.
 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);
 
+/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
+/// armnn::ITensorInfo. This will attempt to reduce the number of leading 1s until the dimension length is equal
+/// to the dimensions passed in.
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, unsigned int dimensions);
+
+/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
+/// armnn::ITensorInfo. This will attempt to reduce the number of leading 1s until the dimension length is equal
+/// to the dimensions passed in.
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
+                                                  armnn::DataLayout dataLayout,
+                                                  unsigned int dimensions);
+
 /// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
 /// armnn::ITensorInfo.
 /// armnn::DataLayout.
 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
                                                   armnn::DataLayout dataLayout);
 
+/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
+/// armnn::ITensorInfo. This will attempt to reduce the number of leading 1s until the dimension length is equal
+/// to the dimensions passed in.
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
+                                                  armnn::DataLayout dataLayout, unsigned int dimensions);
+
 /// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout
 /// armnn::DataLayout.
 arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 4ba2a9ec3b..10e2a54c9f 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -80,6 +80,9 @@ ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DSimpleFloat32,
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DSimpleFloat32,
                                  ClContextControlFixture,
                                  BatchMatMul3DSimpleTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMulNCHWSimpleFloat32,
+                                 ClContextControlFixture,
+                                 BatchMatMulNCHWSimpleTest<DataType::Float32>);
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBatchFloat32,
                                  ClContextControlFixture,
                                  BatchMatMul3DBatchTest<DataType::Float32>);
diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
index ece87c2672..f21666b90a 100644
--- a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
@@ -13,6 +13,7 @@
 #include
 #include
+#include <armnnUtils/TensorUtils.hpp>
 #include
 
@@ -24,6 +25,7 @@
 
 namespace armnn
 {
+
 arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX,
                                           const TensorInfo& inputY,
                                           const TensorInfo& output,
@@ -42,36 +44,41 @@ arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX,
     arm_compute::Status statusPermuteX = arm_compute::Status(arm_compute::ErrorCode::OK);
     arm_compute::Status statusPermuteY = arm_compute::Status(arm_compute::ErrorCode::OK);
 
-    const auto aclInputXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputX, descriptor.m_DataLayoutX);
-    const auto aclInputYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputY, descriptor.m_DataLayoutY);
-    const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+    // ClGemmMatrixMultiplyNativeKernel used by CLGEMM can only support 3 dimensional
+    // tensors so try to reduce the dimensions to 3
+    const auto aclInputXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputX, descriptor.m_DataLayoutX, 3);
+    const auto aclInputYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputY, descriptor.m_DataLayoutY, 3);
+    const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayoutY, 3);
 
     arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo();
     arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo();
 
     if (descriptor.m_TransposeX == true)
     {
-        auto permutationXVector = GeneratePermutationVectorOnLastTwoDimensions(inputX.GetNumDimensions());
+        armnn::TensorInfo inputXStripped = armnnUtils::ReduceDims(inputX, 3);
+
+        auto permutationXVector = GeneratePermutationVectorOnLastTwoDimensions(inputXStripped.GetNumDimensions());
         const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
-        const TensorInfo permutedXInfo = armnnUtils::Permuted(inputX, permutationXVector);
-        aclPermutedXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedXInfo);
+        const TensorInfo permutedXInfo = armnnUtils::Permuted(inputXStripped, permutationXVector);
+        aclPermutedXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedXInfo, 3);
 
         statusPermuteX = arm_compute::CLPermute::validate(&aclInputXInfo,
                                                           &aclPermutedXInfo,
                                                           aclPermutationXVector);
     }
 
-    if ( descriptor.m_TransposeY == true)
+    if (descriptor.m_TransposeY == true)
     {
-        auto permutationYVector = GeneratePermutationVectorOnLastTwoDimensions(inputY.GetNumDimensions());
+        armnn::TensorInfo inputYStripped = armnnUtils::ReduceDims(inputY, 3);
+
+        auto permutationYVector = GeneratePermutationVectorOnLastTwoDimensions(inputYStripped.GetNumDimensions());
         const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
-        const TensorInfo permutedYInfo = armnnUtils::Permuted(inputY, permutationYVector);
-        aclPermutedYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedYInfo);
+        const TensorInfo permutedYInfo = armnnUtils::Permuted(inputYStripped, permutationYVector);
+        aclPermutedYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedYInfo, 3);
 
         statusPermuteY = arm_compute::CLPermute::validate(&aclInputYInfo,
                                                           &aclPermutedYInfo,
                                                           aclPermutationYVector);
-
     }
 
     const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false,  // is inputX reshaped
@@ -133,16 +140,24 @@ ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& d
     arm_compute::ICLTensor& output = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     inputX.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX));
+    arm_compute::TensorShape inputXTensorInfo = armcomputetensorutils::BuildArmComputeTensorShape(
+            info.m_InputTensorInfos[0].GetShape(), 3);
+    inputX.info()->set_tensor_shape(inputXTensorInfo);
     inputY.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY));
+    arm_compute::TensorShape inputYTensorInfo = armcomputetensorutils::BuildArmComputeTensorShape(
+            info.m_InputTensorInfos[1].GetShape(), 3);
+    inputY.info()->set_tensor_shape(inputYTensorInfo);
 
     arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo();
     arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo();
 
     if (descriptor.m_Parameters.m_TransposeX == true)
     {
+        armnn::TensorInfo strippedInfo = armnnUtils::ReduceDims(info.m_InputTensorInfos[0], 3);
+
         armnn::PermutationVector permutationXVector
-            = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions());
-        const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector);
+            = GeneratePermutationVectorOnLastTwoDimensions(strippedInfo.GetNumDimensions());
+        const TensorInfo permutedXInfo = armnnUtils::Permuted(strippedInfo, permutationXVector);
         const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
         armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo);
         armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorX);
@@ -157,9 +172,11 @@ ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& d
 
     if (descriptor.m_Parameters.m_TransposeY == true)
     {
+        armnn::TensorInfo strippedInfo = armnnUtils::ReduceDims(info.m_InputTensorInfos[1], 3);
+
         armnn::PermutationVector permutationYVector
-            = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[1].GetNumDimensions());
-        const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[1], permutationYVector);
+            = GeneratePermutationVectorOnLastTwoDimensions(strippedInfo.GetNumDimensions());
+        const TensorInfo permutedYInfo = armnnUtils::Permuted(strippedInfo, permutationYVector);
         const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
         armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo);
         armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorY);
-- 
cgit v1.2.1
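
The squeeze rule added by this patch can be checked in isolation with a few lines of plain C++. The following is a minimal sketch, not Arm NN code: ReduceDimsSketch is a hypothetical stand-in for armnnUtils::ReduceDims that operates on a std::vector instead of armnn::TensorShape.

    #include <cstdio>
    #include <vector>

    // Drop leading 1-valued dimensions until at most `dimensions` remain,
    // but stop skipping as soon as the first dimension has been kept, so
    // the total element count never changes.
    std::vector<unsigned int> ReduceDimsSketch(const std::vector<unsigned int>& shape,
                                               unsigned int dimensions)
    {
        if (shape.size() <= dimensions)
        {
            return shape; // already small enough, nothing to squeeze
        }
        std::vector<unsigned int> newShape;
        unsigned int dimsToSkip  = static_cast<unsigned int>(shape.size()) - dimensions;
        unsigned int dimsSkipped = 0;
        bool kept = false;

        for (unsigned int dim : shape)
        {
            if (dim == 1 && dimsSkipped < dimsToSkip && !kept)
            {
                ++dimsSkipped; // squeeze this leading 1 away
                continue;
            }
            newShape.push_back(dim);
            kept = true; // once a dimension is kept, stop skipping
        }
        return newShape;
    }

    int main()
    {
        // [1,2,3,4] -> [2,3,4]: the leading 1 is removed and CLGEMM sees a 3D tensor.
        // [2,1,3,4] -> [2,1,3,4]: the 1 is not leading, so nothing can be removed,
        // which is why the commit message only claims "some" 4D models.
        for (const auto& shape : { std::vector<unsigned int>{ 1, 2, 3, 4 },
                                   std::vector<unsigned int>{ 2, 1, 3, 4 } })
        {
            for (unsigned int dim : ReduceDimsSketch(shape, 3))
            {
                std::printf("%u ", dim);
            }
            std::printf("\n");
        }
        return 0;
    }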
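
ReduceDimsForACL builds its result with insert(newShape.begin(), ...) rather than push_back because arm_compute::TensorShape indexes dimensions from fastest-changing to slowest-changing, the reverse of armnn::TensorShape, so the squeezed shape must also be reversed before BuildArmComputeTensorShape copies it across with shape.set(i, ...). Below is a minimal sketch of that squeeze-then-reverse transformation, again with hypothetical names and assuming only the standard library; it performs the two steps separately to make the intent visible, where the patch fuses them into one loop.

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // armnn order: slowest-changing dimension first, e.g. NCHW -> { N, C, H, W }.
    // ACL order:   fastest-changing dimension first, e.g. NCHW -> { W, H, C, N }.
    std::vector<unsigned int> SqueezeThenReverse(std::vector<unsigned int> shape,
                                                 unsigned int dimensions)
    {
        // Squeeze leading 1s, as in the ReduceDims sketch above.
        while (shape.size() > dimensions && shape.front() == 1)
        {
            shape.erase(shape.begin());
        }
        // Flip into ACL's dimension order.
        std::reverse(shape.begin(), shape.end());
        return shape;
    }

    int main()
    {
        // { 1, 2, 3, 4 } squeezed to 3 dimensions is { 2, 3, 4 };
        // in ACL order that becomes { 4, 3, 2 }.
        const std::vector<unsigned int> expected{ 4, 3, 2 };
        assert(SqueezeThenReverse({ 1, 2, 3, 4 }, 3) == expected);
        return 0;
    }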