From 490b7becb8029ead26423b0d62e631a929e55d6c Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Tue, 3 Mar 2020 12:39:09 +0000 Subject: IVGCVSW-4375 Add support for Transpose to optimizations * Changed some existing Permutation specific optimizations to also support Transpose * Added MoveTransposeUp optimization * Added TransposeAsReshape optimization * Added tests for Transpose optimizations * Added missing layer tests for Transpose Signed-off-by: Mike Kelly Change-Id: I20d099b284861402ae94aaa5dbf34907327a485f --- Android.mk | 2 +- CMakeLists.txt | 4 +- src/armnn/Network.cpp | 7 +- src/armnn/optimizations/All.hpp | 16 +-- src/armnn/optimizations/MoveTransposeUp.hpp | 83 ++++++++++++++++ .../optimizations/OptimizeInversePermutes.hpp | 10 +- .../PermuteAndBatchToSpaceAsDepthToSpace.cpp | 87 ----------------- .../PermuteAndBatchToSpaceAsDepthToSpace.hpp | 88 ++++++++++++++++- src/armnn/optimizations/SquashEqualSiblings.hpp | 2 + src/armnn/optimizations/TransposeAsReshape.hpp | 81 ++++++++++++++++ .../test/TestNameAndDescriptorLayerVisitor.cpp | 7 ++ .../test/TestNameAndDescriptorLayerVisitor.hpp | 1 + .../test/optimizations/MoveTransposeUpTests.cpp | 93 ++++++++++++++++++ .../optimizations/OptimizeInversePermutesTests.cpp | 27 ++++++ .../PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp | 108 +++++++++++++++++++++ .../test/optimizations/TransposeAsReshapeTests.cpp | 60 ++++++++++++ 16 files changed, 572 insertions(+), 104 deletions(-) create mode 100644 src/armnn/optimizations/MoveTransposeUp.hpp delete mode 100644 src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp create mode 100644 src/armnn/optimizations/TransposeAsReshape.hpp create mode 100644 src/armnn/test/optimizations/MoveTransposeUpTests.cpp create mode 100644 src/armnn/test/optimizations/TransposeAsReshapeTests.cpp diff --git a/Android.mk b/Android.mk index e423f25d7c..e29f0f913e 100644 --- a/Android.mk +++ b/Android.mk @@ -95,7 +95,6 @@ LOCAL_SRC_FILES := \ src/armnn/NetworkUtils.cpp \ 
src/armnn/Observable.cpp \ src/armnn/Optimizer.cpp \ - src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp \ src/armnn/OutputHandler.cpp \ src/armnn/ProfilingEvent.cpp \ src/armnn/Profiling.cpp \ @@ -352,6 +351,7 @@ LOCAL_SRC_FILES := \ src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp \ src/armnn/test/optimizations/PermuteAsReshapeTests.cpp \ src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp \ + src/armnn/test/optimizations/TransposeAsReshapeTests.cpp \ src/armnn/test/OptimizerTests.cpp \ src/armnn/test/OptionalTest.cpp \ src/armnn/test/ProfilerTests.cpp \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 9396316446..f55f391622 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -441,12 +441,12 @@ list(APPEND armnn_sources src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp src/armnn/optimizations/FoldPadIntoConvolution2d.hpp src/armnn/optimizations/MovePermuteUp.hpp + src/armnn/optimizations/MoveTransposeUp.hpp src/armnn/optimizations/Optimization.hpp src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp src/armnn/optimizations/OptimizeInverseConversions.hpp src/armnn/optimizations/OptimizeInversePermutes.hpp src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp - src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp src/armnn/optimizations/PermuteAsReshape.hpp src/armnn/optimizations/SquashEqualSiblings.hpp src/profiling/BufferManager.cpp @@ -619,12 +619,14 @@ if(BUILD_UNIT_TESTS) src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp src/armnn/test/optimizations/InsertDebugLayerTests.cpp src/armnn/test/optimizations/MovePermuteUpTests.cpp + src/armnn/test/optimizations/MoveTransposeUpTests.cpp src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp src/armnn/test/optimizations/OptimizeInverseConversionsTests.cpp src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp 
src/armnn/test/optimizations/PermuteAsReshapeTests.cpp src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp + src/armnn/test/optimizations/TransposeAsReshapeTests.cpp src/armnn/test/OptionalTest.cpp src/armnn/test/ProfilerTests.cpp src/armnn/test/ProfilingEventTest.cpp diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 55bf51af00..50a7df6662 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -910,13 +910,18 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, // Perform optimisation passes using namespace optimizations; Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualTransposeSiblings(), SquashEqualReshapeSiblings(), OptimizeInversePermutes(), + OptimizeInverseTransposes(), MovePermuteUp(), + MoveTransposeUp(), PermuteAsReshape(), + TransposeAsReshape(), OptimizeConsecutiveReshapes(), FoldPadIntoConvolution2d(), - PermuteAndBatchToSpaceAsDepthToSpace())); + PermuteAndBatchToSpaceAsDepthToSpace(), + TransposeAndBatchToSpaceAsDepthToSpace())); // Infer the tensor infos for all output slots. 
Throws an exception on failure optGraph.InferTensorInfos(); diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 4ea3f7f2d4..273c337665 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -4,14 +4,16 @@ // #pragma once +#include "AddDebug.hpp" #include "ConvertConstants.hpp" +#include "ConvertFp32NetworkToFp16.hpp" +#include "FoldPadIntoConvolution2d.hpp" +#include "MovePermuteUp.hpp" +#include "MoveTransposeUp.hpp" +#include "OptimizeConsecutiveReshapes.hpp" +#include "OptimizeInverseConversions.hpp" #include "OptimizeInversePermutes.hpp" #include "PermuteAsReshape.hpp" -#include "OptimizeConsecutiveReshapes.hpp" +#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" #include "SquashEqualSiblings.hpp" -#include "MovePermuteUp.hpp" -#include "OptimizeInverseConversions.hpp" -#include "ConvertFp32NetworkToFp16.hpp" -#include "AddDebug.hpp" -#include "FoldPadIntoConvolution2d.hpp" -#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" \ No newline at end of file +#include "TransposeAsReshape.hpp" \ No newline at end of file diff --git a/src/armnn/optimizations/MoveTransposeUp.hpp b/src/armnn/optimizations/MoveTransposeUp.hpp new file mode 100644 index 0000000000..66543069c8 --- /dev/null +++ b/src/armnn/optimizations/MoveTransposeUp.hpp @@ -0,0 +1,83 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" + +#include + +namespace armnn +{ +namespace optimizations +{ +class MoveTransposeUpImpl +{ +public: + /// Run for every connection between a base Layer (any) and a child TransposeLayer. If the type + /// of the base layer allows it, it moves the permutation to the inputs of the base layer. + /// I.e., adds equivalent permutations before the inputs of the base layer and moves the + /// connections in the output of the child transpose layer to the output of the base layer. 
+ void Run(Graph& graph, InputSlot& connection) const + { + OutputSlot& baseOutput = *connection.GetConnectedOutputSlot(); + + if (baseOutput.GetNumConnections() == 1U) + { + Layer& base = baseOutput.GetOwningLayer(); + + if (CanMoveTransposeToInputs(base)) + { + auto transpose = boost::polymorphic_downcast(&connection.GetOwningLayer()); + const PermutationVector& perm = transpose->GetPermutation(); + + // Inserts an equivalent transpose before every input of the base layer. + for (auto baseInput = base.BeginInputSlots(); baseInput != base.EndInputSlots(); ++baseInput) + { + // Inserts a new transpose layer. + const std::string name = std::string("moved_up-") + transpose->GetName(); + TransposeLayer& permLayer = *graph.InsertNewLayer(*baseInput, perm, name.c_str()); + + // Sets output tensor info for the new layer. + OutputSlot& parentOutput = *permLayer.GetInputSlot(0).GetConnectedOutputSlot(); + const TensorInfo permOutInfo = armnnUtils::TransposeTensorShape(parentOutput.GetTensorInfo(), perm); + permLayer.GetOutputHandler().SetTensorInfo(permOutInfo); + } + + // Sets transposed output tensor info + const TensorInfo& childOutInfo = transpose->GetOutputHandler().GetTensorInfo(); + base.GetOutputHandler().SetTensorInfo(childOutInfo); + + // Bypasses transpose. It will be removed as it's left unconnected. 
+ transpose->GetOutputSlot().MoveAllConnections(base.GetOutputSlot()); + } + } + } + +protected: + MoveTransposeUpImpl() = default; + ~MoveTransposeUpImpl() = default; + +private: + static bool CanMoveTransposeToInputs(const Layer& base) + { + switch (base.GetType()) + { + case LayerType::Activation: + case LayerType::Addition: + case LayerType::FakeQuantization: + case LayerType::Floor: + case LayerType::MemCopy: + case LayerType::Multiplication: + return true; + default: + return false; + } + } +}; + +using MoveTransposeUp = OptimizeForConnection; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/OptimizeInversePermutes.hpp b/src/armnn/optimizations/OptimizeInversePermutes.hpp index 48bfa35440..77d62a50cb 100644 --- a/src/armnn/optimizations/OptimizeInversePermutes.hpp +++ b/src/armnn/optimizations/OptimizeInversePermutes.hpp @@ -13,6 +13,7 @@ namespace armnn namespace optimizations { +template class OptimizeInversePermutesImpl { public: @@ -22,9 +23,9 @@ public: { boost::ignore_unused(graph); Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); - auto child = boost::polymorphic_downcast(&connection.GetOwningLayer()); + auto child = boost::polymorphic_downcast(&connection.GetOwningLayer()); - if (child->IsInverse(*boost::polymorphic_downcast(&base))) + if (child->IsInverse(*boost::polymorphic_downcast(&base))) { // Bypass both layers. Child will be removed as it's left unconnected. // Base layer will be removed if left unconnected. 
@@ -37,7 +38,10 @@ protected: ~OptimizeInversePermutesImpl() = default; }; -using OptimizeInversePermutes = OptimizeForConnection; +using OptimizeInversePermutes = OptimizeForConnection>; +using OptimizeInverseTransposes = OptimizeForConnection>; } // namespace optimizations } // namespace armnn diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp deleted file mode 100644 index c42162b6c1..0000000000 --- a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// -// Copyright © 2019 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" - -using namespace armnn; -using namespace armnn::optimizations; - -void PermuteAndBatchToSpaceAsDepthToSpaceImpl::Run(Graph& graph, InputSlot& connection) const -{ - // Validate base layer (the Permute) is compatible - Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); - BOOST_ASSERT(base.GetType() == LayerType::Permute); - const TensorInfo& inputInfo = base.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& intermediateInfo = base.GetOutputSlot(0).GetTensorInfo(); - if (intermediateInfo.GetNumDimensions() != 4) - { - // Must be 4D, otherwise the below checks do not make sense - return; - } - if (!static_cast(base).GetParameters().m_DimMappings.IsEqual(PermutationVector{ 3, 1, 2, 0 })) - { - // Must swap batch and channels dimensions, otherwise it is not the (original) channels dimension - // that is being decomposed. 
- return; - } - - // Validate child layer (the BatchToSpace) is compatible - Layer& child = connection.GetOwningLayer(); - BOOST_ASSERT(child.GetType() == LayerType::BatchToSpaceNd); - const TensorInfo& outputInfo = child.GetOutputSlot(0).GetTensorInfo(); - const BatchToSpaceNdDescriptor& batchToSpaceDesc = static_cast(child).GetParameters(); - if (batchToSpaceDesc.m_DataLayout != DataLayout::NHWC) - { - // The rest of this function assumes NHWC, although in future this restriction could be lifted. - return; - } - if (batchToSpaceDesc.m_Crops != std::vector>{ { 0, 0 }, { 0, 0 } }) - { - // Cropping is not supported in DepthToSpace - return; - } - if (batchToSpaceDesc.m_BlockShape.size() != 2 || - batchToSpaceDesc.m_BlockShape[0] != batchToSpaceDesc.m_BlockShape[1]) - { - // Asymmetric or non-2D block sizes are not supported by DepthToSpace - return; - } - uint32_t blockSize = batchToSpaceDesc.m_BlockShape[0]; - if (outputInfo.GetShape()[0] != 1 || outputInfo.GetShape()[3] != 1) - { - // The final output must have 1 batch and 1 channel because these dimensions will be swapped around - // once we make the substitution, and it needs to be equivalent. - return; - } - - // Validate the intermediate tensor quantization params. - // These must be identical to either the input or output quantization params, otherwise the intermediate tensor - // may not have sufficient range/precision to preserve the values. - // This would mean that once we perform the substitution this loss of precision will no longer occur, - // so we would have changed the meaning of the network. 
- bool isIntermediateQuantParamsSameAsInput = - intermediateInfo.GetQuantizationScale() == inputInfo.GetQuantizationScale() && - intermediateInfo.GetQuantizationOffset() == inputInfo.GetQuantizationOffset(); - bool isIntermediateQuantParamsSameAsOutput = - intermediateInfo.GetQuantizationScale() == outputInfo.GetQuantizationScale() && - intermediateInfo.GetQuantizationOffset() == outputInfo.GetQuantizationOffset(); - if (!isIntermediateQuantParamsSameAsInput && !isIntermediateQuantParamsSameAsOutput) - { - return; - } - - // Insert equivalent DepthToSpace layer - const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName(); - - // Inserts equivalent reshape before base layer. - const DepthToSpaceDescriptor depthToSpaceDesc(blockSize, DataLayout::NHWC); - auto& depthToSpace = *graph.InsertNewLayer(base.GetInputSlot(0), depthToSpaceDesc, name.c_str()); - depthToSpace.GetOutputHandler().SetTensorInfo(outputInfo); - - // Moves connections from child output to new layer. - // Child layer will be removed as it's left unconnected. - // Base layer will be removed if left unconnected. - child.GetOutputSlot().MoveAllConnections(depthToSpace.GetOutputSlot()); -} diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp index 4a73efca40..21aed869f5 100644 --- a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp +++ b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp @@ -14,14 +14,94 @@ namespace optimizations /// Replaces Permute leading into BatchToSpace with a DepthToSpace /// in the case where the Permute swaps the batch and channels dimensions /// such that the replacement is valid. 
+template class PermuteAndBatchToSpaceAsDepthToSpaceImpl { public: - void Run(Graph& graph, InputSlot& connection) const; -}; + void Run(Graph& graph, InputSlot& connection) const + { + // Validate base layer (the Permute) is compatible + Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + BOOST_ASSERT(base.GetType() == LayerType::Permute || base.GetType() == LayerType::Transpose); + const TensorInfo& inputInfo = base.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& intermediateInfo = base.GetOutputSlot(0).GetTensorInfo(); + if (intermediateInfo.GetNumDimensions() != 4) + { + // Must be 4D, otherwise the below checks do not make sense + return; + } + if (!static_cast(base).GetParameters().m_DimMappings.IsEqual(PermutationVector{ 3, 1, 2, 0 })) + { + // Must swap batch and channels dimensions, otherwise it is not the (original) channels dimension + // that is being decomposed. + return; + } + + // Validate child layer (the BatchToSpace) is compatible + Layer& child = connection.GetOwningLayer(); + BOOST_ASSERT(child.GetType() == LayerType::BatchToSpaceNd); + const TensorInfo& outputInfo = child.GetOutputSlot(0).GetTensorInfo(); + const BatchToSpaceNdDescriptor& batchToSpaceDesc = static_cast(child).GetParameters(); + if (batchToSpaceDesc.m_DataLayout != DataLayout::NHWC) + { + // The rest of this function assumes NHWC, although in future this restriction could be lifted. 
+ return; + } + if (batchToSpaceDesc.m_Crops != std::vector>{ { 0, 0 }, { 0, 0 } }) + { + // Cropping is not supported in DepthToSpace + return; + } + if (batchToSpaceDesc.m_BlockShape.size() != 2 || + batchToSpaceDesc.m_BlockShape[0] != batchToSpaceDesc.m_BlockShape[1]) + { + // Asymmetric or non-2D block sizes are not supported by DepthToSpace + return; + } + uint32_t blockSize = batchToSpaceDesc.m_BlockShape[0]; + if (outputInfo.GetShape()[0] != 1 || outputInfo.GetShape()[3] != 1) + { + // The final output must have 1 batch and 1 channel because these dimensions will be swapped around + // once we make the substitution, and it needs to be equivalent. + return; + } + + // Validate the intermediate tensor quantization params. + // These must be identical to either the input or output quantization params, otherwise the intermediate tensor + // may not have sufficient range/precision to preserve the values. + // This would mean that once we perform the substitution this loss of precision will no longer occur, + // so we would have changed the meaning of the network. + bool isIntermediateQuantParamsSameAsInput = + intermediateInfo.GetQuantizationScale() == inputInfo.GetQuantizationScale() && + intermediateInfo.GetQuantizationOffset() == inputInfo.GetQuantizationOffset(); + bool isIntermediateQuantParamsSameAsOutput = + intermediateInfo.GetQuantizationScale() == outputInfo.GetQuantizationScale() && + intermediateInfo.GetQuantizationOffset() == outputInfo.GetQuantizationOffset(); + if (!isIntermediateQuantParamsSameAsInput && !isIntermediateQuantParamsSameAsOutput) + { + return; + } -using PermuteAndBatchToSpaceAsDepthToSpace = - OptimizeForConnection; + // Insert equivalent DepthToSpace layer + const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName(); + + // Inserts equivalent reshape before base layer. 
+ const DepthToSpaceDescriptor depthToSpaceDesc(blockSize, DataLayout::NHWC); + auto& depthToSpace = *graph.InsertNewLayer(base.GetInputSlot(0), + depthToSpaceDesc, + name.c_str()); + depthToSpace.GetOutputHandler().SetTensorInfo(outputInfo); + + // Moves connections from child output to new layer. + // Child layer will be removed as it's left unconnected. + // Base layer will be removed if left unconnected. + child.GetOutputSlot().MoveAllConnections(depthToSpace.GetOutputSlot()); + } +}; +using PermuteAndBatchToSpaceAsDepthToSpace = OptimizeForConnection>; +using TransposeAndBatchToSpaceAsDepthToSpace = OptimizeForConnection>; } // namespace optimizations } // namespace armnn diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp index 12637ba9b6..d5a8a5d81e 100644 --- a/src/armnn/optimizations/SquashEqualSiblings.hpp +++ b/src/armnn/optimizations/SquashEqualSiblings.hpp @@ -64,6 +64,8 @@ protected: }; using SquashEqualPermuteSiblings = OptimizeForConnection>; +using SquashEqualTransposeSiblings = OptimizeForConnection>; using SquashEqualReshapeSiblings = OptimizeForConnection>; } // namespace optimizations diff --git a/src/armnn/optimizations/TransposeAsReshape.hpp b/src/armnn/optimizations/TransposeAsReshape.hpp new file mode 100644 index 0000000000..4bb2f192f3 --- /dev/null +++ b/src/armnn/optimizations/TransposeAsReshape.hpp @@ -0,0 +1,81 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class TransposeAsReshapeImpl +{ +public: + /// Run for every TransposeLayer. Replaces it with a ReshapeLayer if they are equivalent. 
+ void Run(Graph& graph, TransposeLayer& transpose) const + { + if (IsReshape(transpose)) + { + const TensorInfo& outInfo = transpose.GetOutputHandler().GetTensorInfo(); + + const std::string name = std::string("as_reshape-") + transpose.GetName(); + const ReshapeDescriptor descriptor{outInfo.GetShape()}; + // Inserts NewLayer so layers don't need to be re-sorted. + auto reshape = graph.InsertNewLayer(transpose.GetInputSlot(0), descriptor, name.c_str()); + reshape->GetOutputHandler().SetTensorInfo(outInfo); + + // Bypass transpose. It will be deleted since it's left unconnected. + transpose.GetOutputSlot().MoveAllConnections(reshape->GetOutputSlot()); + } + } + +protected: + TransposeAsReshapeImpl() = default; + ~TransposeAsReshapeImpl() = default; + +private: + static bool IsReshape(const TransposeLayer& layer) + { + const TensorShape& outShape = layer.GetOutputHandler().GetTensorInfo().GetShape(); + const PermutationVector& permutation = layer.GetPermutation(); + + const unsigned int numDimensions = permutation.GetSize(); + std::map permuteMappings; + for (unsigned int i = 0; i < permutation.GetSize(); ++i) + { + permuteMappings[permutation[i]] = i; + } + + std::vector permuteVector; + for (unsigned int i = 0; i < permutation.GetSize(); ++i) + { + permuteVector.push_back(permuteMappings.at(i)); + } + + unsigned int lastGtOne = 0; + while ((lastGtOne < numDimensions) && (outShape[(permuteVector[lastGtOne])] == 1U)) + { + ++lastGtOne; + } + + bool isReshape = true; + for (unsigned int i = lastGtOne + 1U; isReshape && (i < numDimensions); ++i) + { + if (outShape[permuteVector[i]] > 1U) + { + isReshape = permuteVector[lastGtOne] < permuteVector[i]; + lastGtOne = i; + } + } + + return isReshape; + } +}; + +using TransposeAsReshape = OptimizeForType; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/test/TestNameAndDescriptorLayerVisitor.cpp b/src/armnn/test/TestNameAndDescriptorLayerVisitor.cpp index efe50a5b58..431db2aa0d 100644 --- 
a/src/armnn/test/TestNameAndDescriptorLayerVisitor.cpp +++ b/src/armnn/test/TestNameAndDescriptorLayerVisitor.cpp @@ -247,6 +247,12 @@ armnn::StridedSliceDescriptor GetDescriptor() return descriptor; } +template<> +armnn::TransposeDescriptor GetDescriptor() +{ + return armnn::TransposeDescriptor({ 0, 1, 2, 3 }); +} + } // anonymous namespace BOOST_AUTO_TEST_SUITE(TestNameAndDescriptorLayerVisitor) @@ -275,5 +281,6 @@ TEST_SUITE_NAME_AND_DESCRIPTOR_LAYER_VISITOR(SpaceToDepth) TEST_SUITE_NAME_AND_DESCRIPTOR_LAYER_VISITOR(Splitter) TEST_SUITE_NAME_AND_DESCRIPTOR_LAYER_VISITOR(Stack) TEST_SUITE_NAME_AND_DESCRIPTOR_LAYER_VISITOR(StridedSlice) +TEST_SUITE_NAME_AND_DESCRIPTOR_LAYER_VISITOR(Transpose) BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp b/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp index f792bc3554..b9877a8111 100644 --- a/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp +++ b/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp @@ -67,3 +67,4 @@ DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Splitter) DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Stack) DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(StandIn) DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(StridedSlice) +DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Transpose) diff --git a/src/armnn/test/optimizations/MoveTransposeUpTests.cpp b/src/armnn/test/optimizations/MoveTransposeUpTests.cpp new file mode 100644 index 0000000000..e2fb3abffb --- /dev/null +++ b/src/armnn/test/optimizations/MoveTransposeUpTests.cpp @@ -0,0 +1,93 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "../TestUtils.hpp" + +#include + +#include + +BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; + +BOOST_AUTO_TEST_CASE(MoveTransposeUpTest) +{ + const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float32); + const armnn::TensorInfo transposed({ 1, 3, 5, 2 }, armnn::DataType::Float32); + + armnn::Graph graph; + + armnn::LayerBindingId inputId = 0; + + armnn::Layer* head = graph.AddLayer(0, "output"); + + std::string transposeLayerName = "original_transpose"; + + // Insert transpose + head = graph.InsertNewLayer(head->GetInputSlot(0), + armnn::TransposeDescriptor({ 0, 3, 1, 2 }), + transposeLayerName.c_str()); + + head->GetOutputHandler().SetTensorInfo(transposed); + + // Inserts layers that don't care about data format. + head = graph.InsertNewLayer(head->GetInputSlot(0), armnn::ActivationDescriptor{}, ""); + head->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + // Inserts input for 2nd input of Addition. + graph.InsertNewLayer(head->GetInputSlot(1), inputId++, "") + ->GetOutputHandler() + .SetTensorInfo(info); + + head = graph.InsertNewLayer(head->GetInputSlot(0), + armnn::FakeQuantizationDescriptor{}, ""); + head->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + // Inserts input for 2nd input of Multiplication. + graph.InsertNewLayer(head->GetInputSlot(1), inputId++, "") + ->GetOutputHandler() + .SetTensorInfo(info); + + // Inserts input. 
+ graph.InsertNewLayer(head->GetInputSlot(0), inputId++, "") + ->GetOutputHandler() + .SetTensorInfo(info); + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType)); + + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(MoveTransposeUp())); + + // The transpose is moved to the top. New transposes for layers with multiple inputs. + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType)); + + std::list testRelatedLayers = { transposeLayerName }; + + BOOST_TEST(CheckRelatedLayers(graph, testRelatedLayers)); +} + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp b/src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp index dcf955956d..21f791c5ff 100644 --- a/src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp +++ b/src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp @@ -39,4 +39,31 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutesTest) &IsLayerOfType)); } +BOOST_AUTO_TEST_CASE(OptimizeInverseTransposesTest) +{ + armnn::Graph graph; + + auto output = graph.AddLayer(0, "output"); + + graph.InsertNewLayer(output->GetInputSlot(0), 0, "input"); + + // Inserts two permutes, one the inverse of the other. 
+ graph.InsertNewLayer(output->GetInputSlot(0), + armnn::TransposeDescriptor({ 0, 3, 1, 2 }), + "transpose0312"); + graph.InsertNewLayer(output->GetInputSlot(0), + armnn::TransposeDescriptor({ 0, 2, 3, 1 }), + "transpose0231"); + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType)); + + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInverseTransposes())); + + // The permutes are removed. + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType)); +} + BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp b/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp index 74ee18b482..c2180a63ca 100644 --- a/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp +++ b/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp @@ -49,6 +49,37 @@ INetworkPtr CreateTestNetwork() return network; } +/// Shared function for the below tests, so that we test the same network in both cases. 
+INetworkPtr CreateTransposeTestNetwork() +{ + // Create a network + INetworkPtr network = INetwork::Create(); + + auto input = network->AddInputLayer(0, "input"); + const TensorInfo inputInfo({ 1, 2, 3, 4 }, DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(inputInfo); + + // Insert Transpose which swaps batches and channels dimensions + auto permute = network->AddTransposeLayer(TransposeDescriptor(PermutationVector{ 3, 1, 2, 0 }), "permute"); + const TensorInfo permuteInfo({ 4, 2, 3, 1 }, DataType::Float32); + permute->GetOutputSlot(0).SetTensorInfo(permuteInfo); + input->GetOutputSlot(0).Connect(permute->GetInputSlot(0)); + + // Insert BatchToSpace + BatchToSpaceNdDescriptor batchToSpaceDesc; + batchToSpaceDesc.m_BlockShape = { 2, 2 }; + batchToSpaceDesc.m_DataLayout = DataLayout::NHWC; + auto batchToSpace = network->AddBatchToSpaceNdLayer(batchToSpaceDesc, "batchToSpace"); + const TensorInfo batchToSpaceInfo({ 1, 4, 6, 1 }, DataType::Float32); + batchToSpace->GetOutputSlot(0).SetTensorInfo(batchToSpaceInfo); + permute->GetOutputSlot(0).Connect(batchToSpace->GetInputSlot(0)); + + auto output = network->AddOutputLayer(0, "output"); + batchToSpace->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + return network; +} + } // namespace /// Tests that the optimization performed by PermuteAndBatchToSpaceAsDepthToSpace is as expected. @@ -81,6 +112,36 @@ BOOST_AUTO_TEST_CASE(PermuteAndBatchToSpaceAsDepthToSpaceOptimizerTest) BOOST_TEST(CheckRelatedLayers(graph, testRelatedLayers)); } +/// Tests that the optimization performed by TransposeAndBatchToSpaceAsDepthToSpace is as expected. +/// Note this does not ensure the correctness of the optimization - that is done in the below test. 
+BOOST_AUTO_TEST_CASE(TransposeAndBatchToSpaceAsDepthToSpaceOptimizerTest) +{ + INetworkPtr network = CreateTransposeTestNetwork(); + Graph graph = static_cast(network.get())->GetGraph(); + + // Confirm initial graph is as we expect + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType)); + + // Perform the optimization which should merge the two layers into a DepthToSpace + armnn::Optimizer::Pass(graph, MakeOptimizations(TransposeAndBatchToSpaceAsDepthToSpace())); + + // Check that the replacement has been made as expected + auto checkDepthToSpace = [](const Layer* const layer) -> bool { + return IsLayerOfType(layer) && + static_cast(layer)->GetParameters().m_BlockSize == 2 && + static_cast(layer)->GetParameters().m_DataLayout == DataLayout::NHWC && + layer->GetOutputHandler().GetTensorInfo() == TensorInfo({ 1, 4, 6, 1 }, DataType::Float32); + }; + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, checkDepthToSpace, + &IsLayerOfType)); + + // Check the new layer has the two merged layers listed as related layers + std::list testRelatedLayers = { "batchToSpace", "permute" }; + BOOST_TEST(CheckRelatedLayers(graph, testRelatedLayers)); +} + // This unit test needs the reference backend, it's not available if the reference backend is not built #if defined(ARMNNREF_ENABLED) @@ -130,6 +191,53 @@ BOOST_AUTO_TEST_CASE(PermuteAndBatchToSpaceAsDepthToSpaceCorrectnessTest) }; BOOST_TEST(outputData == expectedOutput); } + +/// Tests that the optimization performed by TransposeAndBatchToSpaceAsDepthToSpace does not change the behaviour +/// of the network (i.e. it still produces the correct output). 
+BOOST_AUTO_TEST_CASE(TransposeAndBatchToSpaceAsDepthToSpaceCorrectnessTest) +{ + INetworkPtr network = CreateTransposeTestNetwork(); + + IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions()); + IOptimizedNetworkPtr optimizedNetwork = Optimize(*network, { Compute::CpuRef }, runtime->GetDeviceSpec()); + + // Confirm that the optimization has actually taken place + const Graph& optGraph = static_cast(optimizedNetwork.get())->GetGraph(); + BOOST_TEST(CheckSequence(optGraph.cbegin(), optGraph.cend(), &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType)); + + // Load the graph into a runtime so we can check it produces the correct output + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optimizedNetwork)); + + std::vector inputData{ + // Each row here is a row of pixels where each pixel has 4 channels + // clang-format off + 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f, 100.0f, 200.0f, 300.0f, 400.0f, + -1.0f, -2.0f, -3.0f, -4.0f, -10.0f, -20.0f, -30.0f, -40.0f, -100.0f, -200.0f, -300.0f, -400.0f, + // clang-format on + }; + ConstTensor input(TensorInfo({ 1, 2, 3, 4 }, DataType::Float32), inputData); + InputTensors inputs = { { 0, input } }; + std::vector outputData(4 * 6); + Tensor output(TensorInfo({ 1, 4, 6, 1 }, DataType::Float32), outputData.data()); + OutputTensors outputs = { { 0, output } }; + runtime->EnqueueWorkload(netId, inputs, outputs); + + // Check the output is as expected. + // Note this output has been generated by running the network *without* the optimization. + std::vector expectedOutput = { + // Rows and columns here match exactly with the tensor, as there is only 1 channel. 
+ // clang-format off + 1.0f, 2.0f, 10.0f, 20.0f, 100.0f, 200.0f, + 3.0f, 4.0f, 30.0f, 40.0f, 300.0f, 400.0f, + + -1.0f, -2.0f, -10.0f, -20.0f, -100.0f, -200.0f, + -3.0f, -4.0f, -30.0f, -40.0f, -300.0f, -400.0f, + // clang-format on + }; + BOOST_TEST(outputData == expectedOutput); +} #endif BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/armnn/test/optimizations/TransposeAsReshapeTests.cpp b/src/armnn/test/optimizations/TransposeAsReshapeTests.cpp new file mode 100644 index 0000000000..3c6ed6eea8 --- /dev/null +++ b/src/armnn/test/optimizations/TransposeAsReshapeTests.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "../TestUtils.hpp" + +#include + +#include + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; + +BOOST_AUTO_TEST_CASE(TransposeAsReshapeTest) +{ + armnn::Graph graph; + + std::string transposeLayerName = "transpose"; + + const armnn::TensorInfo infoIn({ 1, 2, 3, 1 }, armnn::DataType::Float32); + const armnn::TensorInfo infoOut({ 1, 1, 2, 3 }, armnn::DataType::Float32); + + auto output = graph.AddLayer(0, "output"); + + graph.InsertNewLayer(output->GetInputSlot(0), 0, "input") + ->GetOutputHandler() + .SetTensorInfo(infoIn); + + // Inserts transpose. + graph + .InsertNewLayer(output->GetInputSlot(0), armnn::TransposeDescriptor({ 0, 3, 1, 2 }), + transposeLayerName.c_str()) + ->GetOutputHandler() + .SetTensorInfo(infoOut); + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, &IsLayerOfType)); + + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(TransposeAsReshape())); + + // The transpose is replaced by an equivalent reshape. 
+ + auto checkReshape = [&infoOut](const armnn::Layer* const layer) -> bool { + const auto reshapeLayer = static_cast(layer); + return IsLayerOfType(layer) && + (reshapeLayer->GetParameters().m_TargetShape == infoOut.GetShape()) && + (reshapeLayer->GetOutputHandler().GetTensorInfo().GetShape() == infoOut.GetShape()); + }; + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, checkReshape, + &IsLayerOfType)); + + std::list testRelatedLayers = { transposeLayerName }; + BOOST_TEST(CheckRelatedLayers(graph, testRelatedLayers)); +} + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file -- cgit v1.2.1