author    Mike Kelly <mike.kelly@arm.com>    2020-03-03 12:39:09 +0000
committer mike.kelly <mike.kelly@arm.com>    2020-03-03 15:05:30 +0000
commit    490b7becb8029ead26423b0d62e631a929e55d6c (patch)
tree      31148ace54164f62927062b662b2526f22a02e95 /src/armnn/optimizations
parent    4a9e24bfc51eec7e593470091fb7e6e435ae3991 (diff)
download  armnn-490b7becb8029ead26423b0d62e631a929e55d6c.tar.gz
IVGCVSW-4375 Add support for Transpose to optimizations
* Changed some existing Permutation-specific optimizations to also support Transpose
* Added MoveTransposeUp optimization
* Added TransposeAsReshape optimization
* Added tests for Transpose optimizations
* Added missing layer tests for Transpose

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I20d099b284861402ae94aaa5dbf34907327a485f
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r--  src/armnn/optimizations/All.hpp                                   16
-rw-r--r--  src/armnn/optimizations/MoveTransposeUp.hpp                       83
-rw-r--r--  src/armnn/optimizations/OptimizeInversePermutes.hpp               10
-rw-r--r--  src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp  87
-rw-r--r--  src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp  88
-rw-r--r--  src/armnn/optimizations/SquashEqualSiblings.hpp                    2
-rw-r--r--  src/armnn/optimizations/TransposeAsReshape.hpp                    81
7 files changed, 266 insertions(+), 101 deletions(-)
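For readers new to this change: Permute and Transpose accept the same PermutationVector type but read it with opposite conventions, which is why the two layer types coexist and why most passes below are templated on the layer type rather than merged. A sketch of the usual reading, assumed here (verify against armnn/Descriptors.hpp):

// Permute:   output dim mappings[i] is taken from input dim i
//            ("where does input axis i go?")
// Transpose: output dim i is taken from input dim mappings[i]
//            ("where does output axis i come from?")
// e.g. mappings = {0, 3, 1, 2} applied to an NCHW tensor:
//   Permute   -> NHWC
//   Transpose -> NWCH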
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 4ea3f7f2d4..273c337665 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -4,14 +4,16 @@
//
#pragma once
+#include "AddDebug.hpp"
#include "ConvertConstants.hpp"
+#include "ConvertFp32NetworkToFp16.hpp"
+#include "FoldPadIntoConvolution2d.hpp"
+#include "MovePermuteUp.hpp"
+#include "MoveTransposeUp.hpp"
+#include "OptimizeConsecutiveReshapes.hpp"
+#include "OptimizeInverseConversions.hpp"
#include "OptimizeInversePermutes.hpp"
#include "PermuteAsReshape.hpp"
-#include "OptimizeConsecutiveReshapes.hpp"
+#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp"
#include "SquashEqualSiblings.hpp"
-#include "MovePermuteUp.hpp"
-#include "OptimizeInverseConversions.hpp"
-#include "ConvertFp32NetworkToFp16.hpp"
-#include "AddDebug.hpp"
-#include "FoldPadIntoConvolution2d.hpp"
-#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" \ No newline at end of file
+#include "TransposeAsReshape.hpp" \ No newline at end of file
diff --git a/src/armnn/optimizations/MoveTransposeUp.hpp b/src/armnn/optimizations/MoveTransposeUp.hpp
new file mode 100644
index 0000000000..66543069c8
--- /dev/null
+++ b/src/armnn/optimizations/MoveTransposeUp.hpp
@@ -0,0 +1,83 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Optimization.hpp"
+
+#include <armnnUtils/Transpose.hpp>
+
+namespace armnn
+{
+namespace optimizations
+{
+class MoveTransposeUpImpl
+{
+public:
+ /// Run for every connection between a base Layer (any) and a child TransposeLayer. If the type
+ /// of the base layer allows it, it moves the transpose to the inputs of the base layer.
+ /// I.e., adds an equivalent transpose before each input of the base layer and moves the
+ /// connections from the output of the child transpose layer to the output of the base layer.
+ void Run(Graph& graph, InputSlot& connection) const
+ {
+ OutputSlot& baseOutput = *connection.GetConnectedOutputSlot();
+
+ if (baseOutput.GetNumConnections() == 1U)
+ {
+ Layer& base = baseOutput.GetOwningLayer();
+
+ if (CanMoveTransposeToInputs(base))
+ {
+ auto transpose = boost::polymorphic_downcast<TransposeLayer*>(&connection.GetOwningLayer());
+ const PermutationVector& perm = transpose->GetPermutation();
+
+ // Inserts an equivalent transpose before every input of the base layer.
+ for (auto baseInput = base.BeginInputSlots(); baseInput != base.EndInputSlots(); ++baseInput)
+ {
+ // Inserts a new transpose layer.
+ const std::string name = std::string("moved_up-") + transpose->GetName();
+ TransposeLayer& permLayer = *graph.InsertNewLayer<TransposeLayer>(*baseInput, perm, name.c_str());
+
+ // Sets output tensor info for the new layer.
+ OutputSlot& parentOutput = *permLayer.GetInputSlot(0).GetConnectedOutputSlot();
+ const TensorInfo permOutInfo = armnnUtils::TransposeTensorShape(parentOutput.GetTensorInfo(), perm);
+ permLayer.GetOutputHandler().SetTensorInfo(permOutInfo);
+ }
+
+ // Sets transposed output tensor info
+ const TensorInfo& childOutInfo = transpose->GetOutputHandler().GetTensorInfo();
+ base.GetOutputHandler().SetTensorInfo(childOutInfo);
+
+ // Bypasses transpose. It will be removed as it's left unconnected.
+ transpose->GetOutputSlot().MoveAllConnections(base.GetOutputSlot());
+ }
+ }
+ }
+
+protected:
+ MoveTransposeUpImpl() = default;
+ ~MoveTransposeUpImpl() = default;
+
+private:
+ static bool CanMoveTransposeToInputs(const Layer& base)
+ {
+ switch (base.GetType())
+ {
+ case LayerType::Activation:
+ case LayerType::Addition:
+ case LayerType::FakeQuantization:
+ case LayerType::Floor:
+ case LayerType::MemCopy:
+ case LayerType::Multiplication:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
+using MoveTransposeUp = OptimizeForConnection<Layer, TransposeLayer, MoveTransposeUpImpl>;
+
+} // namespace optimizations
+} // namespace armnn
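The whitelist in CanMoveTransposeToInputs is what makes the rewrite sound: every listed layer type is elementwise or layout-agnostic, so it commutes with a transpose of its inputs. A standalone check of that identity for the Addition case, independent of ArmNN:

// Verifies transpose(a) + transpose(b) == transpose(a + b) on a 2x3 example.
#include <cassert>

int main()
{
    constexpr int R = 2, C = 3;
    int a[R][C] = {{1, 2, 3}, {4, 5, 6}};
    int b[R][C] = {{7, 8, 9}, {1, 1, 1}};

    int ta[C][R], tb[C][R], sum[R][C];
    for (int i = 0; i < R; ++i)
    {
        for (int j = 0; j < C; ++j)
        {
            ta[j][i]  = a[i][j];            // transpose(a)
            tb[j][i]  = b[i][j];            // transpose(b)
            sum[i][j] = a[i][j] + b[i][j];  // a + b
        }
    }

    for (int i = 0; i < R; ++i)
    {
        for (int j = 0; j < C; ++j)
        {
            assert(ta[j][i] + tb[j][i] == sum[i][j]);
        }
    }
    return 0;
}

Moving the transpose above such a layer is profitable because it can then meet, and cancel against, an inverse transpose produced earlier in the graph.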
diff --git a/src/armnn/optimizations/OptimizeInversePermutes.hpp b/src/armnn/optimizations/OptimizeInversePermutes.hpp
index 48bfa35440..77d62a50cb 100644
--- a/src/armnn/optimizations/OptimizeInversePermutes.hpp
+++ b/src/armnn/optimizations/OptimizeInversePermutes.hpp
@@ -13,6 +13,7 @@ namespace armnn
namespace optimizations
{
+template <typename PermuteType>
class OptimizeInversePermutesImpl
{
public:
@@ -22,9 +23,9 @@ public:
{
boost::ignore_unused(graph);
Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
- auto child = boost::polymorphic_downcast<PermuteLayer*>(&connection.GetOwningLayer());
+ auto child = boost::polymorphic_downcast<PermuteType*>(&connection.GetOwningLayer());
- if (child->IsInverse(*boost::polymorphic_downcast<PermuteLayer*>(&base)))
+ if (child->IsInverse(*boost::polymorphic_downcast<PermuteType*>(&base)))
{
// Bypass both layers. Child will be removed as it's left unconnected.
// Base layer will be removed if left unconnected.
@@ -37,7 +38,10 @@ protected:
~OptimizeInversePermutesImpl() = default;
};
-using OptimizeInversePermutes = OptimizeForConnection<PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl>;
+using OptimizeInversePermutes = OptimizeForConnection<PermuteLayer, PermuteLayer,
+ OptimizeInversePermutesImpl<PermuteLayer>>;
+using OptimizeInverseTransposes = OptimizeForConnection<TransposeLayer, TransposeLayer,
+ OptimizeInversePermutesImpl<TransposeLayer>>;
} // namespace optimizations
} // namespace armnn
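The bypass fires only when the child permutation undoes the base one. IsInverse is implemented on the layer classes elsewhere in the codebase; a minimal standalone sketch of the underlying check:

// A permutation q undoes p exactly when q[p[i]] == i for every axis i.
#include <vector>

bool IsInversePermutation(const std::vector<unsigned int>& p,
                          const std::vector<unsigned int>& q)
{
    if (p.size() != q.size())
    {
        return false;
    }
    for (unsigned int i = 0; i < p.size(); ++i)
    {
        if (q[p[i]] != i)
        {
            return false;
        }
    }
    return true;
}

// e.g. p = {0, 2, 3, 1} is undone by q = {0, 3, 1, 2}; two such
// back-to-back layers move every element back to where it started,
// so both can be bypassed.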
diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp
deleted file mode 100644
index c42162b6c1..0000000000
--- a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-//
-// Copyright © 2019 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp"
-
-using namespace armnn;
-using namespace armnn::optimizations;
-
-void PermuteAndBatchToSpaceAsDepthToSpaceImpl::Run(Graph& graph, InputSlot& connection) const
-{
- // Validate base layer (the Permute) is compatible
- Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
- BOOST_ASSERT(base.GetType() == LayerType::Permute);
- const TensorInfo& inputInfo = base.GetInputSlot(0).GetConnection()->GetTensorInfo();
- const TensorInfo& intermediateInfo = base.GetOutputSlot(0).GetTensorInfo();
- if (intermediateInfo.GetNumDimensions() != 4)
- {
- // Must be 4D, otherwise the below checks do not make sense
- return;
- }
- if (!static_cast<PermuteLayer&>(base).GetParameters().m_DimMappings.IsEqual(PermutationVector{ 3, 1, 2, 0 }))
- {
- // Must swap batch and channels dimensions, otherwise it is not the (original) channels dimension
- // that is being decomposed.
- return;
- }
-
- // Validate child layer (the BatchToSpace) is compatible
- Layer& child = connection.GetOwningLayer();
- BOOST_ASSERT(child.GetType() == LayerType::BatchToSpaceNd);
- const TensorInfo& outputInfo = child.GetOutputSlot(0).GetTensorInfo();
- const BatchToSpaceNdDescriptor& batchToSpaceDesc = static_cast<BatchToSpaceNdLayer&>(child).GetParameters();
- if (batchToSpaceDesc.m_DataLayout != DataLayout::NHWC)
- {
- // The rest of this function assumes NHWC, although in future this restriction could be lifted.
- return;
- }
- if (batchToSpaceDesc.m_Crops != std::vector<std::pair<unsigned int, unsigned int>>{ { 0, 0 }, { 0, 0 } })
- {
- // Cropping is not supported in DepthToSpace
- return;
- }
- if (batchToSpaceDesc.m_BlockShape.size() != 2 ||
- batchToSpaceDesc.m_BlockShape[0] != batchToSpaceDesc.m_BlockShape[1])
- {
- // Asymmetric or non-2D block sizes are not supported by DepthToSpace
- return;
- }
- uint32_t blockSize = batchToSpaceDesc.m_BlockShape[0];
- if (outputInfo.GetShape()[0] != 1 || outputInfo.GetShape()[3] != 1)
- {
- // The final output must have 1 batch and 1 channel because these dimensions will be swapped around
- // once we make the substitution, and it needs to be equivalent.
- return;
- }
-
- // Validate the intermediate tensor quantization params.
- // These must be identical to either the input or output quantization params, otherwise the intermediate tensor
- // may not have sufficient range/precision to preserve the values.
- // This would mean that once we perform the substitution this loss of precision will no longer occur,
- // so we would have changed the meaning of the network.
- bool isIntermediateQuantParamsSameAsInput =
- intermediateInfo.GetQuantizationScale() == inputInfo.GetQuantizationScale() &&
- intermediateInfo.GetQuantizationOffset() == inputInfo.GetQuantizationOffset();
- bool isIntermediateQuantParamsSameAsOutput =
- intermediateInfo.GetQuantizationScale() == outputInfo.GetQuantizationScale() &&
- intermediateInfo.GetQuantizationOffset() == outputInfo.GetQuantizationOffset();
- if (!isIntermediateQuantParamsSameAsInput && !isIntermediateQuantParamsSameAsOutput)
- {
- return;
- }
-
- // Insert equivalent DepthToSpace layer
- const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName();
-
- // Inserts equivalent reshape before base layer.
- const DepthToSpaceDescriptor depthToSpaceDesc(blockSize, DataLayout::NHWC);
- auto& depthToSpace = *graph.InsertNewLayer<DepthToSpaceLayer>(base.GetInputSlot(0), depthToSpaceDesc, name.c_str());
- depthToSpace.GetOutputHandler().SetTensorInfo(outputInfo);
-
- // Moves connections from child output to new layer.
- // Child layer will be removed as it's left unconnected.
- // Base layer will be removed if left unconnected.
- child.GetOutputSlot().MoveAllConnections(depthToSpace.GetOutputSlot());
-}
diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp
index 4a73efca40..21aed869f5 100644
--- a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp
+++ b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp
@@ -14,14 +14,94 @@ namespace optimizations
/// Replaces Permute leading into BatchToSpace with a DepthToSpace
/// in the case where the Permute swaps the batch and channels dimensions
/// such that the replacement is valid.
+template <typename PermuteType>
class PermuteAndBatchToSpaceAsDepthToSpaceImpl
{
public:
- void Run(Graph& graph, InputSlot& connection) const;
-};
+ void Run(Graph& graph, InputSlot& connection) const
+ {
+ // Validate base layer (the Permute) is compatible
+ Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+ BOOST_ASSERT(base.GetType() == LayerType::Permute || base.GetType() == LayerType::Transpose);
+ const TensorInfo& inputInfo = base.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& intermediateInfo = base.GetOutputSlot(0).GetTensorInfo();
+ if (intermediateInfo.GetNumDimensions() != 4)
+ {
+ // Must be 4D, otherwise the below checks do not make sense
+ return;
+ }
+ if (!static_cast<PermuteType&>(base).GetParameters().m_DimMappings.IsEqual(PermutationVector{ 3, 1, 2, 0 }))
+ {
+ // Must swap batch and channels dimensions, otherwise it is not the (original) channels dimension
+ // that is being decomposed.
+ return;
+ }
+
+ // Validate child layer (the BatchToSpace) is compatible
+ Layer& child = connection.GetOwningLayer();
+ BOOST_ASSERT(child.GetType() == LayerType::BatchToSpaceNd);
+ const TensorInfo& outputInfo = child.GetOutputSlot(0).GetTensorInfo();
+ const BatchToSpaceNdDescriptor& batchToSpaceDesc = static_cast<BatchToSpaceNdLayer&>(child).GetParameters();
+ if (batchToSpaceDesc.m_DataLayout != DataLayout::NHWC)
+ {
+ // The rest of this function assumes NHWC, although in future this restriction could be lifted.
+ return;
+ }
+ if (batchToSpaceDesc.m_Crops != std::vector<std::pair<unsigned int, unsigned int>>{ { 0, 0 }, { 0, 0 } })
+ {
+ // Cropping is not supported in DepthToSpace
+ return;
+ }
+ if (batchToSpaceDesc.m_BlockShape.size() != 2 ||
+ batchToSpaceDesc.m_BlockShape[0] != batchToSpaceDesc.m_BlockShape[1])
+ {
+ // Asymmetric or non-2D block sizes are not supported by DepthToSpace
+ return;
+ }
+ uint32_t blockSize = batchToSpaceDesc.m_BlockShape[0];
+ if (outputInfo.GetShape()[0] != 1 || outputInfo.GetShape()[3] != 1)
+ {
+ // The final output must have 1 batch and 1 channel because these dimensions will be swapped around
+ // once we make the substitution, and it needs to be equivalent.
+ return;
+ }
+
+ // Validate the intermediate tensor quantization params.
+ // These must be identical to either the input or output quantization params, otherwise the intermediate tensor
+ // may not have sufficient range/precision to preserve the values.
+ // This would mean that once we perform the substitution this loss of precision will no longer occur,
+ // so we would have changed the meaning of the network.
+ bool isIntermediateQuantParamsSameAsInput =
+ intermediateInfo.GetQuantizationScale() == inputInfo.GetQuantizationScale() &&
+ intermediateInfo.GetQuantizationOffset() == inputInfo.GetQuantizationOffset();
+ bool isIntermediateQuantParamsSameAsOutput =
+ intermediateInfo.GetQuantizationScale() == outputInfo.GetQuantizationScale() &&
+ intermediateInfo.GetQuantizationOffset() == outputInfo.GetQuantizationOffset();
+ if (!isIntermediateQuantParamsSameAsInput && !isIntermediateQuantParamsSameAsOutput)
+ {
+ return;
+ }
-using PermuteAndBatchToSpaceAsDepthToSpace =
- OptimizeForConnection<PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl>;
+ // Insert equivalent DepthToSpace layer
+ const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName();
+
+ // Inserts an equivalent DepthToSpace layer before the base layer.
+ const DepthToSpaceDescriptor depthToSpaceDesc(blockSize, DataLayout::NHWC);
+ auto& depthToSpace = *graph.InsertNewLayer<DepthToSpaceLayer>(base.GetInputSlot(0),
+ depthToSpaceDesc,
+ name.c_str());
+ depthToSpace.GetOutputHandler().SetTensorInfo(outputInfo);
+
+ // Moves connections from child output to new layer.
+ // Child layer will be removed as it's left unconnected.
+ // Base layer will be removed if left unconnected.
+ child.GetOutputSlot().MoveAllConnections(depthToSpace.GetOutputSlot());
+ }
+};
+using PermuteAndBatchToSpaceAsDepthToSpace = OptimizeForConnection<PermuteLayer, BatchToSpaceNdLayer,
+ PermuteAndBatchToSpaceAsDepthToSpaceImpl<PermuteLayer>>;
+using TransposeAndBatchToSpaceAsDepthToSpace = OptimizeForConnection<TransposeLayer, BatchToSpaceNdLayer,
+ PermuteAndBatchToSpaceAsDepthToSpaceImpl<TransposeLayer>>;
} // namespace optimizations
} // namespace armnn
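A worked shape example (block size 2 chosen for illustration) shows why the substitution preserves semantics:

// input (NHWC):           [1, H, W, 4]     C = blockSize^2 = 4
// Permute/Transpose
//   {3, 1, 2, 0}:         [4, H, W, 1]     batch and channels swapped
// BatchToSpaceNd(2):      [1, 2H, 2W, 1]   batch folded into H and W
//
// DepthToSpace(2) applied directly to the input:
// [1, H, W, 4]         -> [1, 2H, 2W, 1]   same elements, same positions
//
// This is also why the pass requires the final output to have batch == 1
// and channels == 1: only then are the two sequences equivalent.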
diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp
index 12637ba9b6..d5a8a5d81e 100644
--- a/src/armnn/optimizations/SquashEqualSiblings.hpp
+++ b/src/armnn/optimizations/SquashEqualSiblings.hpp
@@ -64,6 +64,8 @@ protected:
};
using SquashEqualPermuteSiblings = OptimizeForConnection<Layer, PermuteLayer, SquashEqualSiblingsImpl<PermuteLayer>>;
+using SquashEqualTransposeSiblings = OptimizeForConnection<Layer, TransposeLayer,
+ SquashEqualSiblingsImpl<TransposeLayer>>;
using SquashEqualReshapeSiblings = OptimizeForConnection<Layer, ReshapeLayer, SquashEqualSiblingsImpl<ReshapeLayer>>;
} // namespace optimizations
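The new alias lets the existing sibling-squashing machinery deduplicate Transpose layers too; sketched as a graph rewrite:

// before:  base --> Transpose{0,2,3,1} --> consumerA
//             \--> Transpose{0,2,3,1} --> consumerB
//
// after:   base --> Transpose{0,2,3,1} --> consumerA
//                                     \--> consumerB
//
// Sibling transposes with identical permutations compute identical
// tensors, so all but one are bypassed and later removed as unconnected.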
diff --git a/src/armnn/optimizations/TransposeAsReshape.hpp b/src/armnn/optimizations/TransposeAsReshape.hpp
new file mode 100644
index 0000000000..4bb2f192f3
--- /dev/null
+++ b/src/armnn/optimizations/TransposeAsReshape.hpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Optimization.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class TransposeAsReshapeImpl
+{
+public:
+ /// Run for every TransposeLayer. Replaces it with a ReshapeLayer if they are equivalent.
+ void Run(Graph& graph, TransposeLayer& transpose) const
+ {
+ if (IsReshape(transpose))
+ {
+ const TensorInfo& outInfo = transpose.GetOutputHandler().GetTensorInfo();
+
+ const std::string name = std::string("as_reshape-") + transpose.GetName();
+ const ReshapeDescriptor descriptor{outInfo.GetShape()};
+ // Inserts the new layer in place so the graph doesn't need to be re-sorted.
+ auto reshape = graph.InsertNewLayer<ReshapeLayer>(transpose.GetInputSlot(0), descriptor, name.c_str());
+ reshape->GetOutputHandler().SetTensorInfo(outInfo);
+
+ // Bypass transpose. It will be deleted since it's left unconnected.
+ transpose.GetOutputSlot().MoveAllConnections(reshape->GetOutputSlot());
+ }
+ }
+
+protected:
+ TransposeAsReshapeImpl() = default;
+ ~TransposeAsReshapeImpl() = default;
+
+private:
+ static bool IsReshape(const TransposeLayer& layer)
+ {
+ const TensorShape& outShape = layer.GetOutputHandler().GetTensorInfo().GetShape();
+ const PermutationVector& permutation = layer.GetPermutation();
+
+ const unsigned int numDimensions = permutation.GetSize();
+ std::map<unsigned int, unsigned int> permuteMappings;
+ for (unsigned int i = 0; i < permutation.GetSize(); ++i)
+ {
+ permuteMappings[permutation[i]] = i;
+ }
+
+ std::vector<unsigned int> permuteVector;
+ for (unsigned int i = 0; i < permutation.GetSize(); ++i)
+ {
+ permuteVector.push_back(permuteMappings.at(i));
+ }
+
+ unsigned int lastGtOne = 0;
+ while ((lastGtOne < numDimensions) && (outShape[(permuteVector[lastGtOne])] == 1U))
+ {
+ ++lastGtOne;
+ }
+
+ bool isReshape = true;
+ for (unsigned int i = lastGtOne + 1U; isReshape && (i < numDimensions); ++i)
+ {
+ if (outShape[permuteVector[i]] > 1U)
+ {
+ isReshape = permuteVector[lastGtOne] < permuteVector[i];
+ lastGtOne = i;
+ }
+ }
+
+ return isReshape;
+ }
+};
+
+using TransposeAsReshape = OptimizeForType<TransposeLayer, TransposeAsReshapeImpl>;
+
+} // namespace optimizations
+} // namespace armnn
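The IsReshape test encodes a standard fact: a transpose degenerates to a reshape exactly when the axes of size greater than 1 keep their relative order, because size-1 axes can move freely without disturbing the linear element order in memory. A standalone restatement of the condition (an illustration, not the ArmNN implementation):

#include <vector>

// srcAxis[i] = input axis that output axis i is taken from.
bool TransposeIsReshape(const std::vector<unsigned int>& outShape,
                        const std::vector<unsigned int>& srcAxis)
{
    int lastNonTrivial = -1;
    for (unsigned int i = 0; i < outShape.size(); ++i)
    {
        if (outShape[i] > 1U)
        {
            if (static_cast<int>(srcAxis[i]) < lastNonTrivial)
            {
                return false;  // an axis of size > 1 was reordered
            }
            lastNonTrivial = static_cast<int>(srcAxis[i]);
        }
    }
    return true;
}

// e.g. [1, 5, 1, 7] -> [5, 1, 7, 1] only moves size-1 axes and is a
// reshape; swapping the 5 and the 7 is a genuine transpose.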