 Android.mk                                                                 |   1 +
 CMakeLists.txt                                                             |   3 +
 src/armnn/Network.cpp                                                      |   3 +-
 src/armnn/optimizations/All.hpp                                            |   1 +
 src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp           |  87 ++++++
 src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp           |  27 ++
 src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp | 132 +++++++++
 7 files changed, 253 insertions(+), 1 deletion(-)
diff --git a/Android.mk b/Android.mk
index 3e665a039f..4c3789c9e0 100644
--- a/Android.mk
+++ b/Android.mk
@@ -91,6 +91,7 @@ LOCAL_SRC_FILES := \
src/armnn/NetworkUtils.cpp \
src/armnn/Observable.cpp \
src/armnn/Optimizer.cpp \
+ src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp \
src/armnn/ProfilingEvent.cpp \
src/armnn/Profiling.cpp \
src/armnn/Runtime.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3e4d9c08d2..a2febe3066 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -420,6 +420,8 @@ list(APPEND armnn_sources
src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
src/armnn/optimizations/OptimizeInverseConversions.hpp
src/armnn/optimizations/OptimizeInversePermutes.hpp
+ src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp
+ src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp
src/armnn/optimizations/PermuteAsReshape.hpp
src/armnn/optimizations/SquashEqualSiblings.hpp
src/profiling/CommandHandlerFunctor.cpp
@@ -560,6 +562,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp
src/armnn/test/optimizations/OptimizeInverseConversionsTests.cpp
src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp
+ src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp
src/armnn/test/optimizations/PermuteAsReshapeTests.cpp
src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp
src/armnn/test/OptionalTest.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index a668274c4d..cf9a138084 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -818,7 +818,8 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
MovePermuteUp(),
PermuteAsReshape(),
OptimizeConsecutiveReshapes(),
- FoldPadIntoConvolution2d()));
+ FoldPadIntoConvolution2d(),
+ PermuteAndBatchToSpaceAsDepthToSpace()));
// Infer the tensor infos for all output slots. Throws an exception on failure
optGraph.InferTensorInfos();
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 68965fd23c..4ea3f7f2d4 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -14,3 +14,4 @@
#include "ConvertFp32NetworkToFp16.hpp"
#include "AddDebug.hpp"
#include "FoldPadIntoConvolution2d.hpp"
+#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" \ No newline at end of file
diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp
new file mode 100644
index 0000000000..c42162b6c1
--- /dev/null
+++ b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp
@@ -0,0 +1,87 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp"
+
+using namespace armnn;
+using namespace armnn::optimizations;
+
+void PermuteAndBatchToSpaceAsDepthToSpaceImpl::Run(Graph& graph, InputSlot& connection) const
+{
+ // Validate base layer (the Permute) is compatible
+ Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+ BOOST_ASSERT(base.GetType() == LayerType::Permute);
+ const TensorInfo& inputInfo = base.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& intermediateInfo = base.GetOutputSlot(0).GetTensorInfo();
+ if (intermediateInfo.GetNumDimensions() != 4)
+ {
+ // Must be 4D, otherwise the checks below do not make sense
+ return;
+ }
+ if (!static_cast<PermuteLayer&>(base).GetParameters().m_DimMappings.IsEqual(PermutationVector{ 3, 1, 2, 0 }))
+ {
+ // Must swap batch and channels dimensions, otherwise it is not the (original) channels dimension
+ // that is being decomposed.
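+ // (e.g. an NHWC input of shape [1, 2, 3, 4] becomes [4, 2, 3, 1], as in the unit tests).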
+ return;
+ }
+
+ // Validate child layer (the BatchToSpace) is compatible
+ Layer& child = connection.GetOwningLayer();
+ BOOST_ASSERT(child.GetType() == LayerType::BatchToSpaceNd);
+ const TensorInfo& outputInfo = child.GetOutputSlot(0).GetTensorInfo();
+ const BatchToSpaceNdDescriptor& batchToSpaceDesc = static_cast<BatchToSpaceNdLayer&>(child).GetParameters();
+ if (batchToSpaceDesc.m_DataLayout != DataLayout::NHWC)
+ {
+ // The rest of this function assumes NHWC, although in future this restriction could be lifted.
+ return;
+ }
+ if (batchToSpaceDesc.m_Crops != std::vector<std::pair<unsigned int, unsigned int>>{ { 0, 0 }, { 0, 0 } })
+ {
+ // Cropping is not supported in DepthToSpace
+ return;
+ }
+ if (batchToSpaceDesc.m_BlockShape.size() != 2 ||
+ batchToSpaceDesc.m_BlockShape[0] != batchToSpaceDesc.m_BlockShape[1])
+ {
+ // Asymmetric or non-2D block sizes are not supported by DepthToSpace
+ return;
+ }
+ uint32_t blockSize = batchToSpaceDesc.m_BlockShape[0];
+ if (outputInfo.GetShape()[0] != 1 || outputInfo.GetShape()[3] != 1)
+ {
+ // The final output must have batch and channel dimensions of size 1, because the substitution leaves
+ // these two dimensions swapped; the replacement is only equivalent if swapping them makes no difference.
+ return;
+ }
+
+ // Validate the intermediate tensor quantization params.
+ // These must be identical to either the input or output quantization params, otherwise the intermediate tensor
+ // may not have sufficient range/precision to preserve the values.
+ // This would mean that once we perform the substitution this loss of precision will no longer occur,
+ // so we would have changed the meaning of the network.
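+ // For example, if the input scale were 1.0 but the intermediate scale were 0.5, values in the upper
+ // half of the input range would saturate in the intermediate tensor; removing that tensor would remove
+ // the saturation and so change the results.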
+ bool isIntermediateQuantParamsSameAsInput =
+ intermediateInfo.GetQuantizationScale() == inputInfo.GetQuantizationScale() &&
+ intermediateInfo.GetQuantizationOffset() == inputInfo.GetQuantizationOffset();
+ bool isIntermediateQuantParamsSameAsOutput =
+ intermediateInfo.GetQuantizationScale() == outputInfo.GetQuantizationScale() &&
+ intermediateInfo.GetQuantizationOffset() == outputInfo.GetQuantizationOffset();
+ if (!isIntermediateQuantParamsSameAsInput && !isIntermediateQuantParamsSameAsOutput)
+ {
+ return;
+ }
+
+ // Insert equivalent DepthToSpace layer
+ const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName();
+
+ // Inserts the equivalent DepthToSpace layer before the base (Permute) layer.
+ const DepthToSpaceDescriptor depthToSpaceDesc(blockSize, DataLayout::NHWC);
+ auto& depthToSpace = *graph.InsertNewLayer<DepthToSpaceLayer>(base.GetInputSlot(0), depthToSpaceDesc, name.c_str());
+ depthToSpace.GetOutputHandler().SetTensorInfo(outputInfo);
+
+ // Moves connections from child output to new layer.
+ // Child layer will be removed as it's left unconnected.
+ // Base layer will be removed if left unconnected.
+ child.GetOutputSlot().MoveAllConnections(depthToSpace.GetOutputSlot());
+}
diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp
new file mode 100644
index 0000000000..4a73efca40
--- /dev/null
+++ b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Optimization.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+/// Replaces Permute leading into BatchToSpace with a DepthToSpace
+/// in the case where the Permute swaps the batch and channels dimensions
+/// such that the replacement is valid.
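+///
+/// For example (the case covered by the unit tests), an NHWC tensor of shape [1, 2, 3, 4]
+/// permuted with { 3, 1, 2, 0 } gives [4, 2, 3, 1], and a BatchToSpaceNd with block shape
+/// { 2, 2 } then gives [1, 4, 6, 1] - the same result as a single DepthToSpace with block
+/// size 2 applied to the original [1, 2, 3, 4] tensor.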
+class PermuteAndBatchToSpaceAsDepthToSpaceImpl
+{
+public:
+ void Run(Graph& graph, InputSlot& connection) const;
+};
+
+using PermuteAndBatchToSpaceAsDepthToSpace =
+ OptimizeForConnection<PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl>;
+
+} // namespace optimizations
+} // namespace armnn
diff --git a/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp b/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp
new file mode 100644
index 0000000000..ec1dd511c9
--- /dev/null
+++ b/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp
@@ -0,0 +1,132 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../TestUtils.hpp"
+
+#include <Network.hpp>
+#include <Optimizer.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(Optimizer)
+using namespace armnn::optimizations;
+
+namespace
+{
+
+/// Shared helper for the tests below, so that both cases test the same network.
+INetworkPtr CreateTestNetwork()
+{
+ // Create a network
+ INetworkPtr network = INetwork::Create();
+
+ auto input = network->AddInputLayer(0, "input");
+ const TensorInfo inputInfo({ 1, 2, 3, 4 }, DataType::Float32);
+ input->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+ // Insert Permute which swaps batches and channels dimensions
+ auto permute = network->AddPermuteLayer(PermuteDescriptor(PermutationVector{ 3, 1, 2, 0 }), "permute");
+ const TensorInfo permuteInfo({ 4, 2, 3, 1 }, DataType::Float32);
+ permute->GetOutputSlot(0).SetTensorInfo(permuteInfo);
+ input->GetOutputSlot(0).Connect(permute->GetInputSlot(0));
+
+ // Insert BatchToSpace
+ BatchToSpaceNdDescriptor batchToSpaceDesc;
+ batchToSpaceDesc.m_BlockShape = { 2, 2 };
+ batchToSpaceDesc.m_DataLayout = DataLayout::NHWC;
+ auto batchToSpace = network->AddBatchToSpaceNdLayer(batchToSpaceDesc, "batchToSpace");
+ const TensorInfo batchToSpaceInfo({ 1, 4, 6, 1 }, DataType::Float32);
+ batchToSpace->GetOutputSlot(0).SetTensorInfo(batchToSpaceInfo);
+ permute->GetOutputSlot(0).Connect(batchToSpace->GetInputSlot(0));
+
+ auto output = network->AddOutputLayer(0, "output");
+ batchToSpace->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ return network;
+}
+
+} // namespace
+
+/// Tests that PermuteAndBatchToSpaceAsDepthToSpace performs the expected graph substitution.
+/// Note this does not check the numerical correctness of the optimization - that is done in the test below.
+BOOST_AUTO_TEST_CASE(PermuteAndBatchToSpaceAsDepthToSpaceOptimizerTest)
+{
+ INetworkPtr network = CreateTestNetwork();
+ Graph graph = static_cast<Network*>(network.get())->GetGraph();
+
+ // Confirm initial graph is as we expect
+ BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>, &IsLayerOfType<PermuteLayer>,
+ &IsLayerOfType<BatchToSpaceNdLayer>, &IsLayerOfType<OutputLayer>));
+
+ // Perform the optimization which should merge the two layers into a DepthToSpace
+ armnn::Optimizer::Pass(graph, MakeOptimizations(PermuteAndBatchToSpaceAsDepthToSpace()));
+
+ // Check that the replacement has been made as expected
+ auto checkDepthToSpace = [](const Layer* const layer) -> bool {
+ return IsLayerOfType<DepthToSpaceLayer>(layer) &&
+ static_cast<const DepthToSpaceLayer*>(layer)->GetParameters().m_BlockSize == 2 &&
+ static_cast<const DepthToSpaceLayer*>(layer)->GetParameters().m_DataLayout == DataLayout::NHWC &&
+ layer->GetOutputHandler().GetTensorInfo() == TensorInfo({ 1, 4, 6, 1 }, DataType::Float32);
+ };
+
+ BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>, checkDepthToSpace,
+ &IsLayerOfType<OutputLayer>));
+
+ // Check the new layer has the two merged layers listed as related layers
+ std::list<std::string> testRelatedLayers = { "batchToSpace", "permute" };
+ BOOST_TEST(CheckRelatedLayers<DepthToSpaceLayer>(graph, testRelatedLayers));
+}
+
+/// Tests that the optimization performed by PermuteAndBatchToSpaceAsDepthToSpace does not change the behaviour
+/// of the network (i.e. it still produces the correct output).
+BOOST_AUTO_TEST_CASE(PermuteAndBatchToSpaceAsDepthToSpaceCorrectnessTest)
+{
+ INetworkPtr network = CreateTestNetwork();
+
+ IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
+
+ IOptimizedNetworkPtr optimizedNetwork = Optimize(*network, { Compute::CpuRef }, runtime->GetDeviceSpec());
+
+ // Confirm that the optimization has actually taken place
+ const Graph& optGraph = static_cast<OptimizedNetwork*>(optimizedNetwork.get())->GetGraph();
+ BOOST_TEST(CheckSequence(optGraph.cbegin(), optGraph.cend(), &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<DepthToSpaceLayer>, &IsLayerOfType<OutputLayer>));
+
+ // Load the graph into a runtime so we can check it produces the correct output
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optimizedNetwork));
+
+ std::vector<float> inputData{
+ // Each row here is a row of pixels where each pixel has 4 channels
+ // clang-format off
+ 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f, 100.0f, 200.0f, 300.0f, 400.0f,
+ -1.0f, -2.0f, -3.0f, -4.0f, -10.0f, -20.0f, -30.0f, -40.0f, -100.0f, -200.0f, -300.0f, -400.0f,
+ // clang-format on
+ };
+ ConstTensor input(TensorInfo({ 1, 2, 3, 4 }, DataType::Float32), inputData);
+ InputTensors inputs = { { 0, input } };
+ std::vector<float> outputData(4 * 6);
+ Tensor output(TensorInfo({ 1, 4, 6, 1 }, DataType::Float32), outputData.data());
+ OutputTensors outputs = { { 0, output } };
+ runtime->EnqueueWorkload(netId, inputs, outputs);
+
+ // Check the output is as expected.
+ // Note this output has been generated by running the network *without* the optimization.
+ std::vector<float> expectedOutput = {
+ // Rows and columns here match exactly with the tensor, as there is only 1 channel.
+ // clang-format off
+ 1.0f, 2.0f, 10.0f, 20.0f, 100.0f, 200.0f,
+ 3.0f, 4.0f, 30.0f, 40.0f, 300.0f, 400.0f,
+
+ -1.0f, -2.0f, -10.0f, -20.0f, -100.0f, -200.0f,
+ -3.0f, -4.0f, -30.0f, -40.0f, -300.0f, -400.0f,
+ // clang-format on
+ };
+ BOOST_TEST(outputData == expectedOutput);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file