diff options
-rw-r--r-- | Android.mk | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 3 | ||||
-rw-r--r-- | src/armnn/optimizations/All.hpp | 1 | ||||
-rw-r--r-- | src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp | 87 | ||||
-rw-r--r-- | src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp | 27 | ||||
-rw-r--r-- | src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp | 132 |
7 files changed, 253 insertions, 1 deletions
diff --git a/Android.mk b/Android.mk index 3e665a039f..4c3789c9e0 100644 --- a/Android.mk +++ b/Android.mk @@ -91,6 +91,7 @@ LOCAL_SRC_FILES := \ src/armnn/NetworkUtils.cpp \ src/armnn/Observable.cpp \ src/armnn/Optimizer.cpp \ + src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp \ src/armnn/ProfilingEvent.cpp \ src/armnn/Profiling.cpp \ src/armnn/Runtime.cpp \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e4d9c08d2..a2febe3066 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -420,6 +420,8 @@ list(APPEND armnn_sources src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp src/armnn/optimizations/OptimizeInverseConversions.hpp src/armnn/optimizations/OptimizeInversePermutes.hpp + src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp + src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp src/armnn/optimizations/PermuteAsReshape.hpp src/armnn/optimizations/SquashEqualSiblings.hpp src/profiling/CommandHandlerFunctor.cpp @@ -560,6 +562,7 @@ if(BUILD_UNIT_TESTS) src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp src/armnn/test/optimizations/OptimizeInverseConversionsTests.cpp src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp + src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp src/armnn/test/optimizations/PermuteAsReshapeTests.cpp src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp src/armnn/test/OptionalTest.cpp diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index a668274c4d..cf9a138084 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -818,7 +818,8 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, MovePermuteUp(), PermuteAsReshape(), OptimizeConsecutiveReshapes(), - FoldPadIntoConvolution2d())); + FoldPadIntoConvolution2d(), + PermuteAndBatchToSpaceAsDepthToSpace())); // Infer the tensor infos for all output slots. 
Throws an exception on failure optGraph.InferTensorInfos(); diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 68965fd23c..4ea3f7f2d4 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -14,3 +14,4 @@ #include "ConvertFp32NetworkToFp16.hpp" #include "AddDebug.hpp" #include "FoldPadIntoConvolution2d.hpp" +#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp"
\ No newline at end of file diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp new file mode 100644 index 0000000000..c42162b6c1 --- /dev/null +++ b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.cpp @@ -0,0 +1,87 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" + +using namespace armnn; +using namespace armnn::optimizations; + +void PermuteAndBatchToSpaceAsDepthToSpaceImpl::Run(Graph& graph, InputSlot& connection) const +{ + // Validate base layer (the Permute) is compatible + Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + BOOST_ASSERT(base.GetType() == LayerType::Permute); + const TensorInfo& inputInfo = base.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& intermediateInfo = base.GetOutputSlot(0).GetTensorInfo(); + if (intermediateInfo.GetNumDimensions() != 4) + { + // Must be 4D, otherwise the below checks do not make sense + return; + } + if (!static_cast<PermuteLayer&>(base).GetParameters().m_DimMappings.IsEqual(PermutationVector{ 3, 1, 2, 0 })) + { + // Must swap batch and channels dimensions, otherwise it is not the (original) channels dimension + // that is being decomposed. + return; + } + + // Validate child layer (the BatchToSpace) is compatible + Layer& child = connection.GetOwningLayer(); + BOOST_ASSERT(child.GetType() == LayerType::BatchToSpaceNd); + const TensorInfo& outputInfo = child.GetOutputSlot(0).GetTensorInfo(); + const BatchToSpaceNdDescriptor& batchToSpaceDesc = static_cast<BatchToSpaceNdLayer&>(child).GetParameters(); + if (batchToSpaceDesc.m_DataLayout != DataLayout::NHWC) + { + // The rest of this function assumes NHWC, although in future this restriction could be lifted. 
+ return; + } + if (batchToSpaceDesc.m_Crops != std::vector<std::pair<unsigned int, unsigned int>>{ { 0, 0 }, { 0, 0 } }) + { + // Cropping is not supported in DepthToSpace + return; + } + if (batchToSpaceDesc.m_BlockShape.size() != 2 || + batchToSpaceDesc.m_BlockShape[0] != batchToSpaceDesc.m_BlockShape[1]) + { + // Asymmetric or non-2D block sizes are not supported by DepthToSpace + return; + } + uint32_t blockSize = batchToSpaceDesc.m_BlockShape[0]; + if (outputInfo.GetShape()[0] != 1 || outputInfo.GetShape()[3] != 1) + { + // The final output must have 1 batch and 1 channel because these dimensions will be swapped around + // once we make the substitution, and it needs to be equivalent. + return; + } + + // Validate the intermediate tensor quantization params. + // These must be identical to either the input or output quantization params, otherwise the intermediate tensor + // may not have sufficient range/precision to preserve the values. + // This would mean that once we perform the substitution this loss of precision will no longer occur, + // so we would have changed the meaning of the network. + bool isIntermediateQuantParamsSameAsInput = + intermediateInfo.GetQuantizationScale() == inputInfo.GetQuantizationScale() && + intermediateInfo.GetQuantizationOffset() == inputInfo.GetQuantizationOffset(); + bool isIntermediateQuantParamsSameAsOutput = + intermediateInfo.GetQuantizationScale() == outputInfo.GetQuantizationScale() && + intermediateInfo.GetQuantizationOffset() == outputInfo.GetQuantizationOffset(); + if (!isIntermediateQuantParamsSameAsInput && !isIntermediateQuantParamsSameAsOutput) + { + return; + } + + // Insert equivalent DepthToSpace layer + const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName(); + + // Inserts the equivalent DepthToSpace layer before the base (Permute) layer. 
+ const DepthToSpaceDescriptor depthToSpaceDesc(blockSize, DataLayout::NHWC); + auto& depthToSpace = *graph.InsertNewLayer<DepthToSpaceLayer>(base.GetInputSlot(0), depthToSpaceDesc, name.c_str()); + depthToSpace.GetOutputHandler().SetTensorInfo(outputInfo); + + // Moves connections from child output to new layer. + // Child layer will be removed as it's left unconnected. + // Base layer will be removed if left unconnected. + child.GetOutputSlot().MoveAllConnections(depthToSpace.GetOutputSlot()); +} diff --git a/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp new file mode 100644 index 0000000000..4a73efca40 --- /dev/null +++ b/src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +/// Replaces Permute leading into BatchToSpace with a DepthToSpace +/// in the case where the Permute swaps the batch and channels dimensions +/// such that the replacement is valid. +class PermuteAndBatchToSpaceAsDepthToSpaceImpl +{ +public: + void Run(Graph& graph, InputSlot& connection) const; +}; + +using PermuteAndBatchToSpaceAsDepthToSpace = + OptimizeForConnection<PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp b/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp new file mode 100644 index 0000000000..ec1dd511c9 --- /dev/null +++ b/src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp @@ -0,0 +1,132 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "../TestUtils.hpp" + +#include <Network.hpp> +#include <Optimizer.hpp> + +#include <boost/test/unit_test.hpp> + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; + +namespace +{ + +/// Shared function for the below tests, so that we test the same network in both cases. +INetworkPtr CreateTestNetwork() +{ + // Create a network: input -> Permute -> BatchToSpaceNd -> output + INetworkPtr network = INetwork::Create(); + + auto input = network->AddInputLayer(0, "input"); + const TensorInfo inputInfo({ 1, 2, 3, 4 }, DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(inputInfo); + + // Insert Permute which swaps batches and channels dimensions + auto permute = network->AddPermuteLayer(PermuteDescriptor(PermutationVector{ 3, 1, 2, 0 }), "permute"); + const TensorInfo permuteInfo({ 4, 2, 3, 1 }, DataType::Float32); + permute->GetOutputSlot(0).SetTensorInfo(permuteInfo); + input->GetOutputSlot(0).Connect(permute->GetInputSlot(0)); + + // Insert BatchToSpace with a 2x2 block shape in NHWC layout + BatchToSpaceNdDescriptor batchToSpaceDesc; + batchToSpaceDesc.m_BlockShape = { 2, 2 }; + batchToSpaceDesc.m_DataLayout = DataLayout::NHWC; + auto batchToSpace = network->AddBatchToSpaceNdLayer(batchToSpaceDesc, "batchToSpace"); + const TensorInfo batchToSpaceInfo({ 1, 4, 6, 1 }, DataType::Float32); + batchToSpace->GetOutputSlot(0).SetTensorInfo(batchToSpaceInfo); + permute->GetOutputSlot(0).Connect(batchToSpace->GetInputSlot(0)); + + auto output = network->AddOutputLayer(0, "output"); + batchToSpace->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + return network; +} + +} // namespace + +/// Tests that the optimization performed by PermuteAndBatchToSpaceAsDepthToSpace is as expected. +/// Note this does not ensure the correctness of the optimization - that is done in the below test. 
+BOOST_AUTO_TEST_CASE(PermuteAndBatchToSpaceAsDepthToSpaceOptimizerTest) +{ + INetworkPtr network = CreateTestNetwork(); + Graph graph = static_cast<Network*>(network.get())->GetGraph(); + + // Confirm initial graph is as we expect + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>, &IsLayerOfType<PermuteLayer>, + &IsLayerOfType<BatchToSpaceNdLayer>, &IsLayerOfType<OutputLayer>)); + + // Perform the optimization which should merge the two layers into a DepthToSpace + armnn::Optimizer::Pass(graph, MakeOptimizations(PermuteAndBatchToSpaceAsDepthToSpace())); + + // Check that the replacement has been made as expected + auto checkDepthToSpace = [](const Layer* const layer) -> bool { + return IsLayerOfType<DepthToSpaceLayer>(layer) && + static_cast<const DepthToSpaceLayer*>(layer)->GetParameters().m_BlockSize == 2 && + static_cast<const DepthToSpaceLayer*>(layer)->GetParameters().m_DataLayout == DataLayout::NHWC && + layer->GetOutputHandler().GetTensorInfo() == TensorInfo({ 1, 4, 6, 1 }, DataType::Float32); + }; + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>, checkDepthToSpace, + &IsLayerOfType<OutputLayer>)); + + // Check the new layer has the two merged layers listed as related layers + std::list<std::string> testRelatedLayers = { "batchToSpace", "permute" }; + BOOST_TEST(CheckRelatedLayers<DepthToSpaceLayer>(graph, testRelatedLayers)); +} + +/// Tests that the optimization performed by PermuteAndBatchToSpaceAsDepthToSpace does not change the behaviour +/// of the network (i.e. it still produces the correct output). 
+BOOST_AUTO_TEST_CASE(PermuteAndBatchToSpaceAsDepthToSpaceCorrectnessTest) +{ + INetworkPtr network = CreateTestNetwork(); + + IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions()); + + IOptimizedNetworkPtr optimizedNetwork = Optimize(*network, { Compute::CpuRef }, runtime->GetDeviceSpec()); + + // Confirm that the optimization has actually taken place (only a DepthToSpace sits between input and output) + const Graph& optGraph = static_cast<OptimizedNetwork*>(optimizedNetwork.get())->GetGraph(); + BOOST_TEST(CheckSequence(optGraph.cbegin(), optGraph.cend(), &IsLayerOfType<InputLayer>, + &IsLayerOfType<DepthToSpaceLayer>, &IsLayerOfType<OutputLayer>)); + + // Load the graph into a runtime so we can check it produces the correct output + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optimizedNetwork)); + + std::vector<float> inputData{ + // Each row here is a row of pixels where each pixel has 4 channels + // clang-format off + 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f, 100.0f, 200.0f, 300.0f, 400.0f, + -1.0f, -2.0f, -3.0f, -4.0f, -10.0f, -20.0f, -30.0f, -40.0f, -100.0f, -200.0f, -300.0f, -400.0f, + // clang-format on + }; + ConstTensor input(TensorInfo({ 1, 2, 3, 4 }, DataType::Float32), inputData); + InputTensors inputs = { { 0, input } }; + std::vector<float> outputData(4 * 6); + Tensor output(TensorInfo({ 1, 4, 6, 1 }, DataType::Float32), outputData.data()); + OutputTensors outputs = { { 0, output } }; + runtime->EnqueueWorkload(netId, inputs, outputs); + + // Check the output is as expected. + // Note this output has been generated by running the network *without* the optimization. + std::vector<float> expectedOutput = { + // Rows and columns here match exactly with the tensor, as there is only 1 channel. 
+ // clang-format off + 1.0f, 2.0f, 10.0f, 20.0f, 100.0f, 200.0f, + 3.0f, 4.0f, 30.0f, 40.0f, 300.0f, 400.0f, + + -1.0f, -2.0f, -10.0f, -20.0f, -100.0f, -200.0f, + -3.0f, -4.0f, -30.0f, -40.0f, -300.0f, -400.0f, + // clang-format on + }; + BOOST_TEST(outputData == expectedOutput); +} + +BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file |