diff options
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/Network.cpp | 5 | ||||
-rw-r--r-- | src/armnn/optimizations/All.hpp | 1 | ||||
-rw-r--r-- | src/armnn/optimizations/PermuteDepthwiseConv2dWeights.hpp | 81 | ||||
-rw-r--r-- | src/armnn/test/optimizations/PermuteDepthwiseConv2dWeightsTests.cpp | 116 |
4 files changed, 203 insertions, 0 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 549222bd7a..d2ebd4cde6 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -1167,6 +1167,11 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr, auto backendObjPtr = backends.find(selectedBackend)->second.get(); ARMNN_ASSERT(backendObjPtr); + if(selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc) + { + Optimizer::Pass(optGraph, MakeOptimizations(optimizations::PermuteDepthwiseConv2dWeights())); + } + // Select sub-graphs based on backend SubgraphViewSelector::Subgraphs subgraphs = SubgraphViewSelector::SelectSubgraphs(optGraph, diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 2bc54d993d..38c4ac9462 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -18,6 +18,7 @@ #include "OptimizeInversePermutes.hpp" #include "PermuteAsReshape.hpp" #include "PermuteAndBatchToSpaceAsDepthToSpace.hpp" +#include "PermuteDepthwiseConv2dWeights.hpp" #include "RedirectMembersToConstantInputs.hpp" #include "SquashEqualSiblings.hpp" #include "TransposeAsReshape.hpp"
\ No newline at end of file diff --git a/src/armnn/optimizations/PermuteDepthwiseConv2dWeights.hpp b/src/armnn/optimizations/PermuteDepthwiseConv2dWeights.hpp new file mode 100644 index 0000000000..d49ddb9f68 --- /dev/null +++ b/src/armnn/optimizations/PermuteDepthwiseConv2dWeights.hpp @@ -0,0 +1,81 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" +#include "NetworkUtils.hpp" + +#include <armnnUtils/Permute.hpp> + +#include <fmt/format.h> + +namespace armnn +{ +namespace optimizations +{ + +class PermuteDepthwiseConv2dWeightsImpl +{ +public: + + void Run(Graph& graph, Layer& layer) const + { + if (layer.GetType() == LayerType::DepthwiseConvolution2d) + { + AddPermuteLayer(graph, PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&layer)); + } + } + +protected: + PermuteDepthwiseConv2dWeightsImpl() = default; + ~PermuteDepthwiseConv2dWeightsImpl() = default; + +private: + /// ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout + /// + /// ACL format for weights for depthwise is: + /// - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN) + /// - [1, C, H, W] for [N, C, H, W] input/output layout + /// + /// Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL. + static void AddPermuteLayer(Graph& graph, DepthwiseConvolution2dLayer* layer) + { + TensorInfo inputInfo = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + TensorInfo weightInfo = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(); + if (layer->GetParameters().m_DataLayout == armnn::DataLayout::NHWC) + { + // No permutation required. Input and weights data layouts are the same. + return; + } + else if (layer->GetParameters().m_DataLayout == armnn::DataLayout::NCHW) + { + // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different. + // [ 1, H, W, I*M] --> [ 1, I * M, H, W ] + PermutationVector permutationVector = { 0, 2, 3, 1 }; + TensorInfo weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector); + + // Inserts NewLayer so layers don't need to be re-sorted. + PermuteLayer* permuteLayer = + graph.InsertNewLayer<PermuteLayer>(layer->GetInputSlot(1), + PermuteDescriptor(permutationVector), + "permute_layer"); + permuteLayer->GetOutputSlot().SetTensorInfo(weightsPermuted); + + // Assign Permute BackendId to be the same as the Depthwise Conv2d BackendId. + // Needed as backends have already been assigned at this stage. + permuteLayer->SetBackendId(layer->GetBackendId()); + } + else + { + throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}", + GetDataLayoutName(layer->GetParameters().m_DataLayout))); + } + } +}; + +using PermuteDepthwiseConv2dWeights = OptimizeForType<Layer, PermuteDepthwiseConv2dWeightsImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/test/optimizations/PermuteDepthwiseConv2dWeightsTests.cpp b/src/armnn/test/optimizations/PermuteDepthwiseConv2dWeightsTests.cpp new file mode 100644 index 0000000000..24dab7f779 --- /dev/null +++ b/src/armnn/test/optimizations/PermuteDepthwiseConv2dWeightsTests.cpp @@ -0,0 +1,116 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "../armnnTestUtils/GraphUtils.hpp" +#include "../armnnTestUtils/TestUtils.hpp" + +#include <armnn/INetwork.hpp> + +#include <doctest/doctest.h> + +using namespace armnn; + +namespace +{ +#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED) +armnn::INetworkPtr CreateSimpleDepthwiseConv2dNetwork(const armnn::TensorInfo& inputTensorInfo, + const armnn::TensorInfo& outputTensorInfo, + const armnn::TensorInfo& weightsTensorInfo, + armnn::DepthwiseConvolution2dDescriptor descriptor) +{ + armnn::INetworkPtr network(armnn::INetwork::Create()); + + armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "input"); + armnn::IConnectableLayer* weightsInputLayer = network->AddInputLayer(1, "weights_input"); + armnn::IConnectableLayer* depthwiseLayer = network->AddDepthwiseConvolution2dLayer(descriptor, "depthwise_conv2d"); + armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "output"); + + inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + weightsInputLayer->GetOutputSlot(0).SetTensorInfo(weightsTensorInfo); + depthwiseLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + inputLayer->GetOutputSlot(0).Connect(depthwiseLayer->GetInputSlot(0)); + weightsInputLayer->GetOutputSlot(0).Connect(depthwiseLayer->GetInputSlot(1)); + depthwiseLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + return network; +} + +void PermuteDepthwiseConv2dWeightsTestRunner(INetworkPtr& network, + const TensorShape& outputShape, + Compute backendId) +{ + // Create ArmNN runtime + IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); + + // Optimise ArmNN network + IOptimizedNetworkPtr optNet = Optimize(*network, {backendId}, run->GetDeviceSpec()); + + Graph& graph = GetGraphForTesting(optNet.get()); + + CHECK(graph.GetNumLayers() == 5); + CHECK(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<InputLayer>, + &IsLayerOfType<InputLayer>, + &IsLayerOfType<PermuteLayer>, + &IsLayerOfType<DepthwiseConvolution2dLayer>, + &IsLayerOfType<OutputLayer>)); + + armnn::Layer* const permuteLayer = GetFirstLayerWithName(graph, "permute_layer"); + CHECK(permuteLayer); + + // Swap original shape to compare with new shape. + unsigned int weightsShape[] = {outputShape[0], outputShape[1], outputShape[2], outputShape[3]}; + + // Tensorshape and the data type are correct + // [ 1, H, W, I*M] --> [ 1, I * M, H, W ] + TensorShape newShape = permuteLayer->GetOutputSlot().GetTensorInfo().GetShape(); + CHECK((newShape[0] == weightsShape[0])); + CHECK((newShape[1] == weightsShape[3])); + CHECK((newShape[2] == weightsShape[1])); + CHECK((newShape[3] == weightsShape[2])); +} + +void PermuteDepthwiseConv2dWeightsTest(Compute backendId) +{ + armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 3 }, armnn::DataType::Float32); + armnn::TensorInfo outputTensorInfo({ 1, 2 }, armnn::DataType::Float32); + armnn::TensorInfo weightsTensorInfo({ 2, 6 }, armnn::DataType::Float32); + + DepthwiseConvolution2dDescriptor descriptor; + descriptor.m_BiasEnabled = false; + + armnn::INetworkPtr network = CreateSimpleDepthwiseConv2dNetwork(inputTensorInfo, + outputTensorInfo, + weightsTensorInfo, + descriptor); + + PermuteDepthwiseConv2dWeightsTestRunner(network, + weightsTensorInfo.GetShape(), + backendId); +} +#endif +} + +#if defined(ARMCOMPUTECL_ENABLED) +TEST_SUITE("Optimizer_PermuteDepthwiseConv2dWeightsGpuAcc") +{ +TEST_CASE("PermuteDepthwiseConv2dWeightsGpuAccTest") +{ + PermuteDepthwiseConv2dWeightsTest(Compute::GpuAcc); +} +} +#endif + +#if defined(ARMCOMPUTENEON_ENABLED) +TEST_SUITE("Optimizer_PermuteDepthwiseConv2dWeightsCpuAcc") +{ +TEST_CASE("PermuteDepthwiseConv2dWeightsCpuAccTest") +{ + PermuteDepthwiseConv2dWeightsTest(Compute::CpuAcc); +} +} +#endif |