diff options
author | Matteo Martincigh <matteo.martincigh@arm.com> | 2019-01-11 13:25:59 +0000 |
---|---|---|
committer | Matteo Martincigh <matteo.martincigh@arm.com> | 2019-01-15 08:59:50 +0000 |
commit | 4912402497a51c6afe0898b3900f87feefa006a6 (patch) | |
tree | 4e9b5161781d2b0be041aec17227193da5977443 /src/armnn | |
parent | d0a1608e2c41639d8f3e3f9305d79c5f92c9cff8 (diff) | |
download | armnn-4912402497a51c6afe0898b3900f87feefa006a6.tar.gz |
IVGCVSW-2454 Merge the pluggable backends work (previously in a
separate branch) into master
* Brought in all the changes made for the pluggable backends
* Added sub-graph support and tests
* Added precompiled layer support and tests
* Moved BackendSettings to a separate file
* Removed the backend-specific code
* Ported DebugLayer and associated functionality
* Included fixes to make those changes work with master
Change-Id: Id7028fa7917527b844628d5aff5732e3d94c0488
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/BackendSettings.hpp | 87 | ||||
-rw-r--r-- | src/armnn/Graph.cpp | 59 | ||||
-rw-r--r-- | src/armnn/Graph.hpp | 6 | ||||
-rw-r--r-- | src/armnn/ISubGraphConverter.hpp | 22 | ||||
-rw-r--r-- | src/armnn/InternalTypes.cpp | 1 | ||||
-rw-r--r-- | src/armnn/InternalTypes.hpp | 3 | ||||
-rw-r--r-- | src/armnn/LayerSupportCommon.hpp | 1 | ||||
-rw-r--r-- | src/armnn/LayersFwd.hpp | 2 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 340 | ||||
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 55 | ||||
-rw-r--r-- | src/armnn/NetworkUtils.hpp | 9 | ||||
-rw-r--r-- | src/armnn/SubGraph.cpp | 30 | ||||
-rw-r--r-- | src/armnn/SubGraph.hpp | 13 | ||||
-rw-r--r-- | src/armnn/SubGraphSelector.cpp | 2 | ||||
-rw-r--r-- | src/armnn/TypeUtils.hpp | 20 | ||||
-rw-r--r-- | src/armnn/layers/MergerLayer.cpp | 2 | ||||
-rw-r--r-- | src/armnn/layers/PreCompiledLayer.cpp | 56 | ||||
-rw-r--r-- | src/armnn/layers/PreCompiledLayer.hpp | 42 | ||||
-rw-r--r-- | src/armnn/test/CreateWorkload.hpp | 128 | ||||
-rw-r--r-- | src/armnn/test/SubGraphTests.cpp | 407 |
20 files changed, 1171 insertions, 114 deletions
diff --git a/src/armnn/BackendSettings.hpp b/src/armnn/BackendSettings.hpp new file mode 100644 index 0000000000..931a0681db --- /dev/null +++ b/src/armnn/BackendSettings.hpp @@ -0,0 +1,87 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnn/BackendId.hpp> +#include <vector> + +namespace armnn +{ + +struct BackendSettings +{ + BackendIdVector m_PreferredBackends; + BackendIdSet m_SupportedBackends; + BackendIdSet m_SelectedBackends; + BackendIdSet m_IgnoredBackends; + + BackendSettings() = default; + + BackendSettings(const BackendIdVector& preferredBackends, + const IDeviceSpec& deviceSpec) + { + Initialize(preferredBackends, deviceSpec); + } + + bool IsBackendPreferred(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_PreferredBackends); + } + + bool IsBackendSupported(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_SupportedBackends); + } + + bool IsBackendSelected(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_SelectedBackends); + } + + bool IsBackendIgnored(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_IgnoredBackends); + } + + bool IsCpuRefUsed() const + { + BackendId cpuBackendId(Compute::CpuRef); + return IsBackendSupported(cpuBackendId) && IsBackendPreferred(cpuBackendId); + } + + BackendIdVector GetAvailablePreferredBackends() const + { + BackendIdVector availablePreferredBackends; + for (const BackendId& backend : m_PreferredBackends) + { + if (IsBackendSupported(backend) && !IsBackendIgnored(backend)) + { + availablePreferredBackends.push_back(backend); + } + } + return availablePreferredBackends; + } + +private: + void Initialize(const BackendIdVector& preferredBackends, + const IDeviceSpec& deviceSpec) + { + // Copy preferred backends from input + m_PreferredBackends = preferredBackends; + + // Obtain list of supported backends + const DeviceSpec& spec = 
*boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec); + m_SupportedBackends = spec.GetSupportedBackends(); + } + + template<typename Collection> + bool IsBackendInCollection(const BackendId& backend, const Collection& collection) const + { + return std::find(collection.begin(), collection.end(), backend) != collection.end(); + } +}; + +} //namespace armnn diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index 83d82a5ffe..831d85e404 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -297,6 +297,65 @@ void Graph::AddCopyLayers() } } +void Graph::SubstituteSubGraph(std::unique_ptr<SubGraph> subGraph, IConnectableLayer* substituteLayer) +{ + BOOST_ASSERT(subGraph != nullptr); + BOOST_ASSERT(substituteLayer != nullptr); + + ReplaceSubGraphConnections(*subGraph, substituteLayer); + EraseSubGraphLayers(*subGraph); +} + +void Graph::ReplaceSubGraphConnections(const SubGraph& subGraph, IConnectableLayer* substituteLayer) +{ + BOOST_ASSERT(substituteLayer != nullptr); + BOOST_ASSERT_MSG(std::find(m_Layers.begin(), m_Layers.end(), substituteLayer) != m_Layers.end(), + "Substitue layer is not a member of graph"); + + const SubGraph::InputSlots& subGraphInputSlots = subGraph.GetInputSlots(); + const SubGraph::OutputSlots& subGraphOutputSlots = subGraph.GetOutputSlots(); + + const unsigned int numInputSlots = boost::numeric_cast<unsigned int>(subGraphInputSlots.size()); + const unsigned int numOutputSlots = boost::numeric_cast<unsigned int>(subGraphOutputSlots.size()); + + BOOST_ASSERT(numInputSlots == substituteLayer->GetNumInputSlots()); + BOOST_ASSERT(numOutputSlots == substituteLayer->GetNumOutputSlots()); + + // Disconnect the sub-graph and replace it with the substitute layer + // Step 1: process input slots + for(unsigned int inputSlotIdx = 0u; inputSlotIdx < numInputSlots; ++inputSlotIdx) + { + InputSlot* subGraphInputSlot = subGraphInputSlots.at(inputSlotIdx); + BOOST_ASSERT(subGraphInputSlot != nullptr); + + IOutputSlot* connectedOutputSlot = 
subGraphInputSlot->GetConnection(); + BOOST_ASSERT(connectedOutputSlot != nullptr); + connectedOutputSlot->Disconnect(*subGraphInputSlot); + + IInputSlot& substituteInputSlot = substituteLayer->GetInputSlot(inputSlotIdx); + connectedOutputSlot->Connect(substituteInputSlot); + } + + // Step 2: process output slots + for(unsigned int outputSlotIdx = 0u; outputSlotIdx < numOutputSlots; ++outputSlotIdx) + { + OutputSlot* subGraphOutputSlot = subGraphOutputSlots.at(outputSlotIdx); + BOOST_ASSERT(subGraphOutputSlot != nullptr); + + OutputSlot* substituteOutputSlot = boost::polymorphic_downcast<OutputSlot*>( + &substituteLayer->GetOutputSlot(outputSlotIdx)); + subGraphOutputSlot->MoveAllConnections(*substituteOutputSlot); + } +} + +void Graph::EraseSubGraphLayers(const SubGraph &subGraph) +{ + for (auto layer : subGraph.GetLayers()) + { + EraseLayer(layer); + } +} + void Graph::InferTensorInfos() { for (auto&& layer : TopologicalSort()) diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp index 7ace2e0670..8f93f56b4a 100644 --- a/src/armnn/Graph.hpp +++ b/src/armnn/Graph.hpp @@ -6,6 +6,7 @@ #include "LayersFwd.hpp" #include "IGraphObservable.hpp" +#include "SubGraph.hpp" #include <armnn/Types.hpp> #include <armnn/TensorFwd.hpp> @@ -159,6 +160,8 @@ public: /// and relinking them via an intermediary copy layers. void AddCopyLayers(); + void SubstituteSubGraph(std::unique_ptr<SubGraph> subGraph, IConnectableLayer* substituteLayer); + void InferTensorInfos(); void AttachObservable(IGraphObservable* const observable, GraphEvent notifyOnEvent) { @@ -210,6 +213,9 @@ private: std::unordered_set<LayerBindingId> m_OutputIds; std::unordered_map<const Layer*, Iterator> m_PosInGraphMap; + void ReplaceSubGraphConnections(const SubGraph& subGraph, IConnectableLayer* substituteLayer); + void EraseSubGraphLayers(const SubGraph &subGraph); + /// Mutable to allow sorting on const object. 
mutable LayersList m_Layers; mutable bool m_LayersInOrder; diff --git a/src/armnn/ISubGraphConverter.hpp b/src/armnn/ISubGraphConverter.hpp new file mode 100644 index 0000000000..1d40c6737a --- /dev/null +++ b/src/armnn/ISubGraphConverter.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <memory> + +namespace armnn +{ + +class ISubGraphConverter +{ +public: + virtual ~ISubGraphConverter() {}; + + virtual std::shared_ptr<void> GetOutput() = 0; +}; + +} + diff --git a/src/armnn/InternalTypes.cpp b/src/armnn/InternalTypes.cpp index 9ffd73ac08..16a19722df 100644 --- a/src/armnn/InternalTypes.cpp +++ b/src/armnn/InternalTypes.cpp @@ -44,6 +44,7 @@ char const* GetLayerTypeAsCString(LayerType type) case LayerType::Pad: return "Pad"; case LayerType::Permute: return "Permute"; case LayerType::Pooling2d: return "Pooling2d"; + case LayerType::PreCompiled: return "PreCompiled"; case LayerType::Reshape: return "Reshape"; case LayerType::Rsqrt: return "Rsqrt"; case LayerType::ResizeBilinear: return "ResizeBilinear"; diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp index f4996db73e..f05ea25597 100644 --- a/src/armnn/InternalTypes.hpp +++ b/src/armnn/InternalTypes.hpp @@ -44,6 +44,7 @@ enum class LayerType Pad, Permute, Pooling2d, + PreCompiled, Reshape, ResizeBilinear, Rsqrt, @@ -53,7 +54,7 @@ enum class LayerType StridedSlice, // Last layer goes here. 
LastLayer, - Subtraction = LastLayer, + Subtraction = LastLayer }; const char* GetLayerTypeAsCString(LayerType type); diff --git a/src/armnn/LayerSupportCommon.hpp b/src/armnn/LayerSupportCommon.hpp index d6dda4f93d..c309f8c6c7 100644 --- a/src/armnn/LayerSupportCommon.hpp +++ b/src/armnn/LayerSupportCommon.hpp @@ -7,6 +7,7 @@ #include <armnn/DescriptorsFwd.hpp> #include <armnn/Types.hpp> #include <armnn/Tensor.hpp> +#include <armnn/Optional.hpp> namespace armnn { diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp index 9f55233e8e..8b4ee0804b 100644 --- a/src/armnn/LayersFwd.hpp +++ b/src/armnn/LayersFwd.hpp @@ -36,6 +36,7 @@ #include "layers/PadLayer.hpp" #include "layers/PermuteLayer.hpp" #include "layers/Pooling2dLayer.hpp" +#include "layers/PreCompiledLayer.hpp" #include "layers/ReshapeLayer.hpp" #include "layers/ResizeBilinearLayer.hpp" #include "layers/RsqrtLayer.hpp" @@ -102,6 +103,7 @@ DECLARE_LAYER(Output) DECLARE_LAYER(Pad) DECLARE_LAYER(Permute) DECLARE_LAYER(Pooling2d) +DECLARE_LAYER(PreCompiled) DECLARE_LAYER(Reshape) DECLARE_LAYER(ResizeBilinear) DECLARE_LAYER(Rsqrt) diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 187d04eb2b..7b9cb3db7f 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -2,11 +2,14 @@ // Copyright © 2017 Arm Ltd. All rights reserved. 
// SPDX-License-Identifier: MIT // + #include "Network.hpp" #include "Graph.hpp" #include "Layer.hpp" #include "DeviceSpec.hpp" #include "Optimizer.hpp" +#include "SubGraphSelector.hpp" +#include "BackendSettings.hpp" #include "optimizations/All.hpp" #include <backendsCommon/CpuTensorHandle.hpp> @@ -71,6 +74,41 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const return m_Graph->SerializeToDot(stream); } +struct OptimizationResult +{ + bool m_Warning; + bool m_Error; + + OptimizationResult() + : m_Warning(false) + , m_Error(false) + {} +}; + +void ReportError(const std::string& errorMessage, + Optional<std::vector<std::string>&> errorMessages) +{ + std::stringstream fullErrorMessage; + fullErrorMessage << "ERROR: " << errorMessage; + BOOST_LOG_TRIVIAL(warning) << fullErrorMessage.str(); + if (errorMessages) + { + errorMessages.value().push_back(fullErrorMessage.str()); + } +} + +void ReportWarning(const std::string& warningMessage, + Optional<std::vector<std::string>&> warningMessages) +{ + std::stringstream fullWarningMessage; + fullWarningMessage << "WARNING: " << warningMessage; + BOOST_LOG_TRIVIAL(warning) << fullWarningMessage.str(); + if (warningMessages) + { + warningMessages.value().push_back(fullWarningMessage.str()); + } +} + bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages) { bool noErrors = true; @@ -82,108 +120,50 @@ bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string if (0.f == info.GetQuantizationScale()) { noErrors = false; std::stringstream ss; - ss << "ERROR: output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType()) + ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType()) << " (" << layer->GetNameStr() << ") is of type" << " Quantized 8 bit but its scale parameter has not been set"; - BOOST_LOG_TRIVIAL(warning) << ss.str() ; - if (errMessages) { - errMessages.value().push_back(ss.str()); - } + ReportError(ss.str(), 
errMessages); } } } return noErrors; } -IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, - const std::vector<BackendId>& backendPreferences, - const IDeviceSpec& deviceSpec, - const OptimizerOptions& options, - Optional<std::vector<std::string>&> errMessages) +OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr, + BackendSettings& backendSettings, + Graph::Iterator& firstLayer, + Graph::Iterator& lastLayer, + Optional<std::vector<std::string>&> errMessages) { - if (backendPreferences.empty()) { - throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); - } - const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); - std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); - - auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy); + OptimizationResult result; - OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get()); - - // Perform optimisation passes - using namespace optimizations; - Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(), - SquashEqualReshapeSiblings(), - OptimizeInversePermutes(), - MovePermuteUp(), - PermuteAsReshape(), - OptimizeConsecutiveReshapes())); - - // Infer the tensor infos for all output slots. Throws an exception on failure. 
- optNetObjPtr->GetGraph().InferTensorInfos(); - - // if Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 - if (options.m_ReduceFp32ToFp16) + // Helper lambda to compose meaningful error message before returning with error + auto ReturnWithError = [&](const Layer* layer) { - Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter())); - } - - // if debug optimization is set, then print out data after each layer - if (options.m_Debug) - { - Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(InsertDebugLayer())); - } - - // We know that DeviceSpec should be the only implementation of IDeviceSpec. - const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec); - auto const& supportedBackends = spec.GetSupportedBackends(); - - // determine which of the preferred backends we have available for use - // and whether we have specified CpuRef as one of those backends. - bool cpuRefUsed = false; - std::vector<BackendId> availablePreferredBackends; - for (const auto& backend : backendPreferences) - { - // Check if the backend is in the available backend devices. - if (supportedBackends.count(backend) > 0) - { - availablePreferredBackends.push_back(backend); - if (backend == armnn::Compute::CpuRef) { - cpuRefUsed = true; - } - } - } - if (availablePreferredBackends.empty()) { std::stringstream failureMsg; - failureMsg << "ERROR: None of the preferred backends " << backendPreferences - << " are supported. 
Current platform provides " << supportedBackends; - BOOST_LOG_TRIVIAL(warning) << failureMsg.str(); - if (errMessages) { - errMessages.value().push_back(failureMsg.str()); - } - return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); - } + failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) + << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends; + ReportError(failureMsg.str(), errMessages); + + result.m_Error = true; + return result; + }; - auto ReturnWithError = [&](Layer* layer) + auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends(); + if (availablePreferredBackends.empty()) { std::stringstream failureMsg; - failureMsg << "ERROR: Layer of type " << GetLayerTypeAsCString(layer->GetType()) - << " is not supported on any preferred backend " << backendPreferences; - BOOST_LOG_TRIVIAL(warning) << failureMsg.str(); - if (errMessages) { - errMessages.value().push_back(failureMsg.str()); - } - return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); - }; + failureMsg << "No preferred backends are available"; + ReportError(failureMsg.str(), errMessages); - // The backends that we choose to run layers on - std::unordered_set<BackendId> chosenBackends; + result.m_Error = true; + return result; + } - // Assign a compute device for all nodes - bool bErrorFound = false; - for (auto&& layer : optNetObjPtr->GetGraph()) + for (auto it = firstLayer; it != lastLayer; ++it) { + auto layer = *it; DataType dataType = layer->GetDataType(); std::string reasonIfUnsupported; bool found = false; @@ -191,8 +171,9 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, { // don't bomb immediately, find all the quantized outputs // which haven't had a scale set and report them all back. 
- bErrorFound = true; + result.m_Error = true; } + for (const auto& backend : availablePreferredBackends) { // need to set the compute device on the layer @@ -273,38 +254,36 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, } } std::stringstream warningMsg; - warningMsg << "WARNING: Layer of type " << GetLayerTypeAsCString(layer->GetType()) + warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << " is not supported on requested backend " << layer->GetBackendId().Get() << " for data type " << GetDataTypeName(dataType) << " (reason: " << reasonIfUnsupported << "), falling back to the next backend."; - std::string wMsg = warningMsg.str(); - BOOST_LOG_TRIVIAL(warning) << wMsg; - if (errMessages) { - errMessages.value().push_back(wMsg); - } + ReportWarning(warningMsg.str(), errMessages); } else { found = true; - chosenBackends.insert(backend); + backendSettings.m_SelectedBackends.insert(backend); break; } } // If the layer is unsupported by any devices, log and return a null network. 
- if (!found) { + if (!found) + { // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a // fallback we should set the compute device on the layer to CpuRef (these are not // available as accelerated operations, or are only available under certain // conditions, currently they comprise MemCopy, Constant, Permute) armnn::LayerType layerType = layer->GetType(); - if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy || - layerType == armnn::LayerType::Constant || - layerType == armnn::LayerType::Permute)) + if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy || + layerType == armnn::LayerType::Constant || + layerType == armnn::LayerType::Permute)) { - layer->SetBackendId(armnn::Compute::CpuRef); - chosenBackends.insert(armnn::Compute::CpuRef); + BackendId cpuBackendId(armnn::Compute::CpuRef); + layer->SetBackendId(cpuBackendId); + backendSettings.m_SelectedBackends.insert(cpuBackendId); } else { @@ -312,14 +291,175 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, } } } - if (bErrorFound) + + return result; +} + +OptimizationResult InsertPreCompiledLayers(OptimizedNetwork* optNetObjPtr, + const IBackendInternalUniquePtr& backendObjPtr, + BackendSettings& backendSettings, + Optional<std::vector<std::string>&> errMessages) +{ + BOOST_ASSERT(backendObjPtr); + + OptimizationResult result; + + // Select sub-graphs based on backend + SubGraphSelector::SubGraphs subGraphs = + SubGraphSelector::SelectSubGraphs(optNetObjPtr->GetGraph(), + // select layers assigned to requested backend + [&](const Layer& layer) + { + return layer.GetType() != LayerType::Input && + layer.GetType() != LayerType::Output && + layer.GetBackendId() == backendObjPtr->GetId(); + }); + + if (subGraphs.empty()) + { + // No sub-graphs found -> return with no error + return result; + } + + // Convert sub-graphs and substitute them with pre-compiled layers + unsigned int index = 0u; + for (auto& subGraph : subGraphs) + { + // Create a 
pre-compiled layer + PreCompiledLayer* preCompiledLayer = CreatePreCompiledLayer(optNetObjPtr->GetGraph(), + *subGraph, + index++, + backendObjPtr); + if (preCompiledLayer) + { + // Substitute sub-graph with pre-compiled layer in graph + optNetObjPtr->GetGraph().SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + } + else + { + // Failed to create pre-compiled layer from sub-graph -> + // re-assign sub-graph layers to other available backends + std::stringstream warningMsg; + warningMsg << "Sub-graph #" << index << " failed to compile on " + << backendObjPtr->GetId() << ". Re-assigning backends to " + << subGraph->GetLayers().size() << " layers inside sub-graph"; + ReportWarning(warningMsg.str(), errMessages); + + backendSettings.m_IgnoredBackends = { backendObjPtr->GetId() }; + + Graph::Iterator firstLayer = subGraph->begin(); + Graph::Iterator lastLayer = subGraph->end(); + OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr, + backendSettings, + firstLayer, + lastLayer, + errMessages); + + if (reassignmentResult.m_Error) + { + result.m_Error = true; + return result; + } + } + } + + return result; +} + +IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, + const std::vector<BackendId>& backendPreferences, + const IDeviceSpec& deviceSpec, + const OptimizerOptions& options, + Optional<std::vector<std::string>&> errMessages) +{ + if (backendPreferences.empty()) + { + throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); + } + + const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); + std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); + + auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy); + + OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get()); + + // Perform optimisation passes + using namespace optimizations; + Optimizer::Pass(optNetObjPtr->GetGraph(), 
MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualReshapeSiblings(), + OptimizeInversePermutes(), + MovePermuteUp(), + PermuteAsReshape(), + OptimizeConsecutiveReshapes())); + + // Infer the tensor infos for all output slots. Throws an exception on failure. + optNetObjPtr->GetGraph().InferTensorInfos(); + + // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 + if (options.m_ReduceFp32ToFp16) + { + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter())); + } + + // Initialize backend settings + BackendSettings backendSettings(backendPreferences, deviceSpec); + if (backendSettings.GetAvailablePreferredBackends().empty()) { + std::stringstream failureMsg; + failureMsg << "None of the preferred backends " << backendPreferences + << " are supported. Current platform provides " << backendSettings.m_SupportedBackends; + ReportError(failureMsg.str(), errMessages); + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + } + + // Assign an available backend to each layer + Graph::Iterator firstLayer = optNetObjPtr->GetGraph().begin(); + Graph::Iterator lastLayer = optNetObjPtr->GetGraph().end(); + OptimizationResult assigBackendsResult = AssignBackends(optNetObjPtr, + backendSettings, + firstLayer, + lastLayer, + errMessages); + if (assigBackendsResult.m_Error) + { + // Failed to assign a backend to each layer return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); } Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(), OptimizeInverseConversionsFp32())); + // Insert pre-compiled layers where required by the backend + // TODO: This is a dummy/default backend id used for making the code build until + // we've properly refactored the optimizer + const BackendId backendId(Compute::Undefined); + auto const& backendRegistry = BackendRegistryInstance(); + if (backendRegistry.IsBackendRegistered(backendId)) + { + // Obtain a backend object using the 
registered factory + auto backendFactory = backendRegistry.GetFactory(backendId); + auto backendObjPtr = backendFactory(); + + OptimizationResult insertPreCompiledLayersResult = InsertPreCompiledLayers(optNetObjPtr, + backendObjPtr, + backendSettings, + errMessages); + if (insertPreCompiledLayersResult.m_Error) + { + // Failed to insert pre-compiled layers + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + } + } + + // If the debug flag is set, then insert a DebugLayer after each layer. + // NOTE: This optimization can only happen strictly after the PreCompiled layers have + // already been inserted + if (options.m_Debug) + { + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(InsertDebugLayer())); + } + optNetObjPtr->GetGraph().AddCopyLayers(); // Convert constants @@ -327,7 +467,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat())); // Run backend specific optimizations - for (auto&& chosenBackend : chosenBackends) + for (auto&& chosenBackend : backendSettings.m_SelectedBackends) { auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend); auto backendPtr = factoryFun(); diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp index 9a4ce87b59..735a6244d5 100644 --- a/src/armnn/NetworkUtils.cpp +++ b/src/armnn/NetworkUtils.cpp @@ -5,6 +5,12 @@ #include "NetworkUtils.hpp" +#include "SubGraphSelector.hpp" + +#include <armnn/Exceptions.hpp> + +#include <backendsCommon/BackendRegistry.hpp> + namespace armnn { @@ -74,7 +80,6 @@ std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& g return convertLayers; } - std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer) { std::vector<DebugLayer*> debugLayers; @@ -97,10 +102,58 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer) debugLayer->GetOutputSlot().SetTensorInfo(debugInfo); + // NOTE: It is OK to 
do this because DebugLayer is only supported on CpuRef + debugLayer->SetBackendId(Compute::CpuRef); + debugLayers.emplace_back(debugLayer); } return debugLayers; } +PreCompiledLayer* CreatePreCompiledLayer(Graph& graph, + const SubGraph& subGraph, + unsigned int subGraphIndex, + const IBackendInternalUniquePtr& backendObjPtr) +{ + BOOST_ASSERT(backendObjPtr); + + IBackendInternal::ISubGraphConverterPtr converter = + backendObjPtr->CreateSubGraphConverter(std::make_shared<SubGraph>(subGraph)); + if (!converter) + { + return nullptr; + } + + try + { + // Attempt to convert and compile sub-graph + auto preCompiledObject = converter->GetOutput(); + } + catch (std::exception&) + { + return nullptr; + } + + // Create pre-compiled layer + std::string name = "pre-compiled" + std::to_string(subGraphIndex); + PreCompiledLayer* preCompiledLayer = graph.AddLayer<PreCompiledLayer>( + PreCompiledDescriptor(subGraph.GetNumInputSlots(), subGraph.GetNumOutputSlots()), name.c_str()); + + // Copy output tensor infos from sub-graph + for (unsigned int i = 0u; i < subGraph.GetNumOutputSlots(); i++) + { + preCompiledLayer->GetOutputSlot(i).SetTensorInfo(subGraph.GetOutputSlot(i)->GetTensorInfo()); + } + + // Assign pre-compiled object to layer + preCompiledLayer->SetPreCompiledObject(converter->GetOutput()); + + // Set the backend-id for the pre-compiled layer + BackendId backendId = backendObjPtr->GetId(); + preCompiledLayer->SetBackendId(backendId); + + return preCompiledLayer; +} + } // namespace armnn diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp index b81d5cb5e7..1a520b7195 100644 --- a/src/armnn/NetworkUtils.hpp +++ b/src/armnn/NetworkUtils.hpp @@ -5,7 +5,11 @@ #pragma once +#include "DeviceSpec.hpp" #include "Graph.hpp" +#include "SubGraph.hpp" + +#include <backendsCommon/IBackendInternal.hpp> namespace armnn { @@ -16,4 +20,9 @@ std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& g std::vector<DebugLayer*> 
InsertDebugLayerAfter(Graph& graph, Layer& layer); +PreCompiledLayer* CreatePreCompiledLayer(Graph& graph, + const SubGraph& subGraph, + unsigned int subGraphIndex, + const IBackendInternalUniquePtr& backendObject); + } // namespace armnn diff --git a/src/armnn/SubGraph.cpp b/src/armnn/SubGraph.cpp index 5d41f32932..74a1838ef0 100644 --- a/src/armnn/SubGraph.cpp +++ b/src/armnn/SubGraph.cpp @@ -69,4 +69,34 @@ const SubGraph::Layers & SubGraph::GetLayers() const return m_Layers; } +SubGraph::Layers::iterator SubGraph::begin() +{ + return m_Layers.begin(); +} + +SubGraph::Layers::iterator SubGraph::end() +{ + return m_Layers.end(); +} + +SubGraph::Layers::const_iterator SubGraph::begin() const +{ + return m_Layers.begin(); +} + +SubGraph::Layers::const_iterator SubGraph::end() const +{ + return m_Layers.end(); +} + +SubGraph::Layers::const_iterator SubGraph::cbegin() const +{ + return begin(); +} + +SubGraph::Layers::const_iterator SubGraph::cend() const +{ + return end(); +} + } // namespace armnn diff --git a/src/armnn/SubGraph.hpp b/src/armnn/SubGraph.hpp index 312bb115eb..d22377daff 100644 --- a/src/armnn/SubGraph.hpp +++ b/src/armnn/SubGraph.hpp @@ -8,7 +8,7 @@ #include "Layer.hpp" #include <vector> -#include <unordered_set> +#include <list> namespace armnn { @@ -24,7 +24,7 @@ class SubGraph final public: using InputSlots = std::vector<InputSlot *>; using OutputSlots = std::vector<OutputSlot *>; - using Layers = std::unordered_set<Layer *>; + using Layers = std::list<Layer*>; SubGraph(); SubGraph(InputSlots && inputs, @@ -44,6 +44,15 @@ public: unsigned int GetNumInputSlots() const; unsigned int GetNumOutputSlots() const; + Layers::iterator begin(); + Layers::iterator end(); + + Layers::const_iterator begin() const; + Layers::const_iterator end() const; + + Layers::const_iterator cbegin() const; + Layers::const_iterator cend() const; + private: InputSlots m_InputSlots; OutputSlots m_OutputSlots; diff --git a/src/armnn/SubGraphSelector.cpp 
b/src/armnn/SubGraphSelector.cpp index b87e2b73b1..d0542fd41f 100644 --- a/src/armnn/SubGraphSelector.cpp +++ b/src/armnn/SubGraphSelector.cpp @@ -166,7 +166,7 @@ SubGraphSelector::SelectSubGraphs(Graph& graph, { infoPtr->CollectNonSelectedOutputSlots(outputs, selector); infoPtr->CollectNonSelectedInputs(inputs, selector); - layers.insert(infoPtr->m_Layer); + layers.push_back(infoPtr->m_Layer); } result.emplace_back( std::make_unique<SubGraph>( diff --git a/src/armnn/TypeUtils.hpp b/src/armnn/TypeUtils.hpp index 01a0e6479a..5bb040f780 100644 --- a/src/armnn/TypeUtils.hpp +++ b/src/armnn/TypeUtils.hpp @@ -11,16 +11,9 @@ namespace armnn { - template<DataType DT> struct ResolveTypeImpl; -template<> -struct ResolveTypeImpl<DataType::QuantisedAsymm8> -{ - using Type = uint8_t; -}; - template <> struct ResolveTypeImpl<DataType::Float16> { @@ -34,6 +27,18 @@ struct ResolveTypeImpl<DataType::Float32> }; template<> +struct ResolveTypeImpl<DataType::QuantisedAsymm8> +{ + using Type = uint8_t; +}; + +template<> +struct ResolveTypeImpl<DataType::Signed32> +{ + using Type = int32_t; +}; + +template<> struct ResolveTypeImpl<DataType::Boolean> { using Type = bool; @@ -42,5 +47,4 @@ struct ResolveTypeImpl<DataType::Boolean> template<DataType DT> using ResolveType = typename ResolveTypeImpl<DT>::Type; - } //namespace armnn diff --git a/src/armnn/layers/MergerLayer.cpp b/src/armnn/layers/MergerLayer.cpp index 85dc0e7609..b4b5d3c2ef 100644 --- a/src/armnn/layers/MergerLayer.cpp +++ b/src/armnn/layers/MergerLayer.cpp @@ -180,7 +180,7 @@ void MergerLayer::ValidateTensorShapesFromInputs() VerifyLayerConnections(m_Param.GetNumViews(), CHECK_LOCATION()); std::vector<TensorShape> inputShapes; - for (uint i = 0; i < GetNumInputSlots(); ++i) + for (unsigned int i = 0; i < GetNumInputSlots(); ++i) { inputShapes.push_back(GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape()); } diff --git a/src/armnn/layers/PreCompiledLayer.cpp b/src/armnn/layers/PreCompiledLayer.cpp new file mode 
100644 index 0000000000..c443f9ae79 --- /dev/null +++ b/src/armnn/layers/PreCompiledLayer.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PreCompiledLayer.hpp" + +#include "LayerCloneBase.hpp" + +#include "backendsCommon/Workload.hpp" + +#include <armnn/TypesUtils.hpp> + +namespace armnn +{ + +PreCompiledLayer::PreCompiledLayer(const PreCompiledDescriptor& param, const char* name) + : LayerWithParameters(param.m_NumInputSlots, param.m_NumOutputSlots, LayerType::PreCompiled, param, name) + , m_PreCompiledObject(nullptr) +{} + +PreCompiledLayer::~PreCompiledLayer() +{} + +PreCompiledLayer* PreCompiledLayer::Clone(Graph& graph) const +{ + PreCompiledLayer* clone = CloneBase<PreCompiledLayer>(graph, m_Param, GetName()); + clone->m_PreCompiledObject = this->m_PreCompiledObject; + return clone; +} + +std::unique_ptr<IWorkload> PreCompiledLayer::CreateWorkload(const armnn::Graph& graph, + const armnn::IWorkloadFactory& factory) const +{ + PreCompiledQueueDescriptor descriptor; + descriptor.m_PreCompiledObject = m_PreCompiledObject; + return factory.CreatePreCompiled(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +void PreCompiledLayer::ValidateTensorShapesFromInputs() +{ + // NOTE: since the PreCompiledLayer is an internal layer created from a valid SubGraph, + // we do not need to validate its input shapes +} + +std::shared_ptr<void> PreCompiledLayer::GetPreCompiledObject() const +{ + return m_PreCompiledObject; +} + +void PreCompiledLayer::SetPreCompiledObject(const std::shared_ptr<void>& preCompiledObject) +{ + m_PreCompiledObject = preCompiledObject; +} + +} // namespace armnn diff --git a/src/armnn/layers/PreCompiledLayer.hpp b/src/armnn/layers/PreCompiledLayer.hpp new file mode 100644 index 0000000000..fd28d0e7a8 --- /dev/null +++ b/src/armnn/layers/PreCompiledLayer.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "LayerWithParameters.hpp" +#include <backendsCommon/WorkloadFactory.hpp> + +#include <armnn/Descriptors.hpp> + +#include <memory> + +namespace armnn +{ + +class PreCompiledLayer : public LayerWithParameters<PreCompiledDescriptor> +{ +public: + PreCompiledLayer(const PreCompiledDescriptor& param, const char* name); + ~PreCompiledLayer(); + + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + PreCompiledLayer* Clone(Graph &graph) const override; + + void ValidateTensorShapesFromInputs() override; + + std::shared_ptr<void> GetPreCompiledObject() const; + + void SetPreCompiledObject(const std::shared_ptr<void>& preCompiledObject); + +private: + PreCompiledLayer(const PreCompiledLayer& other) = delete; + PreCompiledLayer& operator=(const PreCompiledLayer& other) = delete; + + std::shared_ptr<void> m_PreCompiledObject; +}; + +} // namespace armnn diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index f52f6055ca..acc5cbdb1a 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -14,6 +14,8 @@ #include <Graph.hpp> #include <DataLayoutIndexed.hpp> +#include <Network.hpp> +#include <TypeUtils.hpp> #include <utility> @@ -1093,4 +1095,130 @@ std::unique_ptr<MergerWorkload> CreateMergerWorkloadTest(armnn::IWorkloadFactory return std::move(workloadMerger); } +template <typename PreCompiledWorkload, armnn::DataType dataType> +std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest( + armnn::IWorkloadFactory& factory, + armnn::Graph& graph, + bool biasEnabled = false) +{ + // To create a PreCompiled layer, create a network and Optimize it. 
+ armnn::Network net; + + // Add an input layer + armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(inputLayer); + + // ArmNN weights tensor shape is OIHW (out channels, in channels, height, width) for NCHW + // ArmNN weights tensor shape is OHWI (out channels, height, width, in channels) for NHWC + // this test is using NHWC, so the weights shape is OHWI + TensorInfo weightsTensorInfo(TensorShape({16, 1, 1, 16}), dataType, 0.9f, 0); + unsigned int weightsLength = weightsTensorInfo.GetNumElements(); + + using WeightType = armnn::ResolveType<dataType>; + std::vector<WeightType> convWeightsData(weightsLength); + for (unsigned int i = 0; i < weightsLength; ++i) + { + convWeightsData[i] = static_cast<WeightType>(i); + } + + armnn::ConstTensor weights(weightsTensorInfo, convWeightsData); + + // Add a layer that can be used in the PreCompiled layer + armnn::Convolution2dDescriptor convDesc2d; + convDesc2d.m_StrideX = 1; + convDesc2d.m_StrideY = 1; + convDesc2d.m_BiasEnabled = biasEnabled; + convDesc2d.m_DataLayout = armnn::DataLayout::NHWC; + + armnn::IConnectableLayer* convLayer = nullptr; + const std::string convLayerName("conv layer"); + + if (biasEnabled) + { + constexpr armnn::DataType biasDataType = ( dataType == armnn::DataType::QuantisedAsymm8) ? 
+ armnn::DataType::Signed32 : armnn::DataType::Float32; + + TensorInfo biasTensorInfo(TensorShape({1, 1, 1, 16}), biasDataType, 0.9f * 0.9f, 0); + unsigned int biasLength = biasTensorInfo.GetNumElements(); + + using BiasType = armnn::ResolveType<biasDataType>; + std::vector<BiasType> biasData(biasLength); + std::fill(biasData.begin(), biasData.end(), static_cast<BiasType>(0)); + + armnn::ConstTensor biases(biasTensorInfo, biasData); + + // Create convolution layer with biases + convLayer = net.AddConvolution2dLayer(convDesc2d, weights, biases, convLayerName.c_str()); + } + else + { + // Create convolution layer without biases + convLayer = net.AddConvolution2dLayer(convDesc2d, weights, convLayerName.c_str()); + } + + BOOST_TEST(convLayer); + + // Add an output layer + armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(outputLayer); + + // set the tensors in the network (NHWC format) + TensorInfo inputTensorInfo(TensorShape({ 1, 16, 16, 16 }), dataType); + if (dataType == armnn::DataType::QuantisedAsymm8) + { + inputTensorInfo.SetQuantizationOffset(0); + inputTensorInfo.SetQuantizationScale(0.9f); + } + + TensorInfo outputTensorInfo(TensorShape({1, 16, 16, 16}), dataType); + if (dataType == armnn::DataType::QuantisedAsymm8) + { + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(0.9f); + } + + // Connect the layers + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // Optimize the network for the backend supported by the factory + std::vector<armnn::BackendId> backends = {factory.GetBackendId()}; + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + armnn::OptimizerOptions optimizerOptions; + 
armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(), + optimizerOptions); + BOOST_CHECK(optimizedNet != nullptr); + + // Find the PreCompiled layer in the optimised graph + armnn::Graph& optimisedGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph(); + Layer* preCompiledLayer = nullptr; + for (auto& layer : optimisedGraph) + { + if (layer->GetType() == LayerType::PreCompiled) + { + preCompiledLayer = layer; + } + } + BOOST_TEST(preCompiledLayer); + + // Create the TensorHandles. + CreateTensorHandles(optimisedGraph, factory); + + // Make the workload and check it. + auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, optimisedGraph, factory); + + PreCompiledQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // Returns the workload so we can do extra, backend-specific tests. + // NOTE: We need to return the optimised network as well, otherwise it gets + // out of scope and the tensor handles get destructed + return std::make_pair(std::move(optimizedNet), std::move(workload)); +} + } diff --git a/src/armnn/test/SubGraphTests.cpp b/src/armnn/test/SubGraphTests.cpp index e516ac0fa6..9e49197ea6 100644 --- a/src/armnn/test/SubGraphTests.cpp +++ b/src/armnn/test/SubGraphTests.cpp @@ -17,6 +17,20 @@ using namespace armnn; namespace { +bool AreAnySubGraphLayersPresentInGraph(const SubGraph::Layers &subGraphLayers, const Graph &graph) +{ + for(auto&& layer : subGraphLayers) + { + auto posInGraph = std::find(graph.begin(), graph.end(), layer); + if(posInGraph != graph.end()) + { + return true; + } + } + + return false; +} + // // this helper only works if all layers where the inputs connect to are not selected // @@ -112,6 +126,235 @@ void CompareSubGraphs(SubGraphSelector::SubGraphPtr & result, } // namespace <anonymous> +BOOST_AUTO_TEST_SUITE(SubGraphSubstitution) + 
+BOOST_AUTO_TEST_CASE(SingleInputSingleOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + + Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom(CreateInputsFrom({convLayer1}), CreateOutputsFrom({convLayer2}), {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn = subGraph->GetInputSlot(0)->GetConnection(); + IInputSlot* subGraphOutputConn = subGraph->GetOutputSlot(0)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(1, 1); + Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn); + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn); +} + +BOOST_AUTO_TEST_CASE(MultiInputSingleOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer<SplitterLayer>(splitterDescriptor, "splitter"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = 
graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer<MergerLayer>(mergerDescriptor, "merger"); + + Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom(CreateInputsFrom({convLayer1, convLayer2}), CreateOutputsFrom({mergerLayer}), {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraph->GetInputSlot(0)->GetConnection(); + IOutputSlot* subGraphInputConn2 = subGraph->GetInputSlot(1)->GetConnection(); + + IInputSlot* subGraphOutputConn = subGraph->GetOutputSlot(0)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(2, 1); + Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(1).GetConnection(), subGraphInputConn2); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn); +} + +BOOST_AUTO_TEST_CASE(SingleInputMultiOutput) +{ + // Construct 
graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer<MergerLayer>(mergerDescriptor, "merger"); + Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer<SplitterLayer>(splitterDescriptor, "splitter"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom(CreateInputsFrom({splitterLayer}), CreateOutputsFrom({convLayer1, convLayer2}), {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraph->GetInputSlot(0)->GetConnection(); + + IInputSlot* subGraphOutputConn1 = subGraph->GetOutputSlot(0)->GetConnection(0); + IInputSlot* subGraphOutputConn2 = subGraph->GetOutputSlot(1)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(1, 2); + Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + 
BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(1).GetConnection(0), subGraphOutputConn2); +} + +BOOST_AUTO_TEST_CASE(MultiInputMultiOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer<SplitterLayer>(splitterDescriptor, "splitter"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer<MergerLayer>(mergerDescriptor, "merger"); + + Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = CreateSubGraphFrom(CreateInputsFrom({convLayer1, convLayer2}), + CreateOutputsFrom({convLayer1, convLayer2}), + {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraph->GetInputSlot(0)->GetConnection(); + IOutputSlot* subGraphInputConn2 = subGraph->GetInputSlot(1)->GetConnection(); + + IInputSlot* subGraphOutputConn1 = subGraph->GetOutputSlot(0)->GetConnection(0); + IInputSlot* subGraphOutputConn2 = 
subGraph->GetOutputSlot(1)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(2, 2); + Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(1).GetConnection(), subGraphInputConn2); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(1).GetConnection(0), subGraphOutputConn2); +} + +BOOST_AUTO_TEST_CASE(EraseReplacedLayers) +{ + // Construct graph + Graph graph; + + graph.AddLayer<InputLayer>(0, "input"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer<SplitterLayer>(splitterDescriptor, "splitter"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer<MergerLayer>(mergerDescriptor, "merger"); + + graph.AddLayer<OutputLayer>(0, "output"); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom({}, {}, {splitterLayer, convLayer1, convLayer2, mergerLayer}); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(0, 0); + Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled"); + + // Save sub-graph layers for later verification + const SubGraph::Layers subGraphLayers = subGraph->GetLayers(); + + // Substitute sub-graph with pre-compiled layer + 
graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that the layers belonging to the sub-graph have been erased from the graph after substitution + BOOST_CHECK(!AreAnySubGraphLayersPresentInGraph(subGraphLayers, graph)); +} + +BOOST_AUTO_TEST_SUITE_END() + BOOST_AUTO_TEST_SUITE(SubGraphSelection) BOOST_AUTO_TEST_CASE(NoSubGraphsForNoMatch) @@ -585,3 +828,167 @@ BOOST_AUTO_TEST_CASE(MultiInputMultiOutput) } BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(IntegrationTests) + +BOOST_AUTO_TEST_CASE(SingleSubGraph) +{ + // This test case represents the scenario when we have one subgraph + // in which two layers have GpuAcc backend assigned + + //Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + convLayer1->SetBackendId(Compute::GpuAcc); + + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + convLayer2->SetBackendId(Compute::GpuAcc); + + Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // GpuAcc sub graph selector + SubGraphSelector::SubGraphs subGraphs = + SubGraphSelector::SelectSubGraphs( + graph, + // select the GpuAcc layers only + [](const Layer & l){ + bool toSelect = (l.GetBackendId() == Compute::GpuAcc); + return toSelect; + }); + + BOOST_TEST(subGraphs.size() == 1); + if(subGraphs.size() == 1) + { + BOOST_TEST((subGraphs[0] != nullptr)); + + if (subGraphs[0].get() != nullptr) + { + unsigned int numInputSlots = boost::numeric_cast<unsigned int>(subGraphs[0]->GetInputSlots().size()); + unsigned int numOutputSlots = boost::numeric_cast<unsigned 
int>(subGraphs[0]->GetOutputSlots().size()); + + BOOST_TEST((numInputSlots == 1)); + BOOST_TEST((numOutputSlots == 1)); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraphs[0]->GetInputSlot(0)->GetConnection(); + IInputSlot* subGraphOutputConn1 = subGraphs[0]->GetOutputSlot(0)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(numInputSlots, numOutputSlots); + Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph((std::move(subGraphs[0])), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn1); + } + } +} + +BOOST_AUTO_TEST_CASE(MultipleSubGraphs) +{ + // This test case represents the scenario when we have two subgraphs + // in which two layers have CpuAcc backend assigned + + //Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer<SplitterLayer>(splitterDescriptor, "splitter"); + splitterLayer->SetBackendId(Compute::CpuAcc); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer<MergerLayer>(mergerDescriptor, "merger"); + mergerLayer->SetBackendId(Compute::CpuAcc); + + Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + 
splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // CpuAcc sub graph selector + SubGraphSelector::SubGraphs subGraphs = + SubGraphSelector::SelectSubGraphs( + graph, + // select the CpuAcc layers only + [](const Layer & l){ + bool toSelect = (l.GetBackendId() == Compute::CpuAcc); + return toSelect; + }); + + BOOST_TEST(subGraphs.size() == 2); + if(subGraphs.size() == 2) + { + BOOST_TEST((subGraphs[0] != nullptr)); + BOOST_TEST((subGraphs[1] != nullptr)); + + if (subGraphs[0].get() != nullptr && subGraphs[1].get() != nullptr) + { + //Sort subGraphs by their inputSlot size. + std::sort(subGraphs.begin(), subGraphs.end(), + [](SubGraphSelector::SubGraphPtr & lhs, SubGraphSelector::SubGraphPtr & rhs) + { + return (lhs->GetInputSlots().size() < rhs->GetInputSlots().size()); + } + ); + + unsigned int numInputSlots1 = boost::numeric_cast<unsigned int>(subGraphs[0]->GetInputSlots().size()); + unsigned int numOutputSlots1 = boost::numeric_cast<unsigned int>(subGraphs[0]->GetOutputSlots().size()); + + unsigned int numInputSlots2 = boost::numeric_cast<unsigned int>(subGraphs[1]->GetInputSlots().size()); + unsigned int numOutputSlots2 = boost::numeric_cast<unsigned int>(subGraphs[1]->GetOutputSlots().size()); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraph1InputConn = subGraphs[0]->GetInputSlot(0)->GetConnection(); + IInputSlot* subGraph1OutputConn1 = subGraphs[0]->GetOutputSlot(0)->GetConnection(0); + IInputSlot* subGraph1OutputConn2 = subGraphs[0]->GetOutputSlot(1)->GetConnection(0); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraph2InputConn1 = 
subGraphs[1]->GetInputSlot(0)->GetConnection(); + IOutputSlot* subGraph2InputConn2 = subGraphs[1]->GetInputSlot(1)->GetConnection(); + IInputSlot* subGraph2OutputConn = subGraphs[1]->GetOutputSlot(0)->GetConnection(0); + + PreCompiledDescriptor preCompiledDescriptor1(numInputSlots1, numOutputSlots1); + Layer* const preCompiledLayer1 = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor1, "pre-compiled1"); + + PreCompiledDescriptor preCompiledDescriptor2(numInputSlots2, numOutputSlots2); + Layer* const preCompiledLayer2 = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor2, "pre-compiled2"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph((std::move(subGraphs[0])), preCompiledLayer1); + graph.SubstituteSubGraph((std::move(subGraphs[1])), preCompiledLayer2); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer1->GetInputSlot(0).GetConnection(), subGraph1InputConn); + BOOST_CHECK_EQUAL(preCompiledLayer1->GetOutputSlot(0).GetConnection(0), subGraph1OutputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer1->GetOutputSlot(1).GetConnection(0), subGraph1OutputConn2); + + BOOST_CHECK_EQUAL(preCompiledLayer2->GetInputSlot(0).GetConnection(), subGraph2InputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer2->GetInputSlot(1).GetConnection(), subGraph2InputConn2); + BOOST_CHECK_EQUAL(preCompiledLayer2->GetOutputSlot(0).GetConnection(0), subGraph2OutputConn); + } + } +} + +BOOST_AUTO_TEST_SUITE_END() |