From 4912402497a51c6afe0898b3900f87feefa006a6 Mon Sep 17 00:00:00 2001 From: Matteo Martincigh Date: Fri, 11 Jan 2019 13:25:59 +0000 Subject: IVGCVSW-2454 Merge together the pluggable backends work (was in a separate branch) and master * Brings in all the changes done for the pluggable backends * Added sub-graph support and tests * Added precompiled layer support and tests * Moved BackendSettings to a separate file * Removed the backend-specific code * Ported DebugLayer and associated functionality * Included fixes to make those changes work with master Change-Id: Id7028fa7917527b844628d5aff5732e3d94c0488 --- Android.mk | 3 +- CMakeLists.txt | 4 + include/armnn/BackendId.hpp | 3 +- include/armnn/Descriptors.hpp | 23 +- include/armnn/DescriptorsFwd.hpp | 16 +- include/armnn/ILayerSupport.hpp | 4 + include/armnn/INetwork.hpp | 9 +- include/armnn/LayerSupport.hpp | 6 + include/armnn/TypesUtils.hpp | 10 + src/armnn/BackendSettings.hpp | 87 ++++ src/armnn/Graph.cpp | 59 +++ src/armnn/Graph.hpp | 6 + src/armnn/ISubGraphConverter.hpp | 22 + src/armnn/InternalTypes.cpp | 1 + src/armnn/InternalTypes.hpp | 3 +- src/armnn/LayerSupportCommon.hpp | 1 + src/armnn/LayersFwd.hpp | 2 + src/armnn/Network.cpp | 340 +++++++++----- src/armnn/NetworkUtils.cpp | 55 ++- src/armnn/NetworkUtils.hpp | 9 + src/armnn/SubGraph.cpp | 30 ++ src/armnn/SubGraph.hpp | 13 +- src/armnn/SubGraphSelector.cpp | 2 +- src/armnn/TypeUtils.hpp | 20 +- src/armnn/layers/MergerLayer.cpp | 2 +- src/armnn/layers/PreCompiledLayer.cpp | 56 +++ src/armnn/layers/PreCompiledLayer.hpp | 42 ++ src/armnn/test/CreateWorkload.hpp | 128 ++++++ src/armnn/test/SubGraphTests.cpp | 407 +++++++++++++++++ src/backends/aclCommon/test/MemCopyTestImpl.hpp | 43 +- src/backends/aclCommon/test/MemCopyTests.cpp | 12 +- src/backends/backends.cmake | 4 +- src/backends/backendsCommon/IBackendInternal.hpp | 8 + src/backends/backendsCommon/LayerSupportBase.cpp | 7 + src/backends/backendsCommon/LayerSupportBase.hpp | 4 + src/backends/backendsCommon/WorkloadData.cpp | 5 + src/backends/backendsCommon/WorkloadData.hpp | 12 + src/backends/backendsCommon/WorkloadDataFwd.hpp | 3 +- src/backends/backendsCommon/WorkloadFactory.cpp | 12 +- src/backends/backendsCommon/WorkloadFactory.hpp | 3 + src/backends/backendsCommon/test/CMakeLists.txt | 2 + src/backends/backendsCommon/test/DebugTestImpl.hpp | 6 +- .../test/IsLayerSupportedTestImpl.hpp | 2 + src/backends/backendsCommon/test/LayerTests.cpp | 36 ++ src/backends/backendsCommon/test/LayerTests.hpp | 52 +++ .../backendsCommon/test/PreCompiledTestImpl.cpp | 491 +++++++++++++++++++++ .../backendsCommon/test/PreCompiledTestImpl.hpp | 27 ++ src/backends/cl/ClBackend.cpp | 6 + src/backends/cl/ClBackend.hpp | 3 + src/backends/cl/ClWorkloadFactory.cpp | 6 + src/backends/cl/ClWorkloadFactory.hpp | 3 + src/backends/cl/test/ClMemCopyTests.cpp | 12 +- src/backends/neon/NeonBackend.cpp | 6 + src/backends/neon/NeonBackend.hpp | 3 + src/backends/neon/NeonWorkloadFactory.cpp | 6 + src/backends/neon/NeonWorkloadFactory.hpp | 3 + src/backends/neon/test/NeonMemCopyTests.cpp | 12 +- src/backends/reference/RefBackend.cpp | 6 + src/backends/reference/RefBackend.hpp | 3 + src/backends/reference/RefWorkloadFactory.cpp | 8 +- src/backends/reference/RefWorkloadFactory.hpp | 4 + src/backends/reference/workloads/Debug.cpp | 2 +- .../reference/workloads/RefDebugWorkload.cpp | 7 +- .../reference/workloads/RefDebugWorkload.hpp | 7 +- tests/ExecuteNetwork/ExecuteNetwork.cpp | 4 +- tests/InferenceTest.cpp | 2 - .../TfLiteVGG16Quantized-Armnn.cpp | 14 +- 67 
files changed, 2016 insertions(+), 193 deletions(-) create mode 100644 src/armnn/BackendSettings.hpp create mode 100644 src/armnn/ISubGraphConverter.hpp create mode 100644 src/armnn/layers/PreCompiledLayer.cpp create mode 100644 src/armnn/layers/PreCompiledLayer.hpp mode change 100755 => 100644 src/backends/backendsCommon/test/LayerTests.cpp create mode 100644 src/backends/backendsCommon/test/PreCompiledTestImpl.cpp create mode 100644 src/backends/backendsCommon/test/PreCompiledTestImpl.hpp diff --git a/Android.mk b/Android.mk index c02dc39db6..c61c7103d7 100644 --- a/Android.mk +++ b/Android.mk @@ -92,6 +92,7 @@ LOCAL_SRC_FILES := \ src/armnn/layers/ConvertFp32ToFp16Layer.cpp \ src/armnn/layers/DebugLayer.cpp \ src/armnn/layers/DepthwiseConvolution2dLayer.cpp \ + src/armnn/layers/DivisionLayer.cpp \ src/armnn/layers/ElementwiseBaseLayer.cpp \ src/armnn/layers/EqualLayer.cpp \ src/armnn/layers/FakeQuantizationLayer.cpp \ @@ -112,7 +113,7 @@ LOCAL_SRC_FILES := \ src/armnn/layers/PadLayer.cpp \ src/armnn/layers/PermuteLayer.cpp \ src/armnn/layers/Pooling2dLayer.cpp \ - src/armnn/layers/DivisionLayer.cpp \ + src/armnn/layers/PreCompiledLayer.cpp \ src/armnn/layers/ReshapeLayer.cpp \ src/armnn/layers/ResizeBilinearLayer.cpp \ src/armnn/layers/RsqrtLayer.cpp \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 616f616474..b39f785f3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,6 +234,8 @@ list(APPEND armnn_sources src/armnn/layers/Pooling2dLayer.cpp src/armnn/layers/DivisionLayer.cpp src/armnn/layers/DivisionLayer.hpp + src/armnn/layers/PreCompiledLayer.hpp + src/armnn/layers/PreCompiledLayer.cpp src/armnn/layers/ReshapeLayer.hpp src/armnn/layers/ReshapeLayer.cpp src/armnn/layers/SpaceToBatchNdLayer.hpp @@ -250,6 +252,7 @@ list(APPEND armnn_sources src/armnn/layers/StridedSliceLayer.hpp src/armnn/layers/SubtractionLayer.cpp src/armnn/layers/SubtractionLayer.hpp + src/armnn/BackendSettings.hpp src/armnn/Descriptors.cpp src/armnn/DeviceSpec.hpp src/armnn/Exceptions.cpp @@ -259,6 +262,7 @@ list(APPEND armnn_sources src/armnn/Instrument.hpp src/armnn/InternalTypes.cpp src/armnn/InternalTypes.hpp + src/armnn/ISubGraphConverter.hpp src/armnn/JsonPrinter.cpp src/armnn/JsonPrinter.hpp src/armnn/Layer.cpp diff --git a/include/armnn/BackendId.hpp b/include/armnn/BackendId.hpp index 87206073be..129cbb5d46 100644 --- a/include/armnn/BackendId.hpp +++ b/include/armnn/BackendId.hpp @@ -170,7 +170,8 @@ std::ostream& operator<<(std::ostream& os, return os; } -using BackendIdSet = std::unordered_set; +using BackendIdVector = std::vector; +using BackendIdSet = std::unordered_set; } // namespace armnn diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp index 2b30c2bcf6..4497d0da9e 100644 --- a/include/armnn/Descriptors.hpp +++ b/include/armnn/Descriptors.hpp @@ -18,7 +18,7 @@ namespace armnn /// An ActivationDescriptor for the ActivationLayer. struct ActivationDescriptor { - ActivationDescriptor() : m_Function(ActivationFunction::Sigmoid), m_A(0), m_B(0) {}; + ActivationDescriptor() : m_Function(ActivationFunction::Sigmoid), m_A(0), m_B(0) {} /// @brief The activation function to use /// (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square). @@ -48,7 +48,7 @@ struct PermuteDescriptor /// A SoftmaxDescriptor for the SoftmaxLayer. struct SoftmaxDescriptor { - SoftmaxDescriptor() : m_Beta(1.0f) {}; + SoftmaxDescriptor() : m_Beta(1.0f) {} /// Exponentiation value. 
float m_Beta; }; @@ -221,7 +221,7 @@ struct Pooling2dDescriptor , m_OutputShapeRounding(OutputShapeRounding::Floor) , m_PaddingMethod(PaddingMethod::Exclude) , m_DataLayout(DataLayout::NCHW) - {}; + {} /// The pooling algorithm to use (Max. Average, L2). PoolingAlgorithm m_PoolType; @@ -255,7 +255,7 @@ struct FullyConnectedDescriptor FullyConnectedDescriptor() : m_BiasEnabled(false) , m_TransposeWeightMatrix(false) - {}; + {} /// Enable/disable bias. bool m_BiasEnabled; @@ -275,7 +275,7 @@ struct Convolution2dDescriptor , m_StrideY(0) , m_BiasEnabled(false) , m_DataLayout(DataLayout::NCHW) - {}; + {} /// Padding left value in the width dimension. uint32_t m_PadLeft; @@ -608,4 +608,17 @@ struct DebugDescriptor unsigned int m_SlotIndex; }; +/// A PreCompiledDescriptor for the PreCompiledLayer. +struct PreCompiledDescriptor +{ + PreCompiledDescriptor(unsigned int numInputSlots = 1u, unsigned int numOutputSlots = 1u) + : m_NumInputSlots(numInputSlots), m_NumOutputSlots(numOutputSlots) + {} + + ~PreCompiledDescriptor() = default; + + unsigned int m_NumInputSlots; + unsigned int m_NumOutputSlots; +}; + } diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp index 729f739b3d..c1d21b5665 100644 --- a/include/armnn/DescriptorsFwd.hpp +++ b/include/armnn/DescriptorsFwd.hpp @@ -2,6 +2,7 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // + #pragma once namespace armnn @@ -10,24 +11,25 @@ struct ActivationDescriptor; struct BatchNormalizationDescriptor; struct BatchToSpaceNdDescriptor; struct Convolution2dDescriptor; +struct DebugDescriptor; struct DepthwiseConvolution2dDescriptor; struct FakeQuantizationDescriptor; struct FullyConnectedDescriptor; -struct LstmDescriptor; -struct PermuteDescriptor; -struct NormalizationDescriptor; struct L2NormalizationDescriptor; +struct LstmDescriptor; struct MeanDescriptor; +struct NormalizationDescriptor; +struct OriginsDescriptor; struct PadDescriptor; +struct PermuteDescriptor; struct Pooling2dDescriptor; +struct PreCompiledDescriptor; struct ReshapeDescriptor; -struct SpaceToBatchNdDescriptor; struct ResizeBilinearDescriptor; struct SoftmaxDescriptor; -struct OriginsDescriptor; -struct ViewsDescriptor; +struct SpaceToBatchNdDescriptor; struct StridedSliceDescriptor; -struct DebugDescriptor; +struct ViewsDescriptor; using MergerDescriptor = OriginsDescriptor; using SplitterDescriptor = ViewsDescriptor; diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp index bba344975a..929896d285 100644 --- a/include/armnn/ILayerSupport.hpp +++ b/include/armnn/ILayerSupport.hpp @@ -189,6 +189,10 @@ public: const Pooling2dDescriptor& descriptor, Optional reasonIfUnsupported = EmptyOptional()) const = 0; + virtual bool IsPreCompiledSupported(const TensorInfo& input, + const PreCompiledDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const = 0; + virtual bool IsReshapeSupported(const TensorInfo& input, const ReshapeDescriptor& descriptor, Optional reasonIfUnsupported = EmptyOptional()) const = 0; diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 5f341ad6fa..f31176ad7c 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -364,16 +364,15 @@ protected: struct OptimizerOptions { - OptimizerOptions() : - m_ReduceFp32ToFp16(false) - , m_Debug(false) + OptimizerOptions() + : m_ReduceFp32ToFp16(false) + , m_Debug(false) {} OptimizerOptions(bool reduceFp32ToFp16, bool debug) : m_ReduceFp32ToFp16(reduceFp32ToFp16) , 
m_Debug(debug) - { - } + {} // Reduce Fp32 data to Fp16 for faster processing bool m_ReduceFp32ToFp16; diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp index 8286ec6109..3cf53dd844 100644 --- a/include/armnn/LayerSupport.hpp +++ b/include/armnn/LayerSupport.hpp @@ -251,6 +251,12 @@ bool IsPermuteSupported(const BackendId& backend, char* reasonIfUnsupported = nullptr, size_t reasonIfUnsupportedMaxLength = 1024); +/// Deprecated in favor of IBackend and ILayerSupport interfaces +bool IsPreCompiledSupported(const BackendId& backend, + const TensorInfo& input, + char* reasonIfUnsupported = nullptr, + size_t reasonIfUnsupportedMaxLength = 1024); + /// Deprecated in favor of IBackend and ILayerSupport interfaces bool IsPooling2dSupported(const BackendId& backend, const TensorInfo& input, diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp index 7eacc00a93..8c4ceb8d4f 100644 --- a/include/armnn/TypesUtils.hpp +++ b/include/armnn/TypesUtils.hpp @@ -135,6 +135,16 @@ constexpr const char* GetDataTypeName(DataType dataType) } } +constexpr const char* GetDataLayoutName(DataLayout dataLayout) +{ + switch (dataLayout) + { + case DataLayout::NCHW: return "NCHW"; + case DataLayout::NHWC: return "NHWC"; + default: return "Unknown"; + } +} + template struct IsHalfType diff --git a/src/armnn/BackendSettings.hpp b/src/armnn/BackendSettings.hpp new file mode 100644 index 0000000000..931a0681db --- /dev/null +++ b/src/armnn/BackendSettings.hpp @@ -0,0 +1,87 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +struct BackendSettings +{ + BackendIdVector m_PreferredBackends; + BackendIdSet m_SupportedBackends; + BackendIdSet m_SelectedBackends; + BackendIdSet m_IgnoredBackends; + + BackendSettings() = default; + + BackendSettings(const BackendIdVector& preferredBackends, + const IDeviceSpec& deviceSpec) + { + Initialize(preferredBackends, deviceSpec); + } + + bool IsBackendPreferred(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_PreferredBackends); + } + + bool IsBackendSupported(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_SupportedBackends); + } + + bool IsBackendSelected(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_SelectedBackends); + } + + bool IsBackendIgnored(const BackendId& backend) const + { + return IsBackendInCollection(backend, m_IgnoredBackends); + } + + bool IsCpuRefUsed() const + { + BackendId cpuBackendId(Compute::CpuRef); + return IsBackendSupported(cpuBackendId) && IsBackendPreferred(cpuBackendId); + } + + BackendIdVector GetAvailablePreferredBackends() const + { + BackendIdVector availablePreferredBackends; + for (const BackendId& backend : m_PreferredBackends) + { + if (IsBackendSupported(backend) && !IsBackendIgnored(backend)) + { + availablePreferredBackends.push_back(backend); + } + } + return availablePreferredBackends; + } + +private: + void Initialize(const BackendIdVector& preferredBackends, + const IDeviceSpec& deviceSpec) + { + // Copy preferred backends from input + m_PreferredBackends = preferredBackends; + + // Obtain list of supported backends + const DeviceSpec& spec = *boost::polymorphic_downcast(&deviceSpec); + m_SupportedBackends = spec.GetSupportedBackends(); + } + + template + bool IsBackendInCollection(const BackendId& backend, const Collection& collection) const + { + return std::find(collection.begin(), collection.end(), 
backend) != collection.end();
    }
};

} //namespace armnn
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 83d82a5ffe..831d85e404 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -297,6 +297,65 @@ void Graph::AddCopyLayers()
     }
 }
 
+void Graph::SubstituteSubGraph(std::unique_ptr<SubGraph> subGraph, IConnectableLayer* substituteLayer)
+{
+    BOOST_ASSERT(subGraph != nullptr);
+    BOOST_ASSERT(substituteLayer != nullptr);
+
+    ReplaceSubGraphConnections(*subGraph, substituteLayer);
+    EraseSubGraphLayers(*subGraph);
+}
+
+void Graph::ReplaceSubGraphConnections(const SubGraph& subGraph, IConnectableLayer* substituteLayer)
+{
+    BOOST_ASSERT(substituteLayer != nullptr);
+    BOOST_ASSERT_MSG(std::find(m_Layers.begin(), m_Layers.end(), substituteLayer) != m_Layers.end(),
+                     "Substitute layer is not a member of graph");
+
+    const SubGraph::InputSlots& subGraphInputSlots = subGraph.GetInputSlots();
+    const SubGraph::OutputSlots& subGraphOutputSlots = subGraph.GetOutputSlots();
+
+    const unsigned int numInputSlots = boost::numeric_cast<unsigned int>(subGraphInputSlots.size());
+    const unsigned int numOutputSlots = boost::numeric_cast<unsigned int>(subGraphOutputSlots.size());
+
+    BOOST_ASSERT(numInputSlots == substituteLayer->GetNumInputSlots());
+    BOOST_ASSERT(numOutputSlots == substituteLayer->GetNumOutputSlots());
+
+    // Disconnect the sub-graph and replace it with the substitute layer
+    // Step 1: process input slots
+    for (unsigned int inputSlotIdx = 0u; inputSlotIdx < numInputSlots; ++inputSlotIdx)
+    {
+        InputSlot* subGraphInputSlot = subGraphInputSlots.at(inputSlotIdx);
+        BOOST_ASSERT(subGraphInputSlot != nullptr);
+
+        IOutputSlot* connectedOutputSlot = subGraphInputSlot->GetConnection();
+        BOOST_ASSERT(connectedOutputSlot != nullptr);
+        connectedOutputSlot->Disconnect(*subGraphInputSlot);
+
+        IInputSlot& substituteInputSlot = substituteLayer->GetInputSlot(inputSlotIdx);
+        connectedOutputSlot->Connect(substituteInputSlot);
+    }
+
+    // Step 2: process output slots
+    for (unsigned int outputSlotIdx = 0u; outputSlotIdx < numOutputSlots; ++outputSlotIdx)
+    {
+        OutputSlot* subGraphOutputSlot = subGraphOutputSlots.at(outputSlotIdx);
+        BOOST_ASSERT(subGraphOutputSlot != nullptr);
+
+        OutputSlot* substituteOutputSlot = boost::polymorphic_downcast<OutputSlot*>(
+            &substituteLayer->GetOutputSlot(outputSlotIdx));
+        subGraphOutputSlot->MoveAllConnections(*substituteOutputSlot);
+    }
+}
+
+void Graph::EraseSubGraphLayers(const SubGraph& subGraph)
+{
+    for (auto layer : subGraph.GetLayers())
+    {
+        EraseLayer(layer);
+    }
+}
+
 void Graph::InferTensorInfos()
 {
     for (auto&& layer : TopologicalSort())
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index 7ace2e0670..8f93f56b4a 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -6,6 +6,7 @@
 
 #include "LayersFwd.hpp"
 #include "IGraphObservable.hpp"
+#include "SubGraph.hpp"
 
 #include
 #include
@@ -159,6 +160,8 @@ public:
     /// and relinking them via an intermediary copy layers.
     void AddCopyLayers();
 
+    void SubstituteSubGraph(std::unique_ptr<SubGraph> subGraph, IConnectableLayer* substituteLayer);
+
     void InferTensorInfos();
 
     void AttachObservable(IGraphObservable* const observable, GraphEvent notifyOnEvent) {
@@ -210,6 +213,9 @@ private:
     std::unordered_set<LayerBindingId> m_OutputIds;
    std::unordered_map<const Layer*, Iterator> m_PosInGraphMap;
 
+    void ReplaceSubGraphConnections(const SubGraph& subGraph, IConnectableLayer* substituteLayer);
+    void EraseSubGraphLayers(const SubGraph& subGraph);
+
     /// Mutable to allow sorting on const object.
mutable LayersList m_Layers; mutable bool m_LayersInOrder; diff --git a/src/armnn/ISubGraphConverter.hpp b/src/armnn/ISubGraphConverter.hpp new file mode 100644 index 0000000000..1d40c6737a --- /dev/null +++ b/src/armnn/ISubGraphConverter.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class ISubGraphConverter +{ +public: + virtual ~ISubGraphConverter() {}; + + virtual std::shared_ptr GetOutput() = 0; +}; + +} + diff --git a/src/armnn/InternalTypes.cpp b/src/armnn/InternalTypes.cpp index 9ffd73ac08..16a19722df 100644 --- a/src/armnn/InternalTypes.cpp +++ b/src/armnn/InternalTypes.cpp @@ -44,6 +44,7 @@ char const* GetLayerTypeAsCString(LayerType type) case LayerType::Pad: return "Pad"; case LayerType::Permute: return "Permute"; case LayerType::Pooling2d: return "Pooling2d"; + case LayerType::PreCompiled: return "PreCompiled"; case LayerType::Reshape: return "Reshape"; case LayerType::Rsqrt: return "Rsqrt"; case LayerType::ResizeBilinear: return "ResizeBilinear"; diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp index f4996db73e..f05ea25597 100644 --- a/src/armnn/InternalTypes.hpp +++ b/src/armnn/InternalTypes.hpp @@ -44,6 +44,7 @@ enum class LayerType Pad, Permute, Pooling2d, + PreCompiled, Reshape, ResizeBilinear, Rsqrt, @@ -53,7 +54,7 @@ enum class LayerType StridedSlice, // Last layer goes here. LastLayer, - Subtraction = LastLayer, + Subtraction = LastLayer }; const char* GetLayerTypeAsCString(LayerType type); diff --git a/src/armnn/LayerSupportCommon.hpp b/src/armnn/LayerSupportCommon.hpp index d6dda4f93d..c309f8c6c7 100644 --- a/src/armnn/LayerSupportCommon.hpp +++ b/src/armnn/LayerSupportCommon.hpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace armnn { diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp index 9f55233e8e..8b4ee0804b 100644 --- a/src/armnn/LayersFwd.hpp +++ b/src/armnn/LayersFwd.hpp @@ -36,6 +36,7 @@ #include "layers/PadLayer.hpp" #include "layers/PermuteLayer.hpp" #include "layers/Pooling2dLayer.hpp" +#include "layers/PreCompiledLayer.hpp" #include "layers/ReshapeLayer.hpp" #include "layers/ResizeBilinearLayer.hpp" #include "layers/RsqrtLayer.hpp" @@ -102,6 +103,7 @@ DECLARE_LAYER(Output) DECLARE_LAYER(Pad) DECLARE_LAYER(Permute) DECLARE_LAYER(Pooling2d) +DECLARE_LAYER(PreCompiled) DECLARE_LAYER(Reshape) DECLARE_LAYER(ResizeBilinear) DECLARE_LAYER(Rsqrt) diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 187d04eb2b..7b9cb3db7f 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -2,11 +2,14 @@ // Copyright © 2017 Arm Ltd. All rights reserved. 
// SPDX-License-Identifier: MIT // + #include "Network.hpp" #include "Graph.hpp" #include "Layer.hpp" #include "DeviceSpec.hpp" #include "Optimizer.hpp" +#include "SubGraphSelector.hpp" +#include "BackendSettings.hpp" #include "optimizations/All.hpp" #include @@ -71,6 +74,41 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const return m_Graph->SerializeToDot(stream); } +struct OptimizationResult +{ + bool m_Warning; + bool m_Error; + + OptimizationResult() + : m_Warning(false) + , m_Error(false) + {} +}; + +void ReportError(const std::string& errorMessage, + Optional&> errorMessages) +{ + std::stringstream fullErrorMessage; + fullErrorMessage << "ERROR: " << errorMessage; + BOOST_LOG_TRIVIAL(warning) << fullErrorMessage.str(); + if (errorMessages) + { + errorMessages.value().push_back(fullErrorMessage.str()); + } +} + +void ReportWarning(const std::string& warningMessage, + Optional&> warningMessages) +{ + std::stringstream fullWarningMessage; + fullWarningMessage << "WARNING: " << warningMessage; + BOOST_LOG_TRIVIAL(warning) << fullWarningMessage.str(); + if (warningMessages) + { + warningMessages.value().push_back(fullWarningMessage.str()); + } +} + bool CheckScaleSetOnQuantizedType(Layer* layer, Optional&> errMessages) { bool noErrors = true; @@ -82,108 +120,50 @@ bool CheckScaleSetOnQuantizedType(Layer* layer, OptionalGetType()) + ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType()) << " (" << layer->GetNameStr() << ") is of type" << " Quantized 8 bit but its scale parameter has not been set"; - BOOST_LOG_TRIVIAL(warning) << ss.str() ; - if (errMessages) { - errMessages.value().push_back(ss.str()); - } + ReportError(ss.str(), errMessages); } } } return noErrors; } -IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, - const std::vector& backendPreferences, - const IDeviceSpec& deviceSpec, - const OptimizerOptions& options, - Optional&> errMessages) +OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr, + BackendSettings& backendSettings, + Graph::Iterator& firstLayer, + Graph::Iterator& lastLayer, + Optional&> errMessages) { - if (backendPreferences.empty()) { - throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); - } - const Network& network = *boost::polymorphic_downcast(&inNetwork); - std::unique_ptr graph = std::make_unique(network.GetGraph()); - - auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy); + OptimizationResult result; - OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast(optNet.get()); - - // Perform optimisation passes - using namespace optimizations; - Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(), - SquashEqualReshapeSiblings(), - OptimizeInversePermutes(), - MovePermuteUp(), - PermuteAsReshape(), - OptimizeConsecutiveReshapes())); - - // Infer the tensor infos for all output slots. Throws an exception on failure. 
- optNetObjPtr->GetGraph().InferTensorInfos(); - - // if Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 - if (options.m_ReduceFp32ToFp16) + // Helper lambda to compose meaningful error message before returning with error + auto ReturnWithError = [&](const Layer* layer) { - Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter())); - } - - // if debug optimization is set, then print out data after each layer - if (options.m_Debug) - { - Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(InsertDebugLayer())); - } - - // We know that DeviceSpec should be the only implementation of IDeviceSpec. - const DeviceSpec& spec = *boost::polymorphic_downcast(&deviceSpec); - auto const& supportedBackends = spec.GetSupportedBackends(); - - // determine which of the preferred backends we have available for use - // and whether we have specified CpuRef as one of those backends. - bool cpuRefUsed = false; - std::vector availablePreferredBackends; - for (const auto& backend : backendPreferences) - { - // Check if the backend is in the available backend devices. - if (supportedBackends.count(backend) > 0) - { - availablePreferredBackends.push_back(backend); - if (backend == armnn::Compute::CpuRef) { - cpuRefUsed = true; - } - } - } - if (availablePreferredBackends.empty()) { std::stringstream failureMsg; - failureMsg << "ERROR: None of the preferred backends " << backendPreferences - << " are supported. Current platform provides " << supportedBackends; - BOOST_LOG_TRIVIAL(warning) << failureMsg.str(); - if (errMessages) { - errMessages.value().push_back(failureMsg.str()); - } - return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); - } + failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) + << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends; + ReportError(failureMsg.str(), errMessages); + + result.m_Error = true; + return result; + }; - auto ReturnWithError = [&](Layer* layer) + auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends(); + if (availablePreferredBackends.empty()) { std::stringstream failureMsg; - failureMsg << "ERROR: Layer of type " << GetLayerTypeAsCString(layer->GetType()) - << " is not supported on any preferred backend " << backendPreferences; - BOOST_LOG_TRIVIAL(warning) << failureMsg.str(); - if (errMessages) { - errMessages.value().push_back(failureMsg.str()); - } - return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); - }; + failureMsg << "No preferred backends are available"; + ReportError(failureMsg.str(), errMessages); - // The backends that we choose to run layers on - std::unordered_set chosenBackends; + result.m_Error = true; + return result; + } - // Assign a compute device for all nodes - bool bErrorFound = false; - for (auto&& layer : optNetObjPtr->GetGraph()) + for (auto it = firstLayer; it != lastLayer; ++it) { + auto layer = *it; DataType dataType = layer->GetDataType(); std::string reasonIfUnsupported; bool found = false; @@ -191,8 +171,9 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, { // don't bomb immediately, find all the quantized outputs // which haven't had a scale set and report them all back. 
- bErrorFound = true; + result.m_Error = true; } + for (const auto& backend : availablePreferredBackends) { // need to set the compute device on the layer @@ -273,38 +254,36 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, } } std::stringstream warningMsg; - warningMsg << "WARNING: Layer of type " << GetLayerTypeAsCString(layer->GetType()) + warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << " is not supported on requested backend " << layer->GetBackendId().Get() << " for data type " << GetDataTypeName(dataType) << " (reason: " << reasonIfUnsupported << "), falling back to the next backend."; - std::string wMsg = warningMsg.str(); - BOOST_LOG_TRIVIAL(warning) << wMsg; - if (errMessages) { - errMessages.value().push_back(wMsg); - } + ReportWarning(warningMsg.str(), errMessages); } else { found = true; - chosenBackends.insert(backend); + backendSettings.m_SelectedBackends.insert(backend); break; } } // If the layer is unsupported by any devices, log and return a null network. - if (!found) { + if (!found) + { // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a // fallback we should set the compute device on the layer to CpuRef (these are not // available as accelerated operations, or are only available under certain // conditions, currently they comprise MemCopy, Constant, Permute) armnn::LayerType layerType = layer->GetType(); - if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy || - layerType == armnn::LayerType::Constant || - layerType == armnn::LayerType::Permute)) + if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy || + layerType == armnn::LayerType::Constant || + layerType == armnn::LayerType::Permute)) { - layer->SetBackendId(armnn::Compute::CpuRef); - chosenBackends.insert(armnn::Compute::CpuRef); + BackendId cpuBackendId(armnn::Compute::CpuRef); + layer->SetBackendId(cpuBackendId); + backendSettings.m_SelectedBackends.insert(cpuBackendId); } else { @@ -312,14 +291,175 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, } } } - if (bErrorFound) + + return result; +} + +OptimizationResult InsertPreCompiledLayers(OptimizedNetwork* optNetObjPtr, + const IBackendInternalUniquePtr& backendObjPtr, + BackendSettings& backendSettings, + Optional&> errMessages) +{ + BOOST_ASSERT(backendObjPtr); + + OptimizationResult result; + + // Select sub-graphs based on backend + SubGraphSelector::SubGraphs subGraphs = + SubGraphSelector::SelectSubGraphs(optNetObjPtr->GetGraph(), + // select layers assigned to requested backend + [&](const Layer& layer) + { + return layer.GetType() != LayerType::Input && + layer.GetType() != LayerType::Output && + layer.GetBackendId() == backendObjPtr->GetId(); + }); + + if (subGraphs.empty()) + { + // No sub-graphs found -> return with no error + return result; + } + + // Convert sub-graphs and substitute them with pre-compiled layers + unsigned int index = 0u; + for (auto& subGraph : subGraphs) + { + // Create a pre-compiled layer + PreCompiledLayer* preCompiledLayer = CreatePreCompiledLayer(optNetObjPtr->GetGraph(), + *subGraph, + index++, + backendObjPtr); + if (preCompiledLayer) + { + // Substitute sub-graph with pre-compiled layer in graph + optNetObjPtr->GetGraph().SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + } + else + { + // Failed to create pre-compiled layer from sub-graph -> + // re-assign sub-graph layers to other available backends + std::stringstream warningMsg; + warningMsg << "Sub-graph #" << index << " failed to compile on " 
+ << backendObjPtr->GetId() << ". Re-assigning backends to " + << subGraph->GetLayers().size() << " layers inside sub-graph"; + ReportWarning(warningMsg.str(), errMessages); + + backendSettings.m_IgnoredBackends = { backendObjPtr->GetId() }; + + Graph::Iterator firstLayer = subGraph->begin(); + Graph::Iterator lastLayer = subGraph->end(); + OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr, + backendSettings, + firstLayer, + lastLayer, + errMessages); + + if (reassignmentResult.m_Error) + { + result.m_Error = true; + return result; + } + } + } + + return result; +} + +IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, + const std::vector& backendPreferences, + const IDeviceSpec& deviceSpec, + const OptimizerOptions& options, + Optional&> errMessages) +{ + if (backendPreferences.empty()) + { + throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); + } + + const Network& network = *boost::polymorphic_downcast(&inNetwork); + std::unique_ptr graph = std::make_unique(network.GetGraph()); + + auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy); + + OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast(optNet.get()); + + // Perform optimisation passes + using namespace optimizations; + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualReshapeSiblings(), + OptimizeInversePermutes(), + MovePermuteUp(), + PermuteAsReshape(), + OptimizeConsecutiveReshapes())); + + // Infer the tensor infos for all output slots. Throws an exception on failure. + optNetObjPtr->GetGraph().InferTensorInfos(); + + // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 + if (options.m_ReduceFp32ToFp16) + { + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter())); + } + + // Initialize backend settings + BackendSettings backendSettings(backendPreferences, deviceSpec); + if (backendSettings.GetAvailablePreferredBackends().empty()) { + std::stringstream failureMsg; + failureMsg << "None of the preferred backends " << backendPreferences + << " are supported. 
Current platform provides " << backendSettings.m_SupportedBackends; + ReportError(failureMsg.str(), errMessages); + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + } + + // Assign an available backend to each layer + Graph::Iterator firstLayer = optNetObjPtr->GetGraph().begin(); + Graph::Iterator lastLayer = optNetObjPtr->GetGraph().end(); + OptimizationResult assigBackendsResult = AssignBackends(optNetObjPtr, + backendSettings, + firstLayer, + lastLayer, + errMessages); + if (assigBackendsResult.m_Error) + { + // Failed to assign a backend to each layer return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); } Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(), OptimizeInverseConversionsFp32())); + // Insert pre-compiled layers where required by the backend + // TODO: This is a dummy/default backend id used for making the code build until + // we've properly refactored the optimizer + const BackendId backendId(Compute::Undefined); + auto const& backendRegistry = BackendRegistryInstance(); + if (backendRegistry.IsBackendRegistered(backendId)) + { + // Obtain a backend object using the registered factory + auto backendFactory = backendRegistry.GetFactory(backendId); + auto backendObjPtr = backendFactory(); + + OptimizationResult insertPreCompiledLayersResult = InsertPreCompiledLayers(optNetObjPtr, + backendObjPtr, + backendSettings, + errMessages); + if (insertPreCompiledLayersResult.m_Error) + { + // Failed to insert pre-compiled layers + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + } + } + + // If the debug flag is set, then insert a DebugLayer after each layer. + // NOTE: This optimization can only happen strictly after the PreCompiled layers have + // already been inserted + if (options.m_Debug) + { + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(InsertDebugLayer())); + } + optNetObjPtr->GetGraph().AddCopyLayers(); // Convert constants @@ -327,7 +467,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat())); // Run backend specific optimizations - for (auto&& chosenBackend : chosenBackends) + for (auto&& chosenBackend : backendSettings.m_SelectedBackends) { auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend); auto backendPtr = factoryFun(); diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp index 9a4ce87b59..735a6244d5 100644 --- a/src/armnn/NetworkUtils.cpp +++ b/src/armnn/NetworkUtils.cpp @@ -5,6 +5,12 @@ #include "NetworkUtils.hpp" +#include "SubGraphSelector.hpp" + +#include + +#include + namespace armnn { @@ -74,7 +80,6 @@ std::vector InsertConvertFp32ToFp16LayersAfter(Graph& g return convertLayers; } - std::vector InsertDebugLayerAfter(Graph& graph, Layer& layer) { std::vector debugLayers; @@ -97,10 +102,58 @@ std::vector InsertDebugLayerAfter(Graph& graph, Layer& layer) debugLayer->GetOutputSlot().SetTensorInfo(debugInfo); + // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef + debugLayer->SetBackendId(Compute::CpuRef); + debugLayers.emplace_back(debugLayer); } return debugLayers; } +PreCompiledLayer* CreatePreCompiledLayer(Graph& graph, + const SubGraph& subGraph, + unsigned int subGraphIndex, + const IBackendInternalUniquePtr& backendObjPtr) +{ + BOOST_ASSERT(backendObjPtr); + + IBackendInternal::ISubGraphConverterPtr converter = + backendObjPtr->CreateSubGraphConverter(std::make_shared(subGraph)); + if 
(!converter) + { + return nullptr; + } + + try + { + // Attempt to convert and compile sub-graph + auto preCompiledObject = converter->GetOutput(); + } + catch (std::exception&) + { + return nullptr; + } + + // Create pre-compiled layer + std::string name = "pre-compiled" + std::to_string(subGraphIndex); + PreCompiledLayer* preCompiledLayer = graph.AddLayer( + PreCompiledDescriptor(subGraph.GetNumInputSlots(), subGraph.GetNumOutputSlots()), name.c_str()); + + // Copy output tensor infos from sub-graph + for (unsigned int i = 0u; i < subGraph.GetNumOutputSlots(); i++) + { + preCompiledLayer->GetOutputSlot(i).SetTensorInfo(subGraph.GetOutputSlot(i)->GetTensorInfo()); + } + + // Assign pre-compiled object to layer + preCompiledLayer->SetPreCompiledObject(converter->GetOutput()); + + // Set the backend-id for the pre-compiled layer + BackendId backendId = backendObjPtr->GetId(); + preCompiledLayer->SetBackendId(backendId); + + return preCompiledLayer; +} + } // namespace armnn diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp index b81d5cb5e7..1a520b7195 100644 --- a/src/armnn/NetworkUtils.hpp +++ b/src/armnn/NetworkUtils.hpp @@ -5,7 +5,11 @@ #pragma once +#include "DeviceSpec.hpp" #include "Graph.hpp" +#include "SubGraph.hpp" + +#include namespace armnn { @@ -16,4 +20,9 @@ std::vector InsertConvertFp32ToFp16LayersAfter(Graph& g std::vector InsertDebugLayerAfter(Graph& graph, Layer& layer); +PreCompiledLayer* CreatePreCompiledLayer(Graph& graph, + const SubGraph& subGraph, + unsigned int subGraphIndex, + const IBackendInternalUniquePtr& backendObject); + } // namespace armnn diff --git a/src/armnn/SubGraph.cpp b/src/armnn/SubGraph.cpp index 5d41f32932..74a1838ef0 100644 --- a/src/armnn/SubGraph.cpp +++ b/src/armnn/SubGraph.cpp @@ -69,4 +69,34 @@ const SubGraph::Layers & SubGraph::GetLayers() const return m_Layers; } +SubGraph::Layers::iterator SubGraph::begin() +{ + return m_Layers.begin(); +} + +SubGraph::Layers::iterator SubGraph::end() +{ + return m_Layers.end(); +} + +SubGraph::Layers::const_iterator SubGraph::begin() const +{ + return m_Layers.begin(); +} + +SubGraph::Layers::const_iterator SubGraph::end() const +{ + return m_Layers.end(); +} + +SubGraph::Layers::const_iterator SubGraph::cbegin() const +{ + return begin(); +} + +SubGraph::Layers::const_iterator SubGraph::cend() const +{ + return end(); +} + } // namespace armnn diff --git a/src/armnn/SubGraph.hpp b/src/armnn/SubGraph.hpp index 312bb115eb..d22377daff 100644 --- a/src/armnn/SubGraph.hpp +++ b/src/armnn/SubGraph.hpp @@ -8,7 +8,7 @@ #include "Layer.hpp" #include -#include +#include namespace armnn { @@ -24,7 +24,7 @@ class SubGraph final public: using InputSlots = std::vector; using OutputSlots = std::vector; - using Layers = std::unordered_set; + using Layers = std::list; SubGraph(); SubGraph(InputSlots && inputs, @@ -44,6 +44,15 @@ public: unsigned int GetNumInputSlots() const; unsigned int GetNumOutputSlots() const; + Layers::iterator begin(); + Layers::iterator end(); + + Layers::const_iterator begin() const; + Layers::const_iterator end() const; + + Layers::const_iterator cbegin() const; + Layers::const_iterator cend() const; + private: InputSlots m_InputSlots; OutputSlots m_OutputSlots; diff --git a/src/armnn/SubGraphSelector.cpp b/src/armnn/SubGraphSelector.cpp index b87e2b73b1..d0542fd41f 100644 --- a/src/armnn/SubGraphSelector.cpp +++ b/src/armnn/SubGraphSelector.cpp @@ -166,7 +166,7 @@ SubGraphSelector::SelectSubGraphs(Graph& graph, { infoPtr->CollectNonSelectedOutputSlots(outputs, selector); 
infoPtr->CollectNonSelectedInputs(inputs, selector);
-            layers.insert(infoPtr->m_Layer);
+            layers.push_back(infoPtr->m_Layer);
         }
         result.emplace_back(
             std::make_unique<SubGraph>(
diff --git a/src/armnn/TypeUtils.hpp b/src/armnn/TypeUtils.hpp
index 01a0e6479a..5bb040f780 100644
--- a/src/armnn/TypeUtils.hpp
+++ b/src/armnn/TypeUtils.hpp
@@ -11,16 +11,9 @@
 namespace armnn
 {
-
 template<DataType DT>
 struct ResolveTypeImpl;
 
-template<>
-struct ResolveTypeImpl<DataType::QuantisedAsymm8>
-{
-    using Type = uint8_t;
-};
-
 template <>
 struct ResolveTypeImpl<DataType::Float16>
 {
@@ -33,6 +26,18 @@ struct ResolveTypeImpl<DataType::Float32>
     using Type = float;
 };
 
+template<>
+struct ResolveTypeImpl<DataType::QuantisedAsymm8>
+{
+    using Type = uint8_t;
+};
+
+template<>
+struct ResolveTypeImpl<DataType::Signed32>
+{
+    using Type = int32_t;
+};
+
 template<>
 struct ResolveTypeImpl<DataType::Boolean>
 {
@@ -42,5 +47,4 @@ struct ResolveTypeImpl<DataType::Boolean>
 
 template<DataType DT>
 using ResolveType = typename ResolveTypeImpl<DT>
::Type; - } //namespace armnn diff --git a/src/armnn/layers/MergerLayer.cpp b/src/armnn/layers/MergerLayer.cpp index 85dc0e7609..b4b5d3c2ef 100644 --- a/src/armnn/layers/MergerLayer.cpp +++ b/src/armnn/layers/MergerLayer.cpp @@ -180,7 +180,7 @@ void MergerLayer::ValidateTensorShapesFromInputs() VerifyLayerConnections(m_Param.GetNumViews(), CHECK_LOCATION()); std::vector inputShapes; - for (uint i = 0; i < GetNumInputSlots(); ++i) + for (unsigned int i = 0; i < GetNumInputSlots(); ++i) { inputShapes.push_back(GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape()); } diff --git a/src/armnn/layers/PreCompiledLayer.cpp b/src/armnn/layers/PreCompiledLayer.cpp new file mode 100644 index 0000000000..c443f9ae79 --- /dev/null +++ b/src/armnn/layers/PreCompiledLayer.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PreCompiledLayer.hpp" + +#include "LayerCloneBase.hpp" + +#include "backendsCommon/Workload.hpp" + +#include + +namespace armnn +{ + +PreCompiledLayer::PreCompiledLayer(const PreCompiledDescriptor& param, const char* name) + : LayerWithParameters(param.m_NumInputSlots, param.m_NumOutputSlots, LayerType::PreCompiled, param, name) + , m_PreCompiledObject(nullptr) +{} + +PreCompiledLayer::~PreCompiledLayer() +{} + +PreCompiledLayer* PreCompiledLayer::Clone(Graph& graph) const +{ + PreCompiledLayer* clone = CloneBase(graph, m_Param, GetName()); + clone->m_PreCompiledObject = this->m_PreCompiledObject; + return clone; +} + +std::unique_ptr PreCompiledLayer::CreateWorkload(const armnn::Graph& graph, + const armnn::IWorkloadFactory& factory) const +{ + PreCompiledQueueDescriptor descriptor; + descriptor.m_PreCompiledObject = m_PreCompiledObject; + return factory.CreatePreCompiled(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +void PreCompiledLayer::ValidateTensorShapesFromInputs() +{ + // NOTE: since the PreCompiledLayer is an internal layer created from a valid SubGraph, + // we do not need to validate its input shapes +} + +std::shared_ptr PreCompiledLayer::GetPreCompiledObject() const +{ + return m_PreCompiledObject; +} + +void PreCompiledLayer::SetPreCompiledObject(const std::shared_ptr& preCompiledObject) +{ + m_PreCompiledObject = preCompiledObject; +} + +} // namespace armnn diff --git a/src/armnn/layers/PreCompiledLayer.hpp b/src/armnn/layers/PreCompiledLayer.hpp new file mode 100644 index 0000000000..fd28d0e7a8 --- /dev/null +++ b/src/armnn/layers/PreCompiledLayer.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "LayerWithParameters.hpp" +#include + +#include + +#include + +namespace armnn +{ + +class PreCompiledLayer : public LayerWithParameters +{ +public: + PreCompiledLayer(const PreCompiledDescriptor& param, const char* name); + ~PreCompiledLayer(); + + virtual std::unique_ptr CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + PreCompiledLayer* Clone(Graph &graph) const override; + + void ValidateTensorShapesFromInputs() override; + + std::shared_ptr GetPreCompiledObject() const; + + void SetPreCompiledObject(const std::shared_ptr& preCompiledObject); + +private: + PreCompiledLayer(const PreCompiledLayer& other) = delete; + PreCompiledLayer& operator=(const PreCompiledLayer& other) = delete; + + std::shared_ptr m_PreCompiledObject; +}; + +} // namespace armnn diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index f52f6055ca..acc5cbdb1a 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -14,6 +14,8 @@ #include #include +#include +#include #include @@ -1093,4 +1095,130 @@ std::unique_ptr CreateMergerWorkloadTest(armnn::IWorkloadFactory return std::move(workloadMerger); } +template +std::pair> CreatePreCompiledWorkloadTest( + armnn::IWorkloadFactory& factory, + armnn::Graph& graph, + bool biasEnabled = false) +{ + // To create a PreCompiled layer, create a network and Optimize it. + armnn::Network net; + + // Add an input layer + armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(inputLayer); + + // ArmNN weights tensor shape is OIHW (out channels, in channels, height, width) for NCHW + // ArmNN weights tensor shape is OHWI (out channels, height, width, in channels) for NHWC + // this test is using NHWC, so the weights shape is OHWI + TensorInfo weightsTensorInfo(TensorShape({16, 1, 1, 16}), dataType, 0.9f, 0); + unsigned int weightsLength = weightsTensorInfo.GetNumElements(); + + using WeightType = armnn::ResolveType; + std::vector convWeightsData(weightsLength); + for (unsigned int i = 0; i < weightsLength; ++i) + { + convWeightsData[i] = static_cast(i); + } + + armnn::ConstTensor weights(weightsTensorInfo, convWeightsData); + + // Add a layer that can be used in the PreCompiled layer + armnn::Convolution2dDescriptor convDesc2d; + convDesc2d.m_StrideX = 1; + convDesc2d.m_StrideY = 1; + convDesc2d.m_BiasEnabled = biasEnabled; + convDesc2d.m_DataLayout = armnn::DataLayout::NHWC; + + armnn::IConnectableLayer* convLayer = nullptr; + const std::string convLayerName("conv layer"); + + if (biasEnabled) + { + constexpr armnn::DataType biasDataType = ( dataType == armnn::DataType::QuantisedAsymm8) ? 
+ armnn::DataType::Signed32 : armnn::DataType::Float32; + + TensorInfo biasTensorInfo(TensorShape({1, 1, 1, 16}), biasDataType, 0.9f * 0.9f, 0); + unsigned int biasLength = biasTensorInfo.GetNumElements(); + + using BiasType = armnn::ResolveType; + std::vector biasData(biasLength); + std::fill(biasData.begin(), biasData.end(), static_cast(0)); + + armnn::ConstTensor biases(biasTensorInfo, biasData); + + // Create convolution layer with biases + convLayer = net.AddConvolution2dLayer(convDesc2d, weights, biases, convLayerName.c_str()); + } + else + { + // Create convolution layer without biases + convLayer = net.AddConvolution2dLayer(convDesc2d, weights, convLayerName.c_str()); + } + + BOOST_TEST(convLayer); + + // Add an output layer + armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(outputLayer); + + // set the tensors in the network (NHWC format) + TensorInfo inputTensorInfo(TensorShape({ 1, 16, 16, 16 }), dataType); + if (dataType == armnn::DataType::QuantisedAsymm8) + { + inputTensorInfo.SetQuantizationOffset(0); + inputTensorInfo.SetQuantizationScale(0.9f); + } + + TensorInfo outputTensorInfo(TensorShape({1, 16, 16, 16}), dataType); + if (dataType == armnn::DataType::QuantisedAsymm8) + { + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(0.9f); + } + + // Connect the layers + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // Optimize the network for the backend supported by the factory + std::vector backends = {factory.GetBackendId()}; + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + armnn::OptimizerOptions optimizerOptions; + armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(), + optimizerOptions); + BOOST_CHECK(optimizedNet != nullptr); + + // Find the PreCompiled layer in the optimised graph + armnn::Graph& optimisedGraph = static_cast(optimizedNet.get())->GetGraph(); + Layer* preCompiledLayer = nullptr; + for (auto& layer : optimisedGraph) + { + if (layer->GetType() == LayerType::PreCompiled) + { + preCompiledLayer = layer; + } + } + BOOST_TEST(preCompiledLayer); + + // Create the TensorHandles. + CreateTensorHandles(optimisedGraph, factory); + + // Make the workload and check it. + auto workload = MakeAndCheckWorkload(*preCompiledLayer, optimisedGraph, factory); + + PreCompiledQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // Returns the workload so we can do extra, backend-specific tests. 
+ // NOTE: We need to return the optimised network as well, otherwise it gets + // out of scope and the tensor handles get destructed + return std::make_pair(std::move(optimizedNet), std::move(workload)); +} + } diff --git a/src/armnn/test/SubGraphTests.cpp b/src/armnn/test/SubGraphTests.cpp index e516ac0fa6..9e49197ea6 100644 --- a/src/armnn/test/SubGraphTests.cpp +++ b/src/armnn/test/SubGraphTests.cpp @@ -17,6 +17,20 @@ using namespace armnn; namespace { +bool AreAnySubGraphLayersPresentInGraph(const SubGraph::Layers &subGraphLayers, const Graph &graph) +{ + for(auto&& layer : subGraphLayers) + { + auto posInGraph = std::find(graph.begin(), graph.end(), layer); + if(posInGraph != graph.end()) + { + return true; + } + } + + return false; +} + // // this helper only works if all layers where the inputs connect to are not selected // @@ -112,6 +126,235 @@ void CompareSubGraphs(SubGraphSelector::SubGraphPtr & result, } // namespace +BOOST_AUTO_TEST_SUITE(SubGraphSubstitution) + +BOOST_AUTO_TEST_CASE(SingleInputSingleOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer(0, "input"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); + + Layer* const outputLayer = graph.AddLayer(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom(CreateInputsFrom({convLayer1}), CreateOutputsFrom({convLayer2}), {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn = subGraph->GetInputSlot(0)->GetConnection(); + IInputSlot* subGraphOutputConn = subGraph->GetOutputSlot(0)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(1, 1); + Layer* const preCompiledLayer = graph.AddLayer(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn); + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn); +} + +BOOST_AUTO_TEST_CASE(MultiInputSingleOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer(0, "input"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer(splitterDescriptor, "splitter"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer(mergerDescriptor, "merger"); + + Layer* const outputLayer = graph.AddLayer(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + 
mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom(CreateInputsFrom({convLayer1, convLayer2}), CreateOutputsFrom({mergerLayer}), {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraph->GetInputSlot(0)->GetConnection(); + IOutputSlot* subGraphInputConn2 = subGraph->GetInputSlot(1)->GetConnection(); + + IInputSlot* subGraphOutputConn = subGraph->GetOutputSlot(0)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(2, 1); + Layer* const preCompiledLayer = graph.AddLayer(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(1).GetConnection(), subGraphInputConn2); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn); +} + +BOOST_AUTO_TEST_CASE(SingleInputMultiOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer(0, "input"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer(mergerDescriptor, "merger"); + Layer* const outputLayer = graph.AddLayer(0, "output"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer(splitterDescriptor, "splitter"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom(CreateInputsFrom({splitterLayer}), CreateOutputsFrom({convLayer1, convLayer2}), {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraph->GetInputSlot(0)->GetConnection(); + + IInputSlot* subGraphOutputConn1 = subGraph->GetOutputSlot(0)->GetConnection(0); + IInputSlot* subGraphOutputConn2 = subGraph->GetOutputSlot(1)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(1, 2); + Layer* const preCompiledLayer = graph.AddLayer(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(1).GetConnection(0), subGraphOutputConn2); +} + +BOOST_AUTO_TEST_CASE(MultiInputMultiOutput) +{ + // Construct graph + Graph graph; + + Layer* const inputLayer = graph.AddLayer(0, "input"); + + 
ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer(splitterDescriptor, "splitter"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer(mergerDescriptor, "merger"); + + Layer* const outputLayer = graph.AddLayer(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0)); + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = CreateSubGraphFrom(CreateInputsFrom({convLayer1, convLayer2}), + CreateOutputsFrom({convLayer1, convLayer2}), + {}); + + // Save sub-graph connections for comparison after substitution + IOutputSlot* subGraphInputConn1 = subGraph->GetInputSlot(0)->GetConnection(); + IOutputSlot* subGraphInputConn2 = subGraph->GetInputSlot(1)->GetConnection(); + + IInputSlot* subGraphOutputConn1 = subGraph->GetOutputSlot(0)->GetConnection(0); + IInputSlot* subGraphOutputConn2 = subGraph->GetOutputSlot(1)->GetConnection(0); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(2, 2); + Layer* const preCompiledLayer = graph.AddLayer(preCompiledDescriptor, "pre-compiled"); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that connections are correct after substitution + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(1).GetConnection(), subGraphInputConn2); + + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn1); + BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(1).GetConnection(0), subGraphOutputConn2); +} + +BOOST_AUTO_TEST_CASE(EraseReplacedLayers) +{ + // Construct graph + Graph graph; + + graph.AddLayer(0, "input"); + + ViewsDescriptor splitterDescriptor(2); + Layer* const splitterLayer = graph.AddLayer(splitterDescriptor, "splitter"); + + Convolution2dDescriptor convDescriptor; + Layer* const convLayer1 = graph.AddLayer(convDescriptor, "conv1"); + Layer* const convLayer2 = graph.AddLayer(convDescriptor, "conv2"); + + OriginsDescriptor mergerDescriptor(2); + Layer* const mergerLayer = graph.AddLayer(mergerDescriptor, "merger"); + + graph.AddLayer(0, "output"); + + // Construct sub-graph + SubGraphSelector::SubGraphPtr subGraph = + CreateSubGraphFrom({}, {}, {splitterLayer, convLayer1, convLayer2, mergerLayer}); + + // Construct dummy pre-compiled layer + PreCompiledDescriptor preCompiledDescriptor(0, 0); + Layer* const preCompiledLayer = graph.AddLayer(preCompiledDescriptor, "pre-compiled"); + + // Save sub-graph layers for later verification + const SubGraph::Layers subGraphLayers = subGraph->GetLayers(); + + // Substitute sub-graph with pre-compiled layer + graph.SubstituteSubGraph(std::move(subGraph), preCompiledLayer); + + // Check that the layers belonging to the sub-graph have been erased from the graph after substitution + BOOST_CHECK(!AreAnySubGraphLayersPresentInGraph(subGraphLayers, graph)); 
+
+BOOST_AUTO_TEST_SUITE_END()
+
 BOOST_AUTO_TEST_SUITE(SubGraphSelection)
 
 BOOST_AUTO_TEST_CASE(NoSubGraphsForNoMatch)
@@ -585,3 +828,167 @@ BOOST_AUTO_TEST_CASE(MultiInputMultiOutput)
 }
 
 BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(IntegrationTests)
+
+BOOST_AUTO_TEST_CASE(SingleSubGraph)
+{
+    // This test case represents the scenario when we have one subgraph
+    // in which two layers have GpuAcc backend assigned
+
+    // Construct graph
+    Graph graph;
+
+    Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input");
+
+    Convolution2dDescriptor convDescriptor;
+    Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1");
+    convLayer1->SetBackendId(Compute::GpuAcc);
+
+    Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2");
+    convLayer2->SetBackendId(Compute::GpuAcc);
+
+    Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output");
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0));
+    convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0));
+    convLayer2->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    // GpuAcc sub graph selector
+    SubGraphSelector::SubGraphs subGraphs =
+        SubGraphSelector::SelectSubGraphs(
+            graph,
+            // select the GpuAcc layers only
+            [](const Layer& l)
+            {
+                bool toSelect = (l.GetBackendId() == Compute::GpuAcc);
+                return toSelect;
+            });
+
+    BOOST_TEST(subGraphs.size() == 1);
+    if (subGraphs.size() == 1)
+    {
+        BOOST_TEST((subGraphs[0] != nullptr));
+
+        if (subGraphs[0].get() != nullptr)
+        {
+            unsigned int numInputSlots  = boost::numeric_cast<unsigned int>(subGraphs[0]->GetInputSlots().size());
+            unsigned int numOutputSlots = boost::numeric_cast<unsigned int>(subGraphs[0]->GetOutputSlots().size());
+
+            BOOST_TEST((numInputSlots == 1));
+            BOOST_TEST((numOutputSlots == 1));
+
+            // Save sub-graph connections for comparison after substitution
+            IOutputSlot* subGraphInputConn1 = subGraphs[0]->GetInputSlot(0)->GetConnection();
+            IInputSlot* subGraphOutputConn1 = subGraphs[0]->GetOutputSlot(0)->GetConnection(0);
+
+            // Construct dummy pre-compiled layer
+            PreCompiledDescriptor preCompiledDescriptor(numInputSlots, numOutputSlots);
+            Layer* const preCompiledLayer = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled");
+
+            // Substitute sub-graph with pre-compiled layer
+            graph.SubstituteSubGraph(std::move(subGraphs[0]), preCompiledLayer);
+
+            // Check that connections are correct after substitution
+            BOOST_CHECK_EQUAL(preCompiledLayer->GetInputSlot(0).GetConnection(), subGraphInputConn1);
+
+            BOOST_CHECK_EQUAL(preCompiledLayer->GetOutputSlot(0).GetConnection(0), subGraphOutputConn1);
+        }
+    }
+}
+
+BOOST_AUTO_TEST_CASE(MultipleSubGraphs)
+{
+    // This test case represents the scenario when we have two subgraphs
+    // in which two layers have CpuAcc backend assigned
+
+    // Construct graph
+    Graph graph;
+
+    Layer* const inputLayer = graph.AddLayer<InputLayer>(0, "input");
+
+    ViewsDescriptor splitterDescriptor(2);
+    Layer* const splitterLayer = graph.AddLayer<SplitterLayer>(splitterDescriptor, "splitter");
+    splitterLayer->SetBackendId(Compute::CpuAcc);
+
+    Convolution2dDescriptor convDescriptor;
+    Layer* const convLayer1 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv1");
+    Layer* const convLayer2 = graph.AddLayer<Convolution2dLayer>(convDescriptor, "conv2");
+
+    OriginsDescriptor mergerDescriptor(2);
+    Layer* const mergerLayer = graph.AddLayer<MergerLayer>(mergerDescriptor, "merger");
+    mergerLayer->SetBackendId(Compute::CpuAcc);
+
+    Layer* const outputLayer = graph.AddLayer<OutputLayer>(0, "output");
+
+    inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+    splitterLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0));
+    splitterLayer->GetOutputSlot(1).Connect(convLayer2->GetInputSlot(0));
+    convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0));
+    convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1));
+    mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    // CpuAcc sub graph selector
+    SubGraphSelector::SubGraphs subGraphs =
+        SubGraphSelector::SelectSubGraphs(
+            graph,
+            // select the CpuAcc layers only
+            [](const Layer& l)
+            {
+                bool toSelect = (l.GetBackendId() == Compute::CpuAcc);
+                return toSelect;
+            });
+
+    BOOST_TEST(subGraphs.size() == 2);
+    if (subGraphs.size() == 2)
+    {
+        BOOST_TEST((subGraphs[0] != nullptr));
+        BOOST_TEST((subGraphs[1] != nullptr));
+
+        if (subGraphs[0].get() != nullptr && subGraphs[1].get() != nullptr)
+        {
+            // Sort subGraphs by their inputSlot size
+            std::sort(subGraphs.begin(), subGraphs.end(),
+                      [](SubGraphSelector::SubGraphPtr& lhs, SubGraphSelector::SubGraphPtr& rhs)
+                      {
+                          return (lhs->GetInputSlots().size() < rhs->GetInputSlots().size());
+                      });
+
+            unsigned int numInputSlots1  = boost::numeric_cast<unsigned int>(subGraphs[0]->GetInputSlots().size());
+            unsigned int numOutputSlots1 = boost::numeric_cast<unsigned int>(subGraphs[0]->GetOutputSlots().size());
+
+            unsigned int numInputSlots2  = boost::numeric_cast<unsigned int>(subGraphs[1]->GetInputSlots().size());
+            unsigned int numOutputSlots2 = boost::numeric_cast<unsigned int>(subGraphs[1]->GetOutputSlots().size());
+
+            // Save sub-graph connections for comparison after substitution
+            IOutputSlot* subGraph1InputConn  = subGraphs[0]->GetInputSlot(0)->GetConnection();
+            IInputSlot* subGraph1OutputConn1 = subGraphs[0]->GetOutputSlot(0)->GetConnection(0);
+            IInputSlot* subGraph1OutputConn2 = subGraphs[0]->GetOutputSlot(1)->GetConnection(0);
+
+            // Save sub-graph connections for comparison after substitution
+            IOutputSlot* subGraph2InputConn1 = subGraphs[1]->GetInputSlot(0)->GetConnection();
+            IOutputSlot* subGraph2InputConn2 = subGraphs[1]->GetInputSlot(1)->GetConnection();
+            IInputSlot* subGraph2OutputConn  = subGraphs[1]->GetOutputSlot(0)->GetConnection(0);
+
+            PreCompiledDescriptor preCompiledDescriptor1(numInputSlots1, numOutputSlots1);
+            Layer* const preCompiledLayer1 = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor1, "pre-compiled1");
+
+            PreCompiledDescriptor preCompiledDescriptor2(numInputSlots2, numOutputSlots2);
+            Layer* const preCompiledLayer2 = graph.AddLayer<PreCompiledLayer>(preCompiledDescriptor2, "pre-compiled2");
+
+            // Substitute sub-graph with pre-compiled layer
+            graph.SubstituteSubGraph(std::move(subGraphs[0]), preCompiledLayer1);
+            graph.SubstituteSubGraph(std::move(subGraphs[1]), preCompiledLayer2);
+
+            // Check that connections are correct after substitution
+            BOOST_CHECK_EQUAL(preCompiledLayer1->GetInputSlot(0).GetConnection(), subGraph1InputConn);
+            BOOST_CHECK_EQUAL(preCompiledLayer1->GetOutputSlot(0).GetConnection(0), subGraph1OutputConn1);
+            BOOST_CHECK_EQUAL(preCompiledLayer1->GetOutputSlot(1).GetConnection(0), subGraph1OutputConn2);
+
+            BOOST_CHECK_EQUAL(preCompiledLayer2->GetInputSlot(0).GetConnection(), subGraph2InputConn1);
+            BOOST_CHECK_EQUAL(preCompiledLayer2->GetInputSlot(1).GetConnection(), subGraph2InputConn2);
+            BOOST_CHECK_EQUAL(preCompiledLayer2->GetOutputSlot(0).GetConnection(0), subGraph2OutputConn);
+        }
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
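SelectSubGraphs partitions the graph into maximal regions whose layers all satisfy the caller's predicate; in these tests the predicate is simply "assigned to backend X". A much-simplified editorial illustration of the grouping idea, operating on a topologically ordered list instead of real graph connectivity:

    #include <functional>
    #include <string>
    #include <vector>

    struct Layer
    {
        std::string name;
        std::string backend;
    };

    using Run = std::vector<const Layer*>;

    // Group consecutive layers that match 'pred' into candidate sub-graphs.
    std::vector<Run> SelectRuns(const std::vector<Layer>& layers,
                                const std::function<bool(const Layer&)>& pred)
    {
        std::vector<Run> runs;
        bool open = false;
        for (const Layer& layer : layers)
        {
            if (!pred(layer))
            {
                open = false; // a non-matching layer closes the current run
                continue;
            }
            if (!open)
            {
                runs.emplace_back();
                open = true;
            }
            runs.back().push_back(&layer);
        }
        return runs;
    }

    // Usage mirroring the tests: two CpuAcc regions separated by other layers
    // yield two runs, just as MultipleSubGraphs expects two sub-graphs.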
diff --git a/src/backends/aclCommon/test/MemCopyTestImpl.hpp b/src/backends/aclCommon/test/MemCopyTestImpl.hpp
index 4247cc5ef4..4e0bfa85de 100644
--- a/src/backends/aclCommon/test/MemCopyTestImpl.hpp
+++ b/src/backends/aclCommon/test/MemCopyTestImpl.hpp
@@ -4,6 +4,8 @@
 //
 #pragma once
 
+#include <ResolveType.hpp>
+
 #include <...>
 #include <...>
 
@@ -18,33 +20,29 @@
 namespace
 {
 
-LayerTestResult<float, 4> MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory,
-                                      armnn::IWorkloadFactory& dstWorkloadFactory,
-                                      bool withSubtensors)
+template <armnn::DataType dataType, typename T = armnn::ResolveType<dataType>>
+LayerTestResult<T, 4> MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory,
+                                  armnn::IWorkloadFactory& dstWorkloadFactory,
+                                  bool withSubtensors)
 {
     const std::array<unsigned int, 4> shapeData = { { 1u, 1u, 6u, 5u } };
     const armnn::TensorShape tensorShape(4, shapeData.data());
-    const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32);
-    boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>(
+    const armnn::TensorInfo tensorInfo(tensorShape, dataType);
+    boost::multi_array<T, 4> inputData = MakeTensor<T, 4>(tensorInfo, std::vector<T>(
     {
-        1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-
-        6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
-
-        11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
-
-        16.0f, 17.0f, 18.0f, 19.0f, 20.0f,
-
-        21.0f, 22.0f, 23.0f, 24.0f, 25.0f,
-
-        26.0f, 27.0f, 28.0f, 29.0f, 30.0f,
+        1, 2, 3, 4, 5,
+        6, 7, 8, 9, 10,
+        11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20,
+        21, 22, 23, 24, 25,
+        26, 27, 28, 29, 30,
     })
     );
 
-    LayerTestResult<float, 4> ret(tensorInfo);
+    LayerTestResult<T, 4> ret(tensorInfo);
     ret.outputExpected = inputData;
 
-    boost::multi_array<float, 4> outputData(shapeData);
+    boost::multi_array<T, 4> outputData(shapeData);
 
     auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo);
     auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo);
@@ -75,8 +73,11 @@ LayerTestResult<float, 4> MemCopyTest(armnn::IWorkloadFactor
     return ret;
 }
 
-template <typename SrcWorkloadFactory, typename DstWorkloadFactory>
-LayerTestResult<float, 4> MemCopyTest(bool withSubtensors)
+template <typename SrcWorkloadFactory,
+          typename DstWorkloadFactory,
+          armnn::DataType dataType,
+          typename T = armnn::ResolveType<dataType>>
+LayerTestResult<T, 4> MemCopyTest(bool withSubtensors)
 {
     armnn::IBackendInternal::IMemoryManagerSharedPtr srcMemoryManager =
         WorkloadFactoryHelper<SrcWorkloadFactory>::GetMemoryManager();
@@ -87,7 +88,7 @@ LayerTestResult<float, 4> MemCopyTest(bool withSubtensors)
     SrcWorkloadFactory srcWorkloadFactory = WorkloadFactoryHelper<SrcWorkloadFactory>::GetFactory(srcMemoryManager);
     DstWorkloadFactory dstWorkloadFactory = WorkloadFactoryHelper<DstWorkloadFactory>::GetFactory(dstMemoryManager);
 
-    return MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors);
+    return MemCopyTest<dataType>(srcWorkloadFactory, dstWorkloadFactory, withSubtensors);
 }
 
 } // anonymous namespace
diff --git a/src/backends/aclCommon/test/MemCopyTests.cpp b/src/backends/aclCommon/test/MemCopyTests.cpp
index 7099a70bc7..78cd95b21d 100644
--- a/src/backends/aclCommon/test/MemCopyTests.cpp
+++ b/src/backends/aclCommon/test/MemCopyTests.cpp
@@ -47,25 +47,29 @@ BOOST_FIXTURE_TEST_SUITE(MemCopyClNeon, ClContextControlFixture)
 
 BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(false);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(false);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(false);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(false);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(true);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(true);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(true);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(true);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
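The refactor above hinges on armnn::ResolveType, which maps a DataType enum value to the C++ element type at compile time, so one templated MemCopyTest covers every data type. A minimal sketch of that pattern (an assumed equivalent, not the actual ArmNN header):

    #include <cstdint>

    enum class DataType { Float32, QuantisedAsymm8, Signed32 };

    template <DataType DT> struct ResolveTypeImpl;
    template <> struct ResolveTypeImpl<DataType::Float32>         { using Type = float;   };
    template <> struct ResolveTypeImpl<DataType::QuantisedAsymm8> { using Type = uint8_t; };
    template <> struct ResolveTypeImpl<DataType::Signed32>        { using Type = int32_t; };

    template <DataType DT>
    using ResolveType = typename ResolveTypeImpl<DT>::Type;

    // MemCopyTest<..., DataType::Float32> therefore deduces T = float:
    static_assert(sizeof(ResolveType<DataType::Float32>) == 4, "float storage");
    static_assert(sizeof(ResolveType<DataType::QuantisedAsymm8>) == 1, "uint8 storage");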
diff --git a/src/backends/backends.cmake b/src/backends/backends.cmake
index c82de8d8e8..438fda3664 100644
--- a/src/backends/backends.cmake
+++ b/src/backends/backends.cmake
@@ -5,8 +5,8 @@
 
 # single place to use wildcards, so we can include
 # yet unknown backend modules and corresponding common libraries
-FILE(GLOB commonIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/common.cmake)
-FILE(GLOB backendIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/backend.cmake)
+file(GLOB commonIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/common.cmake)
+file(GLOB backendIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/backend.cmake)
 
 # prefer to include common code first
 foreach(includeFile ${commonIncludes})
diff --git a/src/backends/backendsCommon/IBackendInternal.hpp b/src/backends/backendsCommon/IBackendInternal.hpp
index b102d1a1f9..2e6b056798 100644
--- a/src/backends/backendsCommon/IBackendInternal.hpp
+++ b/src/backends/backendsCommon/IBackendInternal.hpp
@@ -6,6 +6,10 @@
 
 #include <armnn/Types.hpp>
 #include <armnn/IRuntime.hpp>
+
+#include <ISubGraphConverter.hpp>
+#include <SubGraph.hpp>
+
 #include <memory>
 
 namespace armnn
@@ -37,6 +41,8 @@ public:
     using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>;
     using IMemoryManagerSharedPtr = std::shared_ptr<IMemoryManager>;
 
+    using ISubGraphConverterPtr = std::unique_ptr<ISubGraphConverter>;
+
     virtual IMemoryManagerUniquePtr CreateMemoryManager() const = 0;
 
     virtual IWorkloadFactoryPtr CreateWorkloadFactory(
@@ -44,6 +50,8 @@ public:
 
     virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const = 0;
 
+    virtual ISubGraphConverterPtr CreateSubGraphConverter(const std::shared_ptr<SubGraph>& subGraph) const = 0;
+
     virtual Optimizations GetOptimizations() const = 0;
     virtual ILayerSupportSharedPtr GetLayerSupport() const = 0;
 };
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 2987e5dd2a..187d2f7d38 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -285,6 +285,13 @@ bool LayerSupportBase::IsPooling2dSupported(const TensorInfo& input,
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsPreCompiledSupported(const TensorInfo& input,
+                                              const PreCompiledDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool LayerSupportBase::IsReshapeSupported(const TensorInfo& input,
                                           const ReshapeDescriptor& descriptor,
                                           Optional<std::string&> reasonIfUnsupported) const
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index 8c7aa98043..c6f943c7e0 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -184,6 +184,10 @@ public:
                               const Pooling2dDescriptor& descriptor,
                               Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsPreCompiledSupported(const TensorInfo& input,
+                                const PreCompiledDescriptor& descriptor,
+                                Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsReshapeSupported(const TensorInfo& input,
                             const ReshapeDescriptor& descriptor,
                             Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
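CreateSubGraphConverter is the new pluggable-backend extension point: the optimizer hands a backend the sub-graphs assigned to it, and the backend may return a converter that compiles them into a pre-compiled object, or an empty pointer to opt out (which is exactly what the Cl, Neon and Ref backends do further down in this patch). A toy model of that relationship; the stand-in types and method names here are editorial assumptions, not the ArmNN interface:

    #include <memory>

    struct SubGraph {};  // stand-in: layers plus boundary input/output slots

    struct ISubGraphConverter
    {
        virtual ~ISubGraphConverter() = default;
        virtual std::shared_ptr<void> GetPreCompiledObject() const = 0; // opaque blob
    };

    struct IBackendInternal
    {
        using ISubGraphConverterPtr = std::unique_ptr<ISubGraphConverter>;

        virtual ~IBackendInternal() = default;

        // A backend that cannot (or does not want to) compile the sub-graph
        // simply returns an empty pointer, and the optimizer leaves it alone.
        virtual ISubGraphConverterPtr CreateSubGraphConverter(
            const std::shared_ptr<SubGraph>& subGraph) const = 0;
    };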
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index a5db088be7..97981e2b8d 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1051,4 +1051,9 @@ void RsqrtQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
                      "output");
 }
 
+void PreCompiledQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    // This is internally generated, so it should not need validation.
+}
+
 } //namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index 59e3dfbf5c..453896b912 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -378,4 +378,16 @@ struct RsqrtQueueDescriptor : QueueDescriptor
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
+struct PreCompiledQueueDescriptor : QueueDescriptorWithParameters<PreCompiledDescriptor>
+{
+    PreCompiledQueueDescriptor()
+        : m_PreCompiledObject(nullptr)
+    {
+    }
+
+    std::shared_ptr<void> m_PreCompiledObject;
+
+    void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
 } //namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadDataFwd.hpp b/src/backends/backendsCommon/WorkloadDataFwd.hpp
index 9ae20e0ce1..9fbd81b326 100644
--- a/src/backends/backendsCommon/WorkloadDataFwd.hpp
+++ b/src/backends/backendsCommon/WorkloadDataFwd.hpp
@@ -23,5 +23,6 @@ struct MultiplicationQueueDescriptor;
 struct BatchNormalizationQueueDescriptor;
 struct FakeQuantizationQueueDescriptor;
 struct ReshapeQueueDescriptor;
+struct PreCompiledQueueDescriptor;
 
-} // namespace armnn
\ No newline at end of file
+} // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 209ba6a4ed..0f015bd540 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -193,14 +193,13 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
         case LayerType::Debug:
         {
             auto cLayer = boost::polymorphic_downcast<const DebugLayer*>(&layer);
-            const DebugDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
 
             result = layerSupportObject->IsDebugSupported(OverrideDataType(input, dataType),
                                                           OverrideDataType(output, dataType),
-                                                          descriptor,
+                                                          cLayer->GetParameters(),
                                                           reason);
             break;
         }
@@ -577,6 +576,15 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
                                                              reason);
             break;
         }
+        case LayerType::PreCompiled:
+        {
+            auto cLayer = boost::polymorphic_downcast<const PreCompiledLayer*>(&layer);
+            const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            result = layerSupportObject->IsPreCompiledSupported(OverrideDataType(input, dataType),
+                                                                cLayer->GetParameters(),
+                                                                reason);
+            break;
+        }
         case LayerType::Division:
         {
             const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index aee9f91b56..d516698d3f 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -159,6 +159,9 @@ public:
 
     virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const = 0;
+
+    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                         const WorkloadInfo& info) const = 0;
 };
 
 } //namespace armnn
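PreCompiledQueueDescriptor carries the backend's compiled artefact as a type-erased std::shared_ptr<void>, so backendsCommon never needs to know its concrete type; only the owning backend casts it back. A small sketch of that ownership pattern (CompiledNetwork is a hypothetical name, not an ArmNN type):

    #include <memory>

    struct CompiledNetwork
    {
        // backend-specific command stream, buffers, etc.
    };

    std::shared_ptr<void> MakePreCompiledObject()
    {
        // The deleter for CompiledNetwork is captured here, so the object is
        // destroyed correctly even when stored as shared_ptr<void>.
        return std::make_shared<CompiledNetwork>();
    }

    void RunPreCompiled(const std::shared_ptr<void>& opaque)
    {
        auto* compiled = static_cast<CompiledNetwork*>(opaque.get());
        (void)compiled; // the backend's workload would dispatch it here
    }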
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 8107176210..7edd93e1d6 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -28,6 +28,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
     OptimizedNetworkTests.cpp
     PermuteTestImpl.hpp
     Pooling2dTestImpl.hpp
+    PreCompiledTestImpl.cpp
+    PreCompiledTestImpl.hpp
     QuantizeHelper.hpp
     ReshapeTestImpl.hpp
     RuntimeTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/DebugTestImpl.hpp b/src/backends/backendsCommon/test/DebugTestImpl.hpp
index e0f8a35d0a..d112054198 100644
--- a/src/backends/backendsCommon/test/DebugTestImpl.hpp
+++ b/src/backends/backendsCommon/test/DebugTestImpl.hpp
@@ -64,15 +64,15 @@ LayerTestResult<T, Dim> DebugTestImpl(
     outputHandle->Allocate();
 
     CopyDataToITensorHandle(inputHandle.get(), input.data());
-    
+
     std::ostringstream oss;
     std::streambuf* coutStreambuf = std::cout.rdbuf();
     std::cout.rdbuf(oss.rdbuf());
 
     ExecuteWorkload(*workload, memoryManager);
-    
+
     std::cout.rdbuf(coutStreambuf);
-    
+
     BOOST_TEST(oss.str() == expectedStringOutput);
 
     CopyDataFromITensorHandle(ret.output.data(), outputHandle.get());
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 78716efaaf..edc58cf514 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -372,6 +372,8 @@ DECLARE_LAYER_POLICY_2_PARAM(Permute)
 
 DECLARE_LAYER_POLICY_2_PARAM(Pooling2d)
 
+DECLARE_LAYER_POLICY_2_PARAM(PreCompiled)
+
 DECLARE_LAYER_POLICY_1_PARAM(Division)
 
 DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear)
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
old mode 100755
new mode 100644
index 8e4596b703..0bf56e2445
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -37,6 +37,7 @@
 #include "StridedSliceTestImpl.hpp"
 #include "NormTestImpl.hpp"
 #include "PermuteTestImpl.hpp"
+#include "PreCompiledTestImpl.hpp"
 #include "LstmTestImpl.hpp"
 #include "ConvertFp16ToFp32TestImpl.hpp"
 #include "ConvertFp32ToFp16TestImpl.hpp"
@@ -8567,3 +8568,38 @@ LayerTestResult<uint8_t, 1> Debug1DUint8Test(
 {
     return Debug1DTest<uint8_t>(workloadFactory, memoryManager);
 }
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return PreCompiledConvolution2dTestImpl(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return PreCompiledConvolution2dStride2x2TestImpl(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return PreCompiledDepthwiseConvolution2dTestImpl(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return PreCompiledDepthwiseConvolution2dStride2x2TestImpl(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return PreCompiledMaxPooling2dTestImpl(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 98c0806ddf..744470db49 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1317,3 +1317,55 @@ LayerTestResult<uint8_t, 2> Debug2DUint8Test(
 LayerTestResult<uint8_t, 1> Debug1DUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> Debug4DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> Debug3DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> Debug2DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 1> Debug1DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> Debug4DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> Debug3DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> Debug2DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 1> Debug1DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp b/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp
new file mode 100644
index 0000000000..5a2bba1375
--- /dev/null
+++ b/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp
@@ -0,0 +1,491 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PreCompiledTestImpl.hpp"
+
+#include "TensorCopyUtils.hpp"
+
+#include <...>
+#include <...>
+#include <...>
+
+#include <...>
+#include <...>
+
+#include <...>
+
+#include <...>
+
+#include <...>
+
+using namespace armnn;
+
+namespace
+{
+
+template <typename ConvolutionDescriptor>
+struct PreCompiledConvolutionHelper
+{
+};
+
+template <>
+struct PreCompiledConvolutionHelper<Convolution2dDescriptor>
+{
+    static IConnectableLayer* AddConvolutionLayerToNetwork(
+        Network& network,
+        Convolution2dDescriptor descriptor,
+        const ConstTensor& weights,
+        const ConstTensor& biases)
+    {
+        return network.AddConvolution2dLayer(descriptor, weights, biases, "convolution");
+    }
+};
+
+template <>
+struct PreCompiledConvolutionHelper<DepthwiseConvolution2dDescriptor>
+{
+    static IConnectableLayer* AddConvolutionLayerToNetwork(
+        Network& network,
+        DepthwiseConvolution2dDescriptor descriptor,
+        const ConstTensor& weights,
+        const ConstTensor& biases)
+    {
+        return network.AddDepthwiseConvolution2dLayer(descriptor, weights, biases, "depthwiseConvolution");
+    }
+};
+
+template <typename ConvolutionDescriptor>
+ConvolutionDescriptor CreateConvolutionDescriptor(unsigned int stride, unsigned int padding)
+{
+    ConvolutionDescriptor descriptor;
+
+    descriptor.m_StrideX     = stride;
+    descriptor.m_StrideY     = stride;
+    descriptor.m_PadLeft     = padding;
+    descriptor.m_PadRight    = padding;
+    descriptor.m_PadTop      = padding;
+    descriptor.m_PadBottom   = padding;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = DataLayout::NHWC;
+
+    return descriptor;
+}
+
+static std::vector<uint8_t> CreateIdentityConvolutionKernel(
+    unsigned int kernelSize, unsigned int channels)
+{
+    BOOST_ASSERT(kernelSize % 2 == 1); // kernelSize needs to be an odd number
+
+    const unsigned int numElements = channels * (kernelSize * kernelSize);
+    std::vector<uint8_t> kernel(numElements, 0u);
+
+    unsigned int centerIndex = kernelSize / 2;
+    for (unsigned int y = 0u; y < kernelSize; y++)
+    {
+        for (unsigned int x = 0u; x < kernelSize; x++)
+        {
+            for (unsigned int channel = 0u; channel < channels; channel++)
+            {
+                if (x == centerIndex && y == centerIndex)
+                {
+                    const unsigned int flatIndex =
+                        (y * kernelSize * channels) + (x * channels) + channel;
+
+                    kernel[flatIndex] = 1u;
+                }
+            }
+        }
+    }
+
+    return kernel;
+}
+
+template <typename ConvolutionDescriptor>
+std::vector<uint8_t> GetIdentityConvolutionExpectedOutputData(
+    const TensorInfo& inputInfo,
+    const TensorInfo& outputInfo,
+    const ConvolutionDescriptor& descriptor,
+    const std::vector<uint8_t>& inputData)
+{
+    const unsigned int outputDataSize = outputInfo.GetNumElements();
+    std::vector<uint8_t> expectedOutputData(outputDataSize);
+
+    const unsigned int channels = outputInfo.GetShape()[3];
+    BOOST_ASSERT(channels == inputInfo.GetShape()[3]);
+
+    const unsigned int inputW = inputInfo.GetShape()[2];
+
+    const unsigned int outputH = outputInfo.GetShape()[1];
+    const unsigned int outputW = outputInfo.GetShape()[2];
+
+    // Pick values from the input buffer, but after each iteration skip a number of
+    // rows and columns equal to the stride in the respective dimension
+    for (unsigned int inputY = 0, outputY = 0; outputY < outputH; inputY += descriptor.m_StrideY, outputY++)
+    {
+        for (unsigned int inputX = 0, outputX = 0; outputX < outputW; inputX += descriptor.m_StrideX, outputX++)
+        {
+            for (unsigned int channel = 0u; channel < channels; channel++)
+            {
+                const unsigned int inputIndex =
+                    (inputY * inputW * channels) + (inputX * channels) + channel;
+                const unsigned int outputIndex =
+                    (outputY * outputW * channels) + (outputX * channels) + channel;
+
+                expectedOutputData[outputIndex] = inputData[inputIndex];
+            }
+        }
+    }
+
+    return expectedOutputData;
+}
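The identity kernel makes the expected outputs trivial to predict: with a single 1 at the kernel centre, a same-padded convolution reproduces its input, and a strided one simply skips rows and columns. A standalone editorial check of that claim (toy code, not part of the patch):

    #include <cassert>
    #include <vector>

    int main()
    {
        const unsigned kernelSize = 3, centre = kernelSize / 2;
        std::vector<unsigned> kernel(kernelSize * kernelSize, 0);
        kernel[centre * kernelSize + centre] = 1; // 0 0 0 / 0 1 0 / 0 0 0

        // 4x4 single-channel input, same-padded 3x3 convolution, stride 1
        std::vector<unsigned> in = { 1, 2, 3, 4,  5, 6, 7, 8,  9, 10, 11, 12,  13, 14, 15, 16 };
        for (unsigned y = 0; y < 4; ++y)
        {
            for (unsigned x = 0; x < 4; ++x)
            {
                unsigned acc = 0;
                for (unsigned ky = 0; ky < kernelSize; ++ky)
                {
                    for (unsigned kx = 0; kx < kernelSize; ++kx)
                    {
                        int iy = int(y + ky) - int(centre), ix = int(x + kx) - int(centre);
                        if (iy < 0 || iy >= 4 || ix < 0 || ix >= 4) continue; // zero padding
                        acc += kernel[ky * kernelSize + kx] * in[unsigned(iy) * 4 + unsigned(ix)];
                    }
                }
                assert(acc == in[y * 4 + x]); // identity: output == input
            }
        }
        return 0;
    }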
+armnn::PreCompiledLayer* FindPreCompiledLayer(armnn::Graph& optimisedGraph)
+{
+    for (auto& layer : optimisedGraph)
+    {
+        if (layer->GetType() == armnn::LayerType::PreCompiled)
+        {
+            return boost::polymorphic_pointer_downcast<armnn::PreCompiledLayer>(layer);
+        }
+    }
+
+    // No pre-compiled layer found
+    return nullptr;
+}
+
+// NOTE: This only supports a single input and a single output
+LayerTestResult<uint8_t, 4> OptimiseAndRunNetwork(armnn::IWorkloadFactory& workloadFactory,
+                                                  Network& net,
+                                                  TensorInfo inputInfo,
+                                                  std::vector<uint8_t> inputData,
+                                                  TensorInfo outputInfo,
+                                                  std::vector<uint8_t> expectedOutputData)
+{
+    // Optimize the network for the backend supported by the factory
+    std::vector<BackendId> backends = {workloadFactory.GetBackendId()};
+    IRuntimePtr runtime(IRuntime::Create(IRuntime::CreationOptions()));
+    IOptimizedNetworkPtr optimizedNet = Optimize(net, backends, runtime->GetDeviceSpec(), OptimizerOptions());
+    if (!optimizedNet)
+    {
+        throw RuntimeException(std::string("Failed to optimize network for ") + std::string(backends[0]),
+                               CHECK_LOCATION());
+    }
+
+    // Find the pre-compiled layer in the optimised graph
+    Graph& optimisedGraph = static_cast<OptimizedNetwork*>(optimizedNet.get())->GetGraph();
+    PreCompiledLayer* preCompiledLayer = FindPreCompiledLayer(optimisedGraph);
+    if (!preCompiledLayer)
+    {
+        throw RuntimeException("Could not find pre-compiled layer in optimised graph", CHECK_LOCATION());
+    }
+
+    // Create the tensor handles
+    for (auto&& layer : optimisedGraph.TopologicalSort())
+    {
+        layer->CreateTensorHandles(optimisedGraph, workloadFactory);
+    }
+
+    // Create the pre-compiled workload
+    auto workload = preCompiledLayer->CreateWorkload(optimisedGraph, workloadFactory);
+
+    // Set the input data
+    boost::multi_array<uint8_t, 4> input = MakeTensor<uint8_t, 4>(inputInfo, inputData);
+    const QueueDescriptor& workloadData =
+        static_cast<BaseWorkload<PreCompiledQueueDescriptor>*>(workload.get())->GetData();
+    CopyDataToITensorHandle(workloadData.m_Inputs[0], &input[0][0][0][0]);
+
+    // Execute the workload
+    workload->Execute();
+
+    // Set the expected and actual outputs
+    LayerTestResult<uint8_t, 4> result(outputInfo);
+    result.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], workloadData.m_Outputs[0]);
+    return result;
+}
+
+} // anonymous namespace
+
+template <typename ConvolutionDescriptor>
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    unsigned int inputSize,
+    unsigned int outputSize,
+    unsigned int channels,
+    unsigned int kernelSize,
+    const ConvolutionDescriptor& descriptor,
+    bool isDepthwiseConvolution = false)
+{
+    BOOST_ASSERT(descriptor.m_BiasEnabled == true);
+    BOOST_ASSERT(descriptor.m_DataLayout == DataLayout::NHWC);
+
+    // Set up tensor shapes and infos
+    const TensorShape inputShape ({1, inputSize, inputSize, channels});
+    const TensorShape outputShape({1, outputSize, outputSize, channels});
+    const TensorShape kernelShape = isDepthwiseConvolution
+        // The format for the depthwise convolution is MIHW
+        ? TensorShape({1, channels, kernelSize, kernelSize})
+        // The format for the regular convolution depends on the layout of the inputs,
+        // in this case it is NHWC
+        : TensorShape({1, kernelSize, kernelSize, channels});
+    const TensorShape biasesShape({1, 1, 1, channels});
+
+    // NOTE: inputScale * weightsScale / outputScale must be >= 0.0 and < 1.0
+    TensorInfo inputInfo  (inputShape,  DataType::QuantisedAsymm8, 1.0f, 0);
+    TensorInfo outputInfo (outputShape, DataType::QuantisedAsymm8, 2.0f, 0);
+    TensorInfo weightsInfo(kernelShape, DataType::QuantisedAsymm8, 1.0f, 0);
+    TensorInfo biasesInfo (biasesShape, DataType::Signed32,        1.0f, 0);
+
+    // Populate weight and bias data
+    std::vector<uint8_t> weightsData = CreateIdentityConvolutionKernel(kernelSize, channels);
+
+    // NOTE: We need to multiply the elements of the identity kernel by 2
+    // to compensate for the scaling factor
+    std::transform(weightsData.begin(), weightsData.end(), weightsData.begin(),
+                   [](uint8_t w) -> uint8_t { return static_cast<uint8_t>(w * 2); });
+
+    const unsigned int biasDataSize = biasesInfo.GetNumElements();
+    std::vector<int32_t> biasesData(biasDataSize, 0);
+
+    // Construct network
+    Network network;
+    ConstTensor weights(weightsInfo, weightsData);
+    ConstTensor biases(biasesInfo, biasesData);
+
+    IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
+
+    IConnectableLayer* const convolutionLayer =
+        PreCompiledConvolutionHelper<ConvolutionDescriptor>
+            ::AddConvolutionLayerToNetwork(network, descriptor, weights, biases);
+
+    IConnectableLayer* const outputLayer = network.AddOutputLayer(0, "output");
+
+    inputLayer->GetOutputSlot(0).Connect(convolutionLayer->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+    convolutionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+    convolutionLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
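The two NOTE comments above combine as follows: the requantisation multiplier is inputScale * weightsScale / outputScale = 1.0 * 1.0 / 2.0 = 0.5, so a raw kernel value of 1 would halve every output; storing 2 restores an effective weight of 1. A worked editorial check with this test's parameters:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const float inputScale = 1.0f, weightsScale = 1.0f, outputScale = 2.0f;
        const float requant = inputScale * weightsScale / outputScale; // 0.5, within [0, 1)

        const uint8_t inputValue  = 30;
        const uint8_t kernelValue = 2;  // identity kernel entry after the *2 compensation

        // The accumulator is in input*weights scale; requantising maps it to output scale
        const int32_t acc = inputValue * kernelValue;
        const uint8_t output = static_cast<uint8_t>(acc * requant + 0.5f);

        assert(output == inputValue); // the convolution still acts as an identity
        return 0;
    }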
+
+    // Generate input data: sequence [0, 1 .. 255]
+    const unsigned int inputDataSize = inputInfo.GetNumElements();
+    std::vector<uint8_t> inputData(inputDataSize);
+    std::iota(inputData.begin(), inputData.end(), 0);
+
+    // Set expected output
+    std::vector<uint8_t> expectedOutputData =
+        GetIdentityConvolutionExpectedOutputData(inputInfo,
+                                                 outputInfo,
+                                                 descriptor,
+                                                 inputData);
+
+    return OptimiseAndRunNetwork(workloadFactory,
+                                 network,
+                                 inputInfo,
+                                 inputData,
+                                 outputInfo,
+                                 expectedOutputData);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputSize  = 16;
+    const unsigned int outputSize = 16;
+    const unsigned int channels   = 1;
+    const unsigned int kernelSize = 3;
+    const unsigned int stride     = 1;
+    const unsigned int padding    = 1;
+
+    Convolution2dDescriptor descriptor =
+        CreateConvolutionDescriptor<Convolution2dDescriptor>(stride, padding);
+
+    return PreCompiledConvolution2dTestImpl(workloadFactory,
+                                            memoryManager,
+                                            inputSize,
+                                            outputSize,
+                                            channels,
+                                            kernelSize,
+                                            descriptor);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputSize  = 16;
+    const unsigned int outputSize = 8;
+    const unsigned int channels   = 1;
+    const unsigned int kernelSize = 3;
+    const unsigned int stride     = 2;
+    const unsigned int padding    = 1;
+
+    Convolution2dDescriptor descriptor =
+        CreateConvolutionDescriptor<Convolution2dDescriptor>(stride, padding);
+
+    return PreCompiledConvolution2dTestImpl(workloadFactory,
+                                            memoryManager,
+                                            inputSize,
+                                            outputSize,
+                                            channels,
+                                            kernelSize,
+                                            descriptor);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputSize  = 16;
+    const unsigned int outputSize = 16;
+    const unsigned int channels   = 3;
+    const unsigned int kernelSize = 1;
+    const unsigned int stride     = 1;
+    const unsigned int padding    = 0;
+
+    DepthwiseConvolution2dDescriptor descriptor =
+        CreateConvolutionDescriptor<DepthwiseConvolution2dDescriptor>(stride, padding);
+
+    return PreCompiledConvolution2dTestImpl(workloadFactory,
+                                            memoryManager,
+                                            inputSize,
+                                            outputSize,
+                                            channels,
+                                            kernelSize,
+                                            descriptor,
+                                            true);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int inputSize  = 16;
+    const unsigned int outputSize = 8;
+    const unsigned int channels   = 3;
+    const unsigned int kernelSize = 3;
+    const unsigned int stride     = 2;
+    const unsigned int padding    = 1;
+
+    DepthwiseConvolution2dDescriptor descriptor =
+        CreateConvolutionDescriptor<DepthwiseConvolution2dDescriptor>(stride, padding);
+
+    return PreCompiledConvolution2dTestImpl(workloadFactory,
+                                            memoryManager,
+                                            inputSize,
+                                            outputSize,
+                                            channels,
+                                            kernelSize,
+                                            descriptor,
+                                            true);
+}
+
+LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    // Pooling cannot be run in isolation, it must be fused with the previous layer, e.g. Convolution2d
+
+    // Set up the Convolution descriptor
+    Convolution2dDescriptor convDescriptor;
+    convDescriptor.m_StrideX = 1;
+    convDescriptor.m_StrideY = 1;
+    convDescriptor.m_BiasEnabled = true;
+    convDescriptor.m_DataLayout = DataLayout::NHWC;
+
+    // Set up the Convolution weights
+    TensorInfo weightsInfo(TensorShape({16, 1, 1, 16}), DataType::QuantisedAsymm8, 2.0f, 0);
+    const unsigned int weightsDataSize = weightsInfo.GetNumElements();
+    std::vector<uint8_t> weightsData(weightsDataSize);
+    for (unsigned int i = 0; i < 16; ++i)
+    {
+        for (unsigned int j = 0; j < 16; ++j)
+        {
+            weightsData[(i * 16) + j] = (i == j) ? 1 : 0;
+        }
+    }
+    ConstTensor weights(weightsInfo, weightsData);
+
+    // Set up the Convolution biases
+    TensorInfo biasInfo(TensorShape({1, 1, 1, 16}), DataType::Signed32, 1.0f * 2.0f, 0);
+    const unsigned int biasDataSize = biasInfo.GetNumElements();
+    std::vector<int32_t> biasData(biasDataSize, 0);
+    ConstTensor biases(biasInfo, biasData);
+
+    // Set up the Convolution input
+    TensorInfo inputInfo(TensorShape({1, 16, 16, 16}), DataType::QuantisedAsymm8, 1.0f, 0);
+    const unsigned int inputDataSize = inputInfo.GetNumElements();
+    std::vector<uint8_t> inputData(inputDataSize);
+    for (unsigned int i = 0; i < inputDataSize; ++i)
+    {
+        inputData[i] = boost::numeric_cast<uint8_t>((i * 4) % 250);
+    }
+
+    // Set up the Convolution output / Pooling input info
+    TensorInfo convOutputInfo(TensorShape({1, 16, 16, 16}), DataType::QuantisedAsymm8, 4.0f, 0);
+
+    // Set up the Pooling descriptor
+    Pooling2dDescriptor poolDescriptor;
+    poolDescriptor.m_PoolType = PoolingAlgorithm::Max;
+    poolDescriptor.m_PoolWidth = 2;
+    poolDescriptor.m_PoolHeight = 2;
+    poolDescriptor.m_StrideX = 2;
+    poolDescriptor.m_StrideY = 2;
+    poolDescriptor.m_PaddingMethod = PaddingMethod::Exclude;
+    poolDescriptor.m_DataLayout = DataLayout::NHWC;
+
+    // Set the expected output from the Pooling layer
+    TensorInfo outputInfo(TensorShape({1, 8, 8, 16}), DataType::QuantisedAsymm8, 4.0f, 0);
+    const unsigned int outputDataSize = outputInfo.GetNumElements();
+    std::vector<uint8_t> expectedOutputData(outputDataSize);
+    // The MaxPooling inputs are the Convolution outputs, i.e. (Convolution inputs / 2) after scale adjustments.
+    // MaxPooling selects the max value in each pool from its inputs, and our pool size is 2x2
+    for (unsigned int channel = 0; channel < 16; ++channel)
+    {
+        for (unsigned int row = 0; row < 8; ++row)
+        {
+            for (unsigned int column = 0; column < 8; ++column)
+            {
+                // The input and output data indexes are calculated for NHWC data layout
+                // Output index: (row * columns * channels) + (column * channels) + channel
+                auto outIndex = (row * 8 * 16) + (column * 16) + channel;
+
+                // Input index: (row * strideY * columns * channels) + (column * strideX * channels) + channel,
+                // and we take 4 entries for the 2x2 pool
+                auto in0Index = ((row * 2) * 16 * 16) + ((column * 2) * 16) + channel;
+                auto in1Index = ((row * 2) * 16 * 16) + (((column * 2) + 1) * 16) + channel;
+                auto in2Index = (((row * 2) + 1) * 16 * 16) + ((column * 2) * 16) + channel;
+                auto in3Index = (((row * 2) + 1) * 16 * 16) + (((column * 2) + 1) * 16) + channel;
+
+                // The output value is the maximum of the input pool values, adjusted for the quantization scale change
+                auto maxIn = std::max({inputData[in0Index],
+                                       inputData[in1Index],
+                                       inputData[in2Index],
+                                       inputData[in3Index]});
+                expectedOutputData[outIndex] = maxIn / 2;
+            }
+        }
+    }
+
+    // Construct the network
+    Network net;
+    IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input");
+    IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDescriptor, weights, biases, "conv");
+    IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(poolDescriptor, "pooling2d");
+    IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output");
+
+    // Connect the layers
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    convLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0));
+    convLayer->GetOutputSlot(0).SetTensorInfo(convOutputInfo);
+    poolingLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+    poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    return OptimiseAndRunNetwork(workloadFactory,
+                                 net,
+                                 inputInfo,
+                                 inputData,
+                                 outputInfo,
+                                 expectedOutputData);
+}
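All of the in0..in3 arithmetic above is the flattened NHWC formula: element (n, h, w, c) of an [N, H, W, C] tensor lives at offset ((n * H + h) * W + w) * C + c. A standalone editorial check that the pool-window indices match that formula:

    #include <cassert>

    int main()
    {
        const unsigned H = 16, W = 16, C = 16;
        auto idx = [&](unsigned h, unsigned w, unsigned c) { return (h * W + w) * C + c; };

        const unsigned row = 3, column = 5, channel = 7; // an arbitrary output element
        assert(idx(row * 2,     column * 2,     channel) == ((row * 2) * W * C) + ((column * 2) * C) + channel);
        assert(idx(row * 2,     column * 2 + 1, channel) == ((row * 2) * W * C) + (((column * 2) + 1) * C) + channel);
        assert(idx(row * 2 + 1, column * 2,     channel) == (((row * 2) + 1) * W * C) + ((column * 2) * C) + channel);
        assert(idx(row * 2 + 1, column * 2 + 1, channel) == (((row * 2) + 1) * W * C) + (((column * 2) + 1) * C) + channel);
        return 0;
    }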
diff --git a/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp b/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp
new file mode 100644
index 0000000000..f4e78b6002
--- /dev/null
+++ b/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "LayerTests.hpp"
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 2b82c185f0..2f9dfa9755 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -63,6 +63,12 @@ ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
     return IBackendContextPtr{new ClBackendContext{options}};
 }
 
+IBackendInternal::ISubGraphConverterPtr ClBackend::CreateSubGraphConverter(
+    const std::shared_ptr<SubGraph>& subGraph) const
+{
+    return ISubGraphConverterPtr{};
+}
+
 IBackendInternal::Optimizations ClBackend::GetOptimizations() const
 {
     return Optimizations{};
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index ef98da08a4..84b5b9a9d2 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -25,6 +25,9 @@ public:
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
 
+    IBackendInternal::ISubGraphConverterPtr CreateSubGraphConverter(
+        const std::shared_ptr<SubGraph>& subGraph) const override;
+
     IBackendInternal::Optimizations GetOptimizations() const override;
     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
 };
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index af47f65d29..28011cfd7b 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -356,4 +356,10 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescri
     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                                const WorkloadInfo& info) const
+{
+    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+}
+
 } // namespace armnn
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 85cbd91e11..286e897472 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -150,6 +150,9 @@ public:
     virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
+    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                         const WorkloadInfo& info) const override;
+
 private:
     template<typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
     static std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor,
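Cl and Neon wire up PreCompiled through the usual MakeWorkload/MakeWorkloadHelper pattern, but with NullWorkload in both type slots, which yields an empty pointer, i.e. "not supported on this backend". A toy version of that mechanism (names are assumptions modelled on the ArmNN helpers; C++17 used for brevity):

    #include <memory>
    #include <type_traits>
    #include <utility>

    struct IWorkload { virtual ~IWorkload() = default; };
    struct NullWorkload;  // deliberately incomplete: "no implementation here"

    // If the selected workload type is NullWorkload, hand back an empty
    // pointer instead of constructing anything.
    template <typename SelectedWorkload, typename... Args>
    std::unique_ptr<IWorkload> MakeWorkloadHelper(Args&&... args)
    {
        if constexpr (std::is_same_v<SelectedWorkload, NullWorkload>)
        {
            return nullptr; // callers treat this as "layer unsupported"
        }
        else
        {
            return std::make_unique<SelectedWorkload>(std::forward<Args>(args)...);
        }
    }

    struct SomeRealWorkload : IWorkload {};

    // MakeWorkloadHelper<SomeRealWorkload>() -> a real workload
    // MakeWorkloadHelper<NullWorkload>()     -> nullptr (PreCompiled on Cl/Neon)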
diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp
index 93d8dd5662..3cd9af7910 100644
--- a/src/backends/cl/test/ClMemCopyTests.cpp
+++ b/src/backends/cl/test/ClMemCopyTests.cpp
@@ -17,25 +17,29 @@ BOOST_AUTO_TEST_SUITE(ClMemCopy)
 
 BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(false);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(false);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(false);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(false);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(true);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(true);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(true);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(true);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index fd2b766500..ce97a1d03c 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -63,6 +63,12 @@ IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRu
     return IBackendContextPtr{};
 }
 
+IBackendInternal::ISubGraphConverterPtr NeonBackend::CreateSubGraphConverter(
+    const std::shared_ptr<SubGraph>& subGraph) const
+{
+    return ISubGraphConverterPtr{};
+}
+
 IBackendInternal::Optimizations NeonBackend::GetOptimizations() const
 {
     return Optimizations{};
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 127a5a4b62..3b1d186a06 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -25,6 +25,9 @@ public:
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
 
+    IBackendInternal::ISubGraphConverterPtr CreateSubGraphConverter(
+        const std::shared_ptr<SubGraph>& subGraph) const override;
+
     IBackendInternal::Optimizations GetOptimizations() const override;
     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
 };
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 65093fb593..3728c86a66 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -324,4 +324,10 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRsqrt(const RsqrtQueueDesc
     return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                                  const WorkloadInfo& info) const
+{
+    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
+}
+
 } // namespace armnn
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 9a6308871a..68317ed651 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -151,6 +151,9 @@ public:
     virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
+    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                         const WorkloadInfo& info) const override;
+
 private:
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
 };
diff --git a/src/backends/neon/test/NeonMemCopyTests.cpp b/src/backends/neon/test/NeonMemCopyTests.cpp
index f6699a61ba..dbe1f8da3f 100644
--- a/src/backends/neon/test/NeonMemCopyTests.cpp
+++ b/src/backends/neon/test/NeonMemCopyTests.cpp
@@ -18,25 +18,29 @@ BOOST_AUTO_TEST_SUITE(NeonMemCopy)
 
 BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(false);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(false);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(false);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(false);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(true);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(true);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
 
 BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors)
 {
-    LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(true);
+    LayerTestResult<float, 4> result =
+        MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(true);
     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
 }
diff --git a/src/backends/reference/RefBackend.cpp b/src/backends/reference/RefBackend.cpp
index 8f5e9c4d5e..e4f468c15e 100644
--- a/src/backends/reference/RefBackend.cpp
+++ b/src/backends/reference/RefBackend.cpp
@@ -56,6 +56,12 @@ IBackendInternal::IMemoryManagerUniquePtr RefBackend::CreateMemoryManager() cons
     return IMemoryManagerUniquePtr{};
 }
 
+IBackendInternal::ISubGraphConverterPtr RefBackend::CreateSubGraphConverter(
+    const std::shared_ptr<SubGraph>& subGraph) const
+{
+    return ISubGraphConverterPtr{};
+}
+
 IBackendInternal::Optimizations RefBackend::GetOptimizations() const
 {
     return Optimizations{};
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index 1a0aef58c4..51366221c7 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -25,6 +25,9 @@ public:
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
 
+    IBackendInternal::ISubGraphConverterPtr CreateSubGraphConverter(
+        const std::shared_ptr<SubGraph>& subGraph) const override;
+
     IBackendInternal::Optimizations GetOptimizations() const override;
     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
 };
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 79293635fb..361a3f1f74 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -318,4 +318,10 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateRsqrt(const RsqrtQueueDescr
     return MakeWorkload<RefRsqrtFloat32Workload, NullWorkload>(descriptor, info);
 }
 
-} // namespace armnn
\ No newline at end of file
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                                 const WorkloadInfo& info) const
+{
+    return nullptr;
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index f6707f5046..432ac72c6e 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -167,6 +167,10 @@ public:
 
     virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
+
+    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                         const WorkloadInfo& info) const override;
+
 private:
 
     template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
diff --git a/src/backends/reference/workloads/Debug.cpp b/src/backends/reference/workloads/Debug.cpp
index dfcbbd8e97..cc83c7b4ee 100644
--- a/src/backends/reference/workloads/Debug.cpp
+++ b/src/backends/reference/workloads/Debug.cpp
@@ -98,4 +98,4 @@ template void Debug<uint8_t>(const TensorInfo& inputInfo,
                              const DebugDescriptor& descriptor,
                              const uint8_t* inputData,
                              uint8_t* outputData);
 
-} //namespace armnn
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp
index 17eb8fc143..d9a47c0596 100644
--- a/src/backends/reference/workloads/RefDebugWorkload.cpp
+++ b/src/backends/reference/workloads/RefDebugWorkload.cpp
@@ -2,11 +2,12 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
 #include "RefDebugWorkload.hpp"
 #include "Debug.hpp"
-
 #include "RefWorkloadUtils.hpp"
-#include "TypeUtils.hpp"
+
+#include <TypeUtils.hpp>
 
 namespace armnn
 {
@@ -30,4 +31,4 @@ void RefDebugWorkload<DataType>::Execute() const
 
 template class RefDebugWorkload<DataType::Float32>;
 template class RefDebugWorkload<DataType::QuantisedAsymm8>;
 
-} //namespace armnn
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp
index a1231f92d3..c1a3e26ec2 100644
--- a/src/backends/reference/workloads/RefDebugWorkload.hpp
+++ b/src/backends/reference/workloads/RefDebugWorkload.hpp
@@ -2,12 +2,13 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#pragma once
 
-#include <...>
+#pragma once
 
 #include <...>
 
+#include <...>
+
 namespace armnn
 {
@@ -30,4 +31,4 @@ public:
 using RefDebugFloat32Workload = RefDebugWorkload<DataType::Float32>;
 using RefDebugUint8Workload = RefDebugWorkload<DataType::QuantisedAsymm8>;
 
-} //namespace armnn
+} // namespace armnn
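The ported RefDebugWorkload uses the classic explicit-instantiation pattern: the template is defined in the .cpp file, and explicit instantiation pins down the only data types the reference backend needs, keeping the header light. An editorial sketch of the same idea in isolation:

    #include <iostream>

    template <int DataTypeTag>
    struct DebugWorkload
    {
        void Execute() const { std::cout << "debug for tag " << DataTypeTag << '\n'; }
    };

    // Explicit instantiations: the definition above is compiled exactly twice,
    // mirroring 'template class RefDebugWorkload<DataType::Float32>;' etc.
    template struct DebugWorkload<0>; // stands in for DataType::Float32
    template struct DebugWorkload<1>; // stands in for DataType::QuantisedAsymm8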
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index cfddc38a99..7d6aafcfbf 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -463,9 +463,9 @@ int main(int argc, const char* argv[])
          "as they are expected to be defined in the file for each test in particular.")
         ("concurrent,n", po::bool_switch()->default_value(false),
          "Whether or not the test cases should be executed in parallel")
-        ("model-format,f", po::value<std::string>(&modelFormat),
+        ("model-format,f", po::value<std::string>(&modelFormat)->required(),
          "caffe-binary, caffe-text, onnx-binary, onnx-text, tflite-binary, tensorflow-binary or tensorflow-text.")
-        ("model-path,m", po::value<std::string>(&modelPath), "Path to model file, e.g. .caffemodel, .prototxt,"
+        ("model-path,m", po::value<std::string>(&modelPath)->required(), "Path to model file, e.g. .caffemodel, .prototxt,"
          " .tflite, .onnx")
         ("compute,c", po::value<std::vector<armnn::BackendId>>()->multitoken(),
          backendsMessage.c_str())
diff --git a/tests/InferenceTest.cpp b/tests/InferenceTest.cpp
index 8733bc53a0..7413de97dd 100644
--- a/tests/InferenceTest.cpp
+++ b/tests/InferenceTest.cpp
@@ -34,8 +34,6 @@ bool ParseCommandLine(int argc, char** argv, IInferenceTestCaseProvider& testCas
 {
     namespace po = boost::program_options;
 
-    std::string computeDeviceStr;
-
     po::options_description desc("Options");
 
     try
diff --git a/tests/TfLiteVGG16Quantized-Armnn/TfLiteVGG16Quantized-Armnn.cpp b/tests/TfLiteVGG16Quantized-Armnn/TfLiteVGG16Quantized-Armnn.cpp
index e23dbdc9d4..84d5292195 100644
--- a/tests/TfLiteVGG16Quantized-Armnn/TfLiteVGG16Quantized-Armnn.cpp
+++ b/tests/TfLiteVGG16Quantized-Armnn/TfLiteVGG16Quantized-Armnn.cpp
@@ -23,7 +23,7 @@ int main(int argc, char* argv[])
             {"shark.jpg", 669},
         };
 
-        armnn::TensorShape inputTensorShape({ 2, 224, 224, 3 });
+        armnn::TensorShape inputTensorShape({ 1, 224, 224, 3 });
 
         using DataType = uint8_t;
         using DatabaseType = ImagePreprocessor<DataType>;
 
@@ -34,11 +34,11 @@ int main(int argc, char* argv[])
         retVal = armnn::test::ClassifierInferenceTestMain<DatabaseType, ParserType>(
             argc, argv,
-            "vgg_16_u8.tflite",           // model name
-            true,                         // model is binary
-            "content_vgg/concat",         // input tensor name
-            "content_vgg/prob",           // output tensor name
-            { 0, 1, 2 },                  // test images to test with as above
+            "vgg_16_u8_batch1.tflite",    // model name
+            true,                         // model is binary
+            "content_vgg/concat",         // input tensor name
+            "content_vgg/prob",           // output tensor name
+            { 0, 1, 2 },                  // test images to test with as above
             [&imageSet](const char* dataDir, const ModelType& model) {
                 // we need to get the input quantization parameters from
                 // the parsed model
@@ -53,7 +53,7 @@ int main(int argc, char* argv[])
                     {{0, 0, 0}},
                     {{1, 1, 1}},
                     DatabaseType::DataFormat::NCHW,
-                    2);
+                    1);
             },
             &inputTensorShape);
     }
-- 
cgit v1.2.1