Diffstat (limited to 'src/armnn/Network.cpp')
-rw-r--r-- | src/armnn/Network.cpp | 339
1 file changed, 298 insertions, 41 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 0a5325c2a4..f510207c06 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -5,16 +5,21 @@ #include "Network.hpp" #include "Graph.hpp" #include "Layer.hpp" +#include "DeviceSpec.hpp" #include "backends/CpuTensorHandle.hpp" #include "backends/WorkloadFactory.hpp" #include "Optimizer.hpp" +#include "armnn/Exceptions.hpp" #include <armnn/Utils.hpp> +#include <armnn/TypesUtils.hpp> #include <fcntl.h> #include <algorithm> #include <fstream> #include <memory> +#include <vector> +#include <algorithm> #include <boost/assert.hpp> #include <boost/format.hpp> @@ -22,6 +27,8 @@ #include <boost/numeric/conversion/converter_policies.hpp> #include <boost/cast.hpp> +#include "optimizations/All.hpp" + namespace armnn { @@ -62,43 +69,195 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const return m_Graph->SerializeToDot(stream); } -IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const DeviceSpec& deviceSpec) +IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, + const std::vector<armnn::Compute>& backendPreferences, + const IDeviceSpec& deviceSpec, + const OptimizerOptions& options) { + if (backendPreferences.empty()) { + throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); + } const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); - OptimizedNetwork* optNet = new OptimizedNetwork(std::move(graph)); + auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy); - Optimizer::Optimize(optNet->GetGraph()); + OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get()); + + // Perform optimisation passes + using namespace optimizations; + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualReshapeSiblings(), + OptimizeInversePermutes(), + MovePermuteUp(), + PermuteAsReshape(), + OptimizeConsecutiveReshapes())); // Infer the tensor infos for all output slots. Throws an exception on failure. - optNet->GetGraph().InferTensorInfos(); + optNetObjPtr->GetGraph().InferTensorInfos(); - // Assign a compute device for all nodes - for (auto&& layer : optNet->GetGraph()) + // if Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 + if (options.m_ReduceFp32ToFp16) { - DataType dataType = layer->GetDataType(); + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter())); + } + + // We know that DeviceSpec should be the only implementation of IDeviceSpec. + const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec); + + // determine which of the preferred backends we have available for use + // and whether we have specified CpuRef as one of those backends. + bool cpuRefUsed = false; + std::vector<armnn::Compute> availablePreferredBackends; + for (const armnn::Compute& backend : backendPreferences) + { + // Check if the backend is in the available backend devices. + if (std::find(spec.m_SupportedComputeDevices.begin(), + spec.m_SupportedComputeDevices.end(), backend) != + spec.m_SupportedComputeDevices.end()) + { + availablePreferredBackends.push_back(backend); + if (armnn::Compute::CpuRef == backend) { + cpuRefUsed = true; + } + } + } + if (availablePreferredBackends.empty()) { + BOOST_LOG_TRIVIAL(warning) << "None of the preferred backends " << backendPreferences + << " are supported. 
Current platform provides " << spec.m_SupportedComputeDevices; + return {nullptr, &IOptimizedNetwork::Destroy}; + } - // Default to the user-requested compute device from the Runtime - layer->SetComputeDevice(deviceSpec.DefaultComputeDevice); + auto ReturnWithError = [&](Layer* layer) + { + BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) + << " is not supported on any preferred backend " << backendPreferences; + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + }; - // If the layer is unsupported by this device, fall back to reference + // Assign a compute device for all nodes + for (auto&& layer : optNetObjPtr->GetGraph()) + { + DataType dataType = layer->GetDataType(); std::string reasonIfUnsupported; - if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported)) + bool found = false; + for (const armnn::Compute& backend : availablePreferredBackends) { - BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << - " is not supported on requested backend " << layer->GetComputeDevice() << " (reason: " << - reasonIfUnsupported << "), falling back to CpuRef backend."; - layer->SetComputeDevice(Compute::CpuRef); + // need to set the compute device on the layer + // before we can check if it is supported + layer->SetComputeDevice(backend); + if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported)) + { + if (dataType == DataType::Float16) + { + if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported) + && layer->GetType() != LayerType::ConvertFp32ToFp16 + && layer->GetType() != LayerType::ConvertFp16ToFp32) + { + // Insert FP16 -> FP32 conversion layer before current layer + std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers = + InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer); + + // Insert FP32 -> FP16 conversion layer after current layer + std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers = + InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer); + + // Assign a supported backend to the newly introduced conversion layers + auto AssignFirstSupportedBackend = [&](Layer* layer, Compute preferredBackend) + { + bool supportedBackendFound = false; + std::string reasonIfUnsupported; + + // Try preferred backend first + layer->SetComputeDevice(preferredBackend); + if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported)) + { + supportedBackendFound = true; + } + else + { + for (const Compute& backend : availablePreferredBackends) + { + // Skip preferred backend (we already determined that it is not supported) + if (backend == preferredBackend) + { + continue; + } + + layer->SetComputeDevice(backend); + if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported)) + { + supportedBackendFound = true; + break; + } + } + } + + return supportedBackendFound; + }; + + for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnWithError(convertLayer); + } + } + + for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnWithError(convertLayer); + } + } + + found = true; + break; + } + } + BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) + << " is not supported on requested backend " << layer->GetComputeDevice() + << " 
(reason: " << reasonIfUnsupported + << "), falling back to the next backend."; + } + else + { + found = true; + break; + } } - BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported), - "Layer has no valid compute device"); + // If the layer is unsupported by any devices, log and return a null network. + if (!found) { + // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a + // fallback we should set the compute device on the layer to CpuRef (these are not + // available as accelerated operations, or are only available under certain + // conditions, currently they comprise MemCopy, Constant, Permute) + armnn::LayerType layerType = layer->GetType(); + if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy || + layerType == armnn::LayerType::Constant || + layerType == armnn::LayerType::Permute)) + { + layer->SetComputeDevice(armnn::Compute::CpuRef); + } + else + { + return ReturnWithError(layer); + } + } } - optNet->GetGraph().AddCopyLayers(); + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(), + OptimizeInverseConversionsFp32())); + + optNetObjPtr->GetGraph().AddCopyLayers(); + + // Convert constants + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsFloatToHalf())); + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat())); - return {optNet, &IOptimizedNetwork::Destroy}; + return optNet; } Network::Network() @@ -116,9 +275,9 @@ IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name) } IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor* biases, - const char* name) + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) { if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr)) { @@ -138,24 +297,24 @@ IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescr } IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const char* name) + const ConstTensor& weights, + const char* name) { return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name); } IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, - const char* name) + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) { return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name); } IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const ConstTensor* biases, - const char* name) + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) { if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr)) { @@ -175,15 +334,15 @@ IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescrip } IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const char* name) + const ConstTensor& weights, + const char* name) { return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name); } IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - 
const ConstTensor& weights, - const ConstTensor& biases, - const char* name) + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) { return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name); } @@ -199,7 +358,8 @@ IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl( throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL"); } - const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name); + const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, + name); layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights); @@ -245,7 +405,8 @@ IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activ return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name); } -IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, +IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& +normalizationDescriptor, const char* name) { return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name); @@ -301,7 +462,8 @@ IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationD return layer; } -IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor, const char* name) +IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& +resizeDescriptor, const char* name) { return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor,name); } @@ -313,10 +475,15 @@ IConnectableLayer* Network::AddL2NormalizationLayer(const char* name) IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name) { - return m_Graph->AddLayer<ConstantLayer>(std::make_shared<ScopedCpuTensorHandle>(input), name); + auto layer = m_Graph->AddLayer<ConstantLayer>(name); + + layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input); + + return layer; } -IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, const char* name) +IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, + const char* name) { return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name); } @@ -326,6 +493,97 @@ IConnectableLayer* Network::AddFloorLayer(const char* name) return m_Graph->AddLayer<FloorLayer>(name); } +IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor, + const LstmInputParams& params, + const char* name) +{ + const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name); + + //Lstm Basic Parameters + layer->m_BasicParameters.m_InputToForgetWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights)); + layer->m_BasicParameters.m_InputToCellWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights)); + layer->m_BasicParameters.m_InputToOutputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights)); + layer->m_BasicParameters.m_RecurrentToForgetWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights)); + layer->m_BasicParameters.m_RecurrentToCellWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights)); + layer->m_BasicParameters.m_RecurrentToOutputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights)); + layer->m_BasicParameters.m_ForgetGateBias = + 
std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias)); + layer->m_BasicParameters.m_CellBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias)); + layer->m_BasicParameters.m_OutputGateBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias)); + + //Lstm Cifg parameters + if(!descriptor.m_CifgEnabled) + { + if(params.m_InputToInputWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL"); + } + if(params.m_RecurrentToInputWeights == nullptr) + { + throw InvalidArgumentException( + "AddLstmLayer: Recurrent To Input Weights cannot be NULL"); + } + if(params.m_InputGateBias == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL"); + } + layer->m_CifgParameters.m_InputToInputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights)); + layer->m_CifgParameters.m_RecurrentToInputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights)); + // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not. + if(params.m_CellToInputWeights != nullptr) + { + layer->m_CifgParameters.m_CellToInputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights)); + } + layer->m_CifgParameters.m_InputGateBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias)); + } + + //Lstm projection parameters + if(descriptor.m_ProjectionEnabled) + { + if(params.m_ProjectionWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL"); + } + layer->m_ProjectionParameters.m_ProjectionWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights)); + if(params.m_ProjectionBias != nullptr) + { + layer->m_ProjectionParameters.m_ProjectionBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias)); + } + } + + //Lstm Peephole params + if(descriptor.m_PeepholeEnabled) + { + if(params.m_CellToForgetWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL"); + } + if(params.m_CellToOutputWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL"); + } + layer->m_PeepholeParameters.m_CellToForgetWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights)); + layer->m_PeepholeParameters.m_CellToOutputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights)); + } + return layer; +} + OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph) : m_Graph(std::move(graph)) { @@ -336,4 +594,3 @@ OptimizedNetwork::~OptimizedNetwork() } } // namespace armnn - |
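
Note on the API change above: the single-device Optimize(const INetwork&, const DeviceSpec&) overload is replaced by one that takes an ordered list of backend preferences, the runtime's IDeviceSpec, and an OptimizerOptions struct, and that now returns a null IOptimizedNetworkPtr (or throws for an empty preference list) rather than silently falling back to CpuRef. A minimal caller-side sketch of the new signature follows; the runtime setup, the empty network, and the GpuAcc/CpuAcc/CpuRef preference order are illustrative assumptions, not part of this diff.

// Sketch only: assumes the ArmNN public headers at this revision
// (armnn/ArmNN.hpp exposing INetwork, IRuntime, Compute, Optimize, OptimizerOptions).
#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;

    // Create a runtime; its device spec reports the compute devices available on this platform.
    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    // Build a network (layers omitted for brevity).
    INetworkPtr network = INetwork::Create();

    // Ordered backend preferences: per layer, the first backend that supports it wins.
    // Passing an empty vector now throws InvalidArgumentException, per the check added above.
    std::vector<Compute> backendPreferences = { Compute::GpuAcc, Compute::CpuAcc, Compute::CpuRef };

    // Optional optimizer options; m_ReduceFp32ToFp16 enables the FP32 -> FP16
    // conversion pass introduced in this change.
    OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToFp16 = false;

    IOptimizedNetworkPtr optimizedNet = Optimize(*network,
                                                 backendPreferences,
                                                 runtime->GetDeviceSpec(),
                                                 optimizerOptions);

    // A null pointer means none of the preferred backends could run the network's layers.
    if (!optimizedNet)
    {
        return 1;
    }
    return 0;
}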