path: root/src/armnn/Network.cpp
author     telsoa01 <telmo.soares@arm.com>  2018-08-31 09:22:23 +0100
committer  telsoa01 <telmo.soares@arm.com>  2018-08-31 09:22:23 +0100
commit     c577f2c6a3b4ddb6ba87a882723c53a248afbeba (patch)
tree       bd7d4c148df27f8be6649d313efb24f536b7cf34 /src/armnn/Network.cpp
parent     4c7098bfeab1ffe1cdc77f6c15548d3e73274746 (diff)
download   armnn-c577f2c6a3b4ddb6ba87a882723c53a248afbeba.tar.gz
Release 18.08
Diffstat (limited to 'src/armnn/Network.cpp')
-rw-r--r--  src/armnn/Network.cpp  339
1 file changed, 298 insertions(+), 41 deletions(-)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 0a5325c2a4..f510207c06 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -5,16 +5,21 @@
#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
+#include "DeviceSpec.hpp"
#include "backends/CpuTensorHandle.hpp"
#include "backends/WorkloadFactory.hpp"
#include "Optimizer.hpp"
+#include "armnn/Exceptions.hpp"
#include <armnn/Utils.hpp>
+#include <armnn/TypesUtils.hpp>
#include <fcntl.h>
#include <algorithm>
#include <fstream>
#include <memory>
+#include <vector>
+#include <algorithm>
#include <boost/assert.hpp>
#include <boost/format.hpp>
@@ -22,6 +27,8 @@
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>
+#include "optimizations/All.hpp"
+
namespace armnn
{
@@ -62,43 +69,195 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
return m_Graph->SerializeToDot(stream);
}
-IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const DeviceSpec& deviceSpec)
+IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
+ const std::vector<armnn::Compute>& backendPreferences,
+ const IDeviceSpec& deviceSpec,
+ const OptimizerOptions& options)
{
+ if (backendPreferences.empty()) {
+ throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
+ }
const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());
- OptimizedNetwork* optNet = new OptimizedNetwork(std::move(graph));
+ auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);
- Optimizer::Optimize(optNet->GetGraph());
+ OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());
+
+ // Perform optimisation passes
+ using namespace optimizations;
+ Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(),
+ SquashEqualReshapeSiblings(),
+ OptimizeInversePermutes(),
+ MovePermuteUp(),
+ PermuteAsReshape(),
+ OptimizeConsecutiveReshapes()));
// Infer the tensor infos for all output slots. Throws an exception on failure.
- optNet->GetGraph().InferTensorInfos();
+ optNetObjPtr->GetGraph().InferTensorInfos();
- // Assign a compute device for all nodes
- for (auto&& layer : optNet->GetGraph())
+ // if Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
+ if (options.m_ReduceFp32ToFp16)
{
- DataType dataType = layer->GetDataType();
+ Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter()));
+ }
+
+ // We know that DeviceSpec should be the only implementation of IDeviceSpec.
+ const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec);
+
+ // determine which of the preferred backends we have available for use
+ // and whether we have specified CpuRef as one of those backends.
+ bool cpuRefUsed = false;
+ std::vector<armnn::Compute> availablePreferredBackends;
+ for (const armnn::Compute& backend : backendPreferences)
+ {
+ // Check if the backend is in the available backend devices.
+ if (std::find(spec.m_SupportedComputeDevices.begin(),
+ spec.m_SupportedComputeDevices.end(), backend) !=
+ spec.m_SupportedComputeDevices.end())
+ {
+ availablePreferredBackends.push_back(backend);
+ if (armnn::Compute::CpuRef == backend) {
+ cpuRefUsed = true;
+ }
+ }
+ }
+ if (availablePreferredBackends.empty()) {
+ BOOST_LOG_TRIVIAL(warning) << "None of the preferred backends " << backendPreferences
+ << " are supported. Current platform provides " << spec.m_SupportedComputeDevices;
+ return {nullptr, &IOptimizedNetwork::Destroy};
+ }
- // Default to the user-requested compute device from the Runtime
- layer->SetComputeDevice(deviceSpec.DefaultComputeDevice);
+ auto ReturnWithError = [&](Layer* layer)
+ {
+ BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
+ << " is not supported on any preferred backend " << backendPreferences;
+ return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
+ };
- // If the layer is unsupported by this device, fall back to reference
+ // Assign a compute device for all nodes
+ for (auto&& layer : optNetObjPtr->GetGraph())
+ {
+ DataType dataType = layer->GetDataType();
std::string reasonIfUnsupported;
- if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
+ bool found = false;
+ for (const armnn::Compute& backend : availablePreferredBackends)
{
- BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) <<
- " is not supported on requested backend " << layer->GetComputeDevice() << " (reason: " <<
- reasonIfUnsupported << "), falling back to CpuRef backend.";
- layer->SetComputeDevice(Compute::CpuRef);
+ // need to set the compute device on the layer
+ // before we can check if it is supported
+ layer->SetComputeDevice(backend);
+ if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
+ {
+ if (dataType == DataType::Float16)
+ {
+ if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
+ && layer->GetType() != LayerType::ConvertFp32ToFp16
+ && layer->GetType() != LayerType::ConvertFp16ToFp32)
+ {
+ // Insert FP16 -> FP32 conversion layer before current layer
+ std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
+ InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);
+
+ // Insert FP32 -> FP16 conversion layer after current layer
+ std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
+ InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);
+
+ // Assign a supported backend to the newly introduced conversion layers
+ auto AssignFirstSupportedBackend = [&](Layer* layer, Compute preferredBackend)
+ {
+ bool supportedBackendFound = false;
+ std::string reasonIfUnsupported;
+
+ // Try preferred backend first
+ layer->SetComputeDevice(preferredBackend);
+ if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported))
+ {
+ supportedBackendFound = true;
+ }
+ else
+ {
+ for (const Compute& backend : availablePreferredBackends)
+ {
+ // Skip preferred backend (we already determined that it is not supported)
+ if (backend == preferredBackend)
+ {
+ continue;
+ }
+
+ layer->SetComputeDevice(backend);
+ if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported))
+ {
+ supportedBackendFound = true;
+ break;
+ }
+ }
+ }
+
+ return supportedBackendFound;
+ };
+
+ for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
+ {
+ if (!AssignFirstSupportedBackend(convertLayer, backend))
+ {
+ return ReturnWithError(convertLayer);
+ }
+ }
+
+ for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
+ {
+ if (!AssignFirstSupportedBackend(convertLayer, backend))
+ {
+ return ReturnWithError(convertLayer);
+ }
+ }
+
+ found = true;
+ break;
+ }
+ }
+ BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
+ << " is not supported on requested backend " << layer->GetComputeDevice()
+ << " (reason: " << reasonIfUnsupported
+ << "), falling back to the next backend.";
+ }
+ else
+ {
+ found = true;
+ break;
+ }
}
- BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported),
- "Layer has no valid compute device");
+ // If the layer is unsupported by any devices, log and return a null network.
+ if (!found) {
+ // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
+ // fallback we should set the compute device on the layer to CpuRef (these are not
+ // available as accelerated operations, or are only available under certain
+ // conditions, currently they comprise MemCopy, Constant, Permute)
+ armnn::LayerType layerType = layer->GetType();
+ if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy ||
+ layerType == armnn::LayerType::Constant ||
+ layerType == armnn::LayerType::Permute))
+ {
+ layer->SetComputeDevice(armnn::Compute::CpuRef);
+ }
+ else
+ {
+ return ReturnWithError(layer);
+ }
+ }
}
- optNet->GetGraph().AddCopyLayers();
+ Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(),
+ OptimizeInverseConversionsFp32()));
+
+ optNetObjPtr->GetGraph().AddCopyLayers();
+
+ // Convert constants
+ Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsFloatToHalf()));
+ Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat()));
- return {optNet, &IOptimizedNetwork::Destroy};
+ return optNet;
}
Network::Network()
@@ -116,9 +275,9 @@ IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
}
IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const ConstTensor* biases,
- const char* name)
+ const ConstTensor& weights,
+ const ConstTensor* biases,
+ const char* name)
{
if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr))
{
@@ -138,24 +297,24 @@ IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescr
}
IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const char* name)
+ const ConstTensor& weights,
+ const char* name)
{
return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name);
}
IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
- const ConstTensor& weights,
- const ConstTensor& biases,
- const char* name)
+ const ConstTensor& weights,
+ const ConstTensor& biases,
+ const char* name)
{
return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name);
}
IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
- const ConstTensor& weights,
- const ConstTensor* biases,
- const char* name)
+ const ConstTensor& weights,
+ const ConstTensor* biases,
+ const char* name)
{
if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
{
@@ -175,15 +334,15 @@ IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescrip
}
IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
- const ConstTensor& weights,
- const char* name)
+ const ConstTensor& weights,
+ const char* name)
{
return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}
IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
- const ConstTensor& weights,
- const ConstTensor& biases,
- const char* name)
+ const ConstTensor& weights,
+ const ConstTensor& biases,
+ const char* name)
{
return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}
@@ -199,7 +358,8 @@ IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL");
}
- const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
+ const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor,
+ name);
layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
@@ -245,7 +405,8 @@ IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activ
return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}
-IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
+IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor&
+normalizationDescriptor,
const char* name)
{
return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
@@ -301,7 +462,8 @@ IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationD
return layer;
}
-IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor, const char* name)
+IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor&
+resizeDescriptor, const char* name)
{
return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor,name);
}
@@ -313,10 +475,15 @@ IConnectableLayer* Network::AddL2NormalizationLayer(const char* name)
IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
- return m_Graph->AddLayer<ConstantLayer>(std::make_shared<ScopedCpuTensorHandle>(input), name);
+ auto layer = m_Graph->AddLayer<ConstantLayer>(name);
+
+ layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);
+
+ return layer;
}
-IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, const char* name)
+IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
+ const char* name)
{
return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}
@@ -326,6 +493,97 @@ IConnectableLayer* Network::AddFloorLayer(const char* name)
return m_Graph->AddLayer<FloorLayer>(name);
}
+IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
+ const LstmInputParams& params,
+ const char* name)
+{
+ const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);
+
+ //Lstm Basic Parameters
+ layer->m_BasicParameters.m_InputToForgetWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
+ layer->m_BasicParameters.m_InputToCellWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
+ layer->m_BasicParameters.m_InputToOutputWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
+ layer->m_BasicParameters.m_RecurrentToForgetWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
+ layer->m_BasicParameters.m_RecurrentToCellWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
+ layer->m_BasicParameters.m_RecurrentToOutputWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
+ layer->m_BasicParameters.m_ForgetGateBias =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
+ layer->m_BasicParameters.m_CellBias =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
+ layer->m_BasicParameters.m_OutputGateBias =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));
+
+ //Lstm Cifg parameters
+ if(!descriptor.m_CifgEnabled)
+ {
+ if(params.m_InputToInputWeights == nullptr)
+ {
+ throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL");
+ }
+ if(params.m_RecurrentToInputWeights == nullptr)
+ {
+ throw InvalidArgumentException(
+ "AddLstmLayer: Recurrent To Input Weights cannot be NULL");
+ }
+ if(params.m_InputGateBias == nullptr)
+ {
+ throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL");
+ }
+ layer->m_CifgParameters.m_InputToInputWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
+ layer->m_CifgParameters.m_RecurrentToInputWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
+ // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not.
+ if(params.m_CellToInputWeights != nullptr)
+ {
+ layer->m_CifgParameters.m_CellToInputWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
+ }
+ layer->m_CifgParameters.m_InputGateBias =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
+ }
+
+ //Lstm projection parameters
+ if(descriptor.m_ProjectionEnabled)
+ {
+ if(params.m_ProjectionWeights == nullptr)
+ {
+ throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL");
+ }
+ layer->m_ProjectionParameters.m_ProjectionWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
+ if(params.m_ProjectionBias != nullptr)
+ {
+ layer->m_ProjectionParameters.m_ProjectionBias =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
+ }
+ }
+
+ //Lstm Peephole params
+ if(descriptor.m_PeepholeEnabled)
+ {
+ if(params.m_CellToForgetWeights == nullptr)
+ {
+ throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL");
+ }
+ if(params.m_CellToOutputWeights == nullptr)
+ {
+ throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL");
+ }
+ layer->m_PeepholeParameters.m_CellToForgetWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
+ layer->m_PeepholeParameters.m_CellToOutputWeights =
+ std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
+ }
+ return layer;
+}
+
OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
: m_Graph(std::move(graph))
{
@@ -336,4 +594,3 @@ OptimizedNetwork::~OptimizedNetwork()
}
} // namespace armnn
-
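
The headline change in this diff is the new Optimize() signature: the single user-requested DefaultComputeDevice is replaced by an ordered list of backend preferences plus an OptimizerOptions struct, with per-layer fallback down the list. The following is a minimal caller-side sketch of the new flow, not part of the commit itself; it assumes the 18.08 public headers, and "net" stands for any INetworkPtr built elsewhere via INetwork::Create():

    #include <armnn/ArmNN.hpp>
    #include <vector>

    // Backends are tried in the order given; a layer that fails
    // IWorkloadFactory::IsLayerSupported() on one backend falls through
    // to the next, so listing CpuRef last provides a reference fallback.
    std::vector<armnn::Compute> backendPreferences = { armnn::Compute::CpuAcc,
                                                       armnn::Compute::CpuRef };

    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToFp16 = true; // runs the Fp32NetworkToFp16Converter pass

    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(armnn::IRuntime::CreationOptions());
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net,
                                                         backendPreferences,
                                                         runtime->GetDeviceSpec(),
                                                         optimizerOptions);

    // Optimize() now returns a null IOptimizedNetworkPtr when none of the
    // preferred backends is available on the platform, so check before
    // loading the network into the runtime.
    if (!optNet)
    {
        // handle the unsupported-platform case
    }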
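
The other substantial addition is AddLstmLayer(). Below is a hedged construction sketch, again not from the commit: it leaves CIFG enabled so that only the nine basic tensors validated above are required, and the shapes and zero-filled buffers are placeholders chosen for illustration:

    #include <armnn/ArmNN.hpp>
    #include <vector>

    using namespace armnn;

    INetworkPtr net = INetwork::Create();

    const unsigned int inputSize = 2;
    const unsigned int numUnits  = 4;
    TensorInfo inputWeightInfo({numUnits, inputSize}, DataType::Float32);
    TensorInfo recurrentWeightInfo({numUnits, numUnits}, DataType::Float32);
    TensorInfo biasInfo({numUnits}, DataType::Float32);

    // Placeholder constant data; AddLstmLayer() copies each tensor into a
    // ScopedCpuTensorHandle, so one buffer can back all of them here.
    std::vector<float> zeros(numUnits * numUnits, 0.0f);
    ConstTensor inputWeights(inputWeightInfo, zeros.data());
    ConstTensor recurrentWeights(recurrentWeightInfo, zeros.data());
    ConstTensor bias(biasInfo, zeros.data());

    // The nine basic parameters checked unconditionally by AddLstmLayer().
    LstmInputParams params;
    params.m_InputToForgetWeights     = &inputWeights;
    params.m_InputToCellWeights       = &inputWeights;
    params.m_InputToOutputWeights     = &inputWeights;
    params.m_RecurrentToForgetWeights = &recurrentWeights;
    params.m_RecurrentToCellWeights   = &recurrentWeights;
    params.m_RecurrentToOutputWeights = &recurrentWeights;
    params.m_ForgetGateBias           = &bias;
    params.m_CellBias                 = &bias;
    params.m_OutputGateBias           = &bias;

    LstmDescriptor desc;
    desc.m_CifgEnabled = true; // skip the input-gate (CIFG) weights validated above

    IConnectableLayer* lstm = net->AddLstmLayer(desc, params, "lstm");

With m_CifgEnabled set to false, m_InputToInputWeights, m_RecurrentToInputWeights and m_InputGateBias must also be supplied, or the InvalidArgumentException paths in the diff fire; the same pattern holds for the projection and peephole parameter groups when their descriptor flags are enabled.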