Diffstat (limited to 'src/armnn/Network.cpp')
-rw-r--r-- | src/armnn/Network.cpp | 339
1 file changed, 298 insertions, 41 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 0a5325c2a4..f510207c06 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -5,16 +5,21 @@ #include "Network.hpp" #include "Graph.hpp" #include "Layer.hpp" +#include "DeviceSpec.hpp" #include "backends/CpuTensorHandle.hpp" #include "backends/WorkloadFactory.hpp" #include "Optimizer.hpp" +#include "armnn/Exceptions.hpp" #include <armnn/Utils.hpp> +#include <armnn/TypesUtils.hpp> #include <fcntl.h> #include <algorithm> #include <fstream> #include <memory> +#include <vector> +#include <algorithm> #include <boost/assert.hpp> #include <boost/format.hpp> @@ -22,6 +27,8 @@ #include <boost/numeric/conversion/converter_policies.hpp> #include <boost/cast.hpp> +#include "optimizations/All.hpp" + namespace armnn { @@ -62,43 +69,195 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const return m_Graph->SerializeToDot(stream); } -IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const DeviceSpec& deviceSpec) +IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, + const std::vector<armnn::Compute>& backendPreferences, + const IDeviceSpec& deviceSpec, + const OptimizerOptions& options) { + if (backendPreferences.empty()) { + throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); + } const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); - OptimizedNetwork* optNet = new OptimizedNetwork(std::move(graph)); + auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy); - Optimizer::Optimize(optNet->GetGraph()); + OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get()); + + // Perform optimisation passes + using namespace optimizations; + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualReshapeSiblings(), + OptimizeInversePermutes(), + MovePermuteUp(), + PermuteAsReshape(), + OptimizeConsecutiveReshapes())); // Infer the tensor infos for all output slots. Throws an exception on failure. - optNet->GetGraph().InferTensorInfos(); + optNetObjPtr->GetGraph().InferTensorInfos(); - // Assign a compute device for all nodes - for (auto&& layer : optNet->GetGraph()) + // if Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 + if (options.m_ReduceFp32ToFp16) { - DataType dataType = layer->GetDataType(); + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter())); + } + + // We know that DeviceSpec should be the only implementation of IDeviceSpec. + const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec); + + // determine which of the preferred backends we have available for use + // and whether we have specified CpuRef as one of those backends. + bool cpuRefUsed = false; + std::vector<armnn::Compute> availablePreferredBackends; + for (const armnn::Compute& backend : backendPreferences) + { + // Check if the backend is in the available backend devices. + if (std::find(spec.m_SupportedComputeDevices.begin(), + spec.m_SupportedComputeDevices.end(), backend) != + spec.m_SupportedComputeDevices.end()) + { + availablePreferredBackends.push_back(backend); + if (armnn::Compute::CpuRef == backend) { + cpuRefUsed = true; + } + } + } + if (availablePreferredBackends.empty()) { + BOOST_LOG_TRIVIAL(warning) << "None of the preferred backends " << backendPreferences + << " are supported. 
Current platform provides " << spec.m_SupportedComputeDevices; + return {nullptr, &IOptimizedNetwork::Destroy}; + } - // Default to the user-requested compute device from the Runtime - layer->SetComputeDevice(deviceSpec.DefaultComputeDevice); + auto ReturnWithError = [&](Layer* layer) + { + BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) + << " is not supported on any preferred backend " << backendPreferences; + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + }; - // If the layer is unsupported by this device, fall back to reference + // Assign a compute device for all nodes + for (auto&& layer : optNetObjPtr->GetGraph()) + { + DataType dataType = layer->GetDataType(); std::string reasonIfUnsupported; - if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported)) + bool found = false; + for (const armnn::Compute& backend : availablePreferredBackends) { - BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << - " is not supported on requested backend " << layer->GetComputeDevice() << " (reason: " << - reasonIfUnsupported << "), falling back to CpuRef backend."; - layer->SetComputeDevice(Compute::CpuRef); + // need to set the compute device on the layer + // before we can check if it is supported + layer->SetComputeDevice(backend); + if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported)) + { + if (dataType == DataType::Float16) + { + if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported) + && layer->GetType() != LayerType::ConvertFp32ToFp16 + && layer->GetType() != LayerType::ConvertFp16ToFp32) + { + // Insert FP16 -> FP32 conversion layer before current layer + std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers = + InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer); + + // Insert FP32 -> FP16 conversion layer after current layer + std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers = + InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer); + + // Assign a supported backend to the newly introduced conversion layers + auto AssignFirstSupportedBackend = [&](Layer* layer, Compute preferredBackend) + { + bool supportedBackendFound = false; + std::string reasonIfUnsupported; + + // Try preferred backend first + layer->SetComputeDevice(preferredBackend); + if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported)) + { + supportedBackendFound = true; + } + else + { + for (const Compute& backend : availablePreferredBackends) + { + // Skip preferred backend (we already determined that it is not supported) + if (backend == preferredBackend) + { + continue; + } + + layer->SetComputeDevice(backend); + if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported)) + { + supportedBackendFound = true; + break; + } + } + } + + return supportedBackendFound; + }; + + for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnWithError(convertLayer); + } + } + + for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnWithError(convertLayer); + } + } + + found = true; + break; + } + } + BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) + << " is not supported on requested backend " << layer->GetComputeDevice() + << " 
(reason: " << reasonIfUnsupported + << "), falling back to the next backend."; + } + else + { + found = true; + break; + } } - BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported), - "Layer has no valid compute device"); + // If the layer is unsupported by any devices, log and return a null network. + if (!found) { + // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a + // fallback we should set the compute device on the layer to CpuRef (these are not + // available as accelerated operations, or are only available under certain + // conditions, currently they comprise MemCopy, Constant, Permute) + armnn::LayerType layerType = layer->GetType(); + if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy || + layerType == armnn::LayerType::Constant || + layerType == armnn::LayerType::Permute)) + { + layer->SetComputeDevice(armnn::Compute::CpuRef); + } + else + { + return ReturnWithError(layer); + } + } } - optNet->GetGraph().AddCopyLayers(); + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(), + OptimizeInverseConversionsFp32())); + + optNetObjPtr->GetGraph().AddCopyLayers(); + + // Convert constants + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsFloatToHalf())); + Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat())); - return {optNet, &IOptimizedNetwork::Destroy}; + return optNet; } Network::Network() @@ -116,9 +275,9 @@ IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name) } IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor* biases, - const char* name) + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) { if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr)) { @@ -138,24 +297,24 @@ IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescr } IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const char* name) + const ConstTensor& weights, + const char* name) { return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name); } IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, - const char* name) + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) { return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name); } IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const ConstTensor* biases, - const char* name) + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) { if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr)) { @@ -175,15 +334,15 @@ IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescrip } IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const char* name) + const ConstTensor& weights, + const char* name) { return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name); } IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, - 
const ConstTensor& weights, - const ConstTensor& biases, - const char* name) + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) { return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name); } @@ -199,7 +358,8 @@ IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl( throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL"); } - const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name); + const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, + name); layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights); @@ -245,7 +405,8 @@ IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activ return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name); } -IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, +IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& +normalizationDescriptor, const char* name) { return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name); @@ -301,7 +462,8 @@ IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationD return layer; } -IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor, const char* name) +IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& +resizeDescriptor, const char* name) { return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor,name); } @@ -313,10 +475,15 @@ IConnectableLayer* Network::AddL2NormalizationLayer(const char* name) IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name) { - return m_Graph->AddLayer<ConstantLayer>(std::make_shared<ScopedCpuTensorHandle>(input), name); + auto layer = m_Graph->AddLayer<ConstantLayer>(name); + + layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input); + + return layer; } -IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, const char* name) +IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, + const char* name) { return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name); } @@ -326,6 +493,97 @@ IConnectableLayer* Network::AddFloorLayer(const char* name) return m_Graph->AddLayer<FloorLayer>(name); } +IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor, + const LstmInputParams& params, + const char* name) +{ + const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name); + + //Lstm Basic Parameters + layer->m_BasicParameters.m_InputToForgetWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights)); + layer->m_BasicParameters.m_InputToCellWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights)); + layer->m_BasicParameters.m_InputToOutputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights)); + layer->m_BasicParameters.m_RecurrentToForgetWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights)); + layer->m_BasicParameters.m_RecurrentToCellWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights)); + layer->m_BasicParameters.m_RecurrentToOutputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights)); + layer->m_BasicParameters.m_ForgetGateBias = + 
std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias)); + layer->m_BasicParameters.m_CellBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias)); + layer->m_BasicParameters.m_OutputGateBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias)); + + //Lstm Cifg parameters + if(!descriptor.m_CifgEnabled) + { + if(params.m_InputToInputWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL"); + } + if(params.m_RecurrentToInputWeights == nullptr) + { + throw InvalidArgumentException( + "AddLstmLayer: Recurrent To Input Weights cannot be NULL"); + } + if(params.m_InputGateBias == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL"); + } + layer->m_CifgParameters.m_InputToInputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights)); + layer->m_CifgParameters.m_RecurrentToInputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights)); + // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not. + if(params.m_CellToInputWeights != nullptr) + { + layer->m_CifgParameters.m_CellToInputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights)); + } + layer->m_CifgParameters.m_InputGateBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias)); + } + + //Lstm projection parameters + if(descriptor.m_ProjectionEnabled) + { + if(params.m_ProjectionWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL"); + } + layer->m_ProjectionParameters.m_ProjectionWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights)); + if(params.m_ProjectionBias != nullptr) + { + layer->m_ProjectionParameters.m_ProjectionBias = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias)); + } + } + + //Lstm Peephole params + if(descriptor.m_PeepholeEnabled) + { + if(params.m_CellToForgetWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL"); + } + if(params.m_CellToOutputWeights == nullptr) + { + throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL"); + } + layer->m_PeepholeParameters.m_CellToForgetWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights)); + layer->m_PeepholeParameters.m_CellToOutputWeights = + std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights)); + } + return layer; +} + OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph) : m_Graph(std::move(graph)) { @@ -336,4 +594,3 @@ OptimizedNetwork::~OptimizedNetwork() } } // namespace armnn - |
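
Note on the API change above: the single-device Optimize(const INetwork&, const DeviceSpec&) overload is replaced by one that takes an ordered list of backend preferences, the runtime's IDeviceSpec, and an OptimizerOptions struct, and that now returns a null IOptimizedNetworkPtr (or throws for an empty preference list) rather than silently falling back to CpuRef. A minimal caller-side sketch of the new signature follows; the runtime setup, the empty network, and the GpuAcc/CpuAcc/CpuRef preference order are illustrative assumptions, not part of this diff.

// Sketch only: assumes the ArmNN public headers at this revision
// (armnn/ArmNN.hpp exposing INetwork, IRuntime, Compute, Optimize, OptimizerOptions).
#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;

    // Create a runtime; its device spec reports the compute devices available on this platform.
    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    // Build a network (layers omitted for brevity).
    INetworkPtr network = INetwork::Create();

    // Ordered backend preferences: per layer, the first backend that supports it wins.
    // Passing an empty vector now throws InvalidArgumentException, per the check added above.
    std::vector<Compute> backendPreferences = { Compute::GpuAcc, Compute::CpuAcc, Compute::CpuRef };

    // Optional optimizer options; m_ReduceFp32ToFp16 enables the FP32 -> FP16
    // conversion pass introduced in this change.
    OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToFp16 = false;

    IOptimizedNetworkPtr optimizedNet = Optimize(*network,
                                                 backendPreferences,
                                                 runtime->GetDeviceSpec(),
                                                 optimizerOptions);

    // A null pointer means none of the preferred backends could run the network's layers.
    if (!optimizedNet)
    {
        return 1;
    }
    return 0;
}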