From 4f1e8e47e29d42ed5862cccb29cf183853c4a86c Mon Sep 17 00:00:00 2001
From: David Monahan <david.monahan@arm.com>
Date: Wed, 4 Sep 2019 09:22:10 +0100
Subject: IVGCVSW-3687 Add INetworkProperties to LoadNetwork

 * Allows users to specify if Import/Export should be used

Signed-off-by: David Monahan <david.monahan@arm.com>
Change-Id: I64da26a6acbeb91ef72d31b6ccc01bb1447f624d
---
 include/armnn/Exceptions.hpp                       | 10 +++
 include/armnn/IRuntime.hpp                         | 21 ++++-
 src/armnn/LoadedNetwork.cpp                        | 50 +++++++----
 src/armnn/LoadedNetwork.hpp                        |  7 +-
 src/armnn/Runtime.cpp                              | 14 ++-
 src/armnn/Runtime.hpp                              |  7 +-
 .../backendsCommon/test/EndToEndTestImpl.hpp       | 99 +++++++++++++++++-----
 src/backends/reference/test/RefEndToEndTests.cpp   |  7 +-
 8 files changed, 167 insertions(+), 48 deletions(-)
diff --git a/include/armnn/Exceptions.hpp b/include/armnn/Exceptions.hpp
index 1740a8c47d..f8e0b430a6 100644
--- a/include/armnn/Exceptions.hpp
+++ b/include/armnn/Exceptions.hpp
@@ -115,6 +115,16 @@ class RuntimeException : public Exception
     using Exception::Exception;
 };
 
+class MemoryImportException : public Exception
+{
+    using Exception::Exception;
+};
+
+class MemoryExportException : public Exception
+{
+    using Exception::Exception;
+};
+
 template <typename ExceptionType>
 void ConditionalThrow(bool condition, const std::string& message)
 {
diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp
index 41e1c47c5c..68965cffa5 100644
--- a/include/armnn/IRuntime.hpp
+++ b/include/armnn/IRuntime.hpp
@@ -23,6 +23,18 @@ class IGpuAccTunedParameters;
 class IRuntime;
 using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
 
+struct INetworkProperties
+{
+    INetworkProperties(bool importEnabled = false, bool exportEnabled = false)
+        : m_ImportEnabled(importEnabled),
+          m_ExportEnabled(exportEnabled) {}
+
+    const bool m_ImportEnabled;
+    const bool m_ExportEnabled;
+
+    virtual ~INetworkProperties() {}
+};
+
 class IRuntime
 {
 public:
@@ -82,7 +94,12 @@ public:
     /// @return armnn::Status
     virtual Status LoadNetwork(NetworkId& networkIdOut,
                                IOptimizedNetworkPtr network,
-                               std::string & errorMessage) = 0;
+                               std::string& errorMessage) = 0;
+
+    virtual Status LoadNetwork(NetworkId& networkIdOut,
+                               IOptimizedNetworkPtr network,
+                               std::string& errorMessage,
+                               const INetworkProperties& networkProperties) = 0;
 
     virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0;
     virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0;
@@ -163,4 +180,4 @@ protected:
     virtual ~IGpuAccTunedParameters() {};
 };
 
-}
+} // namespace armnn
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 5b64085869..1000eceda0 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -41,7 +41,8 @@ std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
 } // anonymous
 
 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
-                                                                std::string & errorMessage)
+                                                                std::string& errorMessage,
+                                                                const INetworkProperties& networkProperties)
 {
     std::unique_ptr<LoadedNetwork> loadedNetwork;
 
@@ -55,7 +56,7 @@ std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<
 
     try
     {
-        loadedNetwork.reset(new LoadedNetwork(std::move(net)));
+        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties));
     }
     catch (const armnn::RuntimeException& error)
     {
@@ -73,8 +74,11 @@ std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<
     return loadedNetwork;
 }
 
-LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net)
-    : m_OptimizedNetwork(std::move(net))
+LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
+                             const INetworkProperties& networkProperties) :
+                             m_OptimizedNetwork(std::move(net)),
+                             m_IsImportEnabled(networkProperties.m_ImportEnabled),
+                             m_IsExportEnabled(networkProperties.m_ExportEnabled)
 {
     // Create a profiler and register it for the current thread.
     m_Profiler = std::make_shared<Profiler>();
@@ -392,7 +396,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
     info.m_OutputTensorInfos.push_back(outputTensorInfo);
 
     MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
-    if (CheckFlag(importFlags, MemorySource::Malloc))  // Try import the input tensor
+    if (CheckFlag(importFlags, MemorySource::Malloc) && m_IsImportEnabled)  // Try import the input tensor
     {
         // This assumes a CPU Tensor handle
         void* mem = tensorHandle->Map(false);
@@ -402,13 +406,16 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
             return; // No need for a workload since the import has been done.
         }
         tensorHandle->Unmap();
+        throw MemoryImportException("EnqueueInput: Memory Import failed");
     }
+    else
+    {
+        // Create a mem copy workload for input since we did not import
+        auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
 
-    // Create a mem copy workload for input since we could not import
-    auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
-
-    BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
-    m_InputQueue.push_back(move(inputWorkload));
+        BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
+        m_InputQueue.push_back(move(inputWorkload));
+    }
 }
 
 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
@@ -444,7 +451,8 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
     // b) The tensor has zero padding
     // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
     // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
-    if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
+    if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input
+        && m_IsExportEnabled)
     {
         if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
         {
@@ -467,17 +475,23 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
 
                     return; //No need to add the output workload below
                 }
+                else
+                {
+                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
+                }
             }
         }
     }
+    else
+    {
+        // Export is disabled or the producer is an Input layer, so add an output workload which performs a memcopy.
+        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
+        info.m_InputTensorInfos.push_back(inputTensorInfo);
 
-    // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy.
-    outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
-    info.m_InputTensorInfos.push_back(inputTensorInfo);
-
-    auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
-    BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
-    m_OutputQueue.push_back(move(outputWorkload));
+        auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
+        BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
+        m_OutputQueue.push_back(move(outputWorkload));
+    }
 }
 
 void LoadedNetwork::AllocateWorkingMemory()
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp
index 808a93222a..08c09b8801 100644
--- a/src/armnn/LoadedNetwork.hpp
+++ b/src/armnn/LoadedNetwork.hpp
@@ -41,7 +41,8 @@ public:
     Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors);
 
     static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
-                                                            std::string & errorMessage);
+                                                            std::string & errorMessage,
+                                                            const INetworkProperties& networkProperties);
 
     // NOTE we return by reference as the purpose of this method is only to provide
     // access to the private m_Profiler and in theory we should not need to increment
@@ -55,7 +56,7 @@ public:
 private:
     void AllocateWorkingMemory();
 
-    LoadedNetwork(std::unique_ptr<OptimizedNetwork> net);
+    LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const INetworkProperties& networkProperties);
 
     void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
 
@@ -84,6 +85,8 @@ private:
     mutable std::mutex m_WorkingMemMutex;
 
     bool m_IsWorkingMemAllocated=false;
+    bool m_IsImportEnabled=false;
+    bool m_IsExportEnabled=false;
 
     TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
 };
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 9e874848ec..e47835687d 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -49,7 +49,16 @@ Status Runtime::LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr inNetw
 
 Status Runtime::LoadNetwork(NetworkId& networkIdOut,
                             IOptimizedNetworkPtr inNetwork,
-                            std::string & errorMessage)
+                            std::string& errorMessage)
+{
+    INetworkProperties networkProperties;
+    return LoadNetwork(networkIdOut, std::move(inNetwork), errorMessage, networkProperties);
+}
+
+Status Runtime::LoadNetwork(NetworkId& networkIdOut,
+                            IOptimizedNetworkPtr inNetwork,
+                            std::string& errorMessage,
+                            const INetworkProperties& networkProperties)
 {
     IOptimizedNetwork* rawNetwork = inNetwork.release();
 
@@ -62,7 +71,8 @@ Status Runtime::LoadNetwork(NetworkId& networkIdOut,
 
     unique_ptr<LoadedNetwork> loadedNetwork = LoadedNetwork::MakeLoadedNetwork(
         std::unique_ptr<OptimizedNetwork>(boost::polymorphic_downcast<OptimizedNetwork*>(rawNetwork)),
-        errorMessage);
+        errorMessage,
+        networkProperties);
 
     if (!loadedNetwork)
     {
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index 35684f1f78..a028c878a0 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -38,7 +38,12 @@ public:
     /// @return armnn::Status
     virtual Status LoadNetwork(NetworkId& networkIdOut,
                                IOptimizedNetworkPtr network,
-                               std::string & errorMessage) override;
+                               std::string& errorMessage) override;
+
+    virtual Status LoadNetwork(NetworkId& networkIdOut,
+                               IOptimizedNetworkPtr network,
+                               std::string& errorMessage,
+                               const INetworkProperties& networkProperties) override;
 
     virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override;
     virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override;
diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
index 040782bf68..ecc8806626 100644
--- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
@@ -172,7 +172,7 @@ void EndToEndLayerTestImpl(INetworkPtr network,
     }
 }
 
-inline void ImportNonAlignedPointerTest(std::vector<BackendId> backends)
+inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
 {
     using namespace armnn;
 
@@ -201,7 +201,10 @@ inline void ImportNonAlignedPointerTest(std::vector<BackendId> backends)
 
     // Loads it into the runtime.
     NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
+    std::string ignoredErrorMessage;
+    // Enable both importing and exporting
+    INetworkProperties networkProperties(true, true);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
 
     // Creates structures for input & output
     std::vector<float> inputData
@@ -214,8 +217,8 @@ inline void ImportNonAlignedPointerTest(std::vector<BackendId> backends)
 
     std::vector<float> outputData(5);
 
-    // Misaligned output
-    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
+    // Aligned output
+    float * alignedOutputData = outputData.data();
 
     InputTensors inputTensors
     {
@@ -223,31 +226,80 @@ inline void ImportNonAlignedPointerTest(std::vector<BackendId> backends)
     };
     OutputTensors outputTensors
     {
-        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
+        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
     };
 
     // The result of the inference is not important, just the fact that there
     // should not be CopyMemGeneric workloads.
     runtime->GetProfiler(netId)->EnableProfiling(true);
 
-    // Do the inference
-    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+    // Do the inference and expect it to fail with a MemoryImportException
+    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
+}
 
-    // Retrieve the Profiler.Print() output to get the workload execution
-    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
-    std::stringstream ss;
-    profilerManager.GetProfiler()->Print(ss);;
-    std::string dump = ss.str();
+inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
+{
+    using namespace armnn;
 
-    // Contains RefNormalizationWorkload
-    std::size_t found = dump.find("RefNormalizationWorkload");
-    BOOST_TEST(found != std::string::npos);
-    // No Contains SyncMemGeneric (Created when importing the output tensor handle)
-    found = dump.find("SyncMemGeneric");
-    BOOST_TEST(found == std::string::npos);
-    // Contains CopyMemGeneric
-    found = dump.find("CopyMemGeneric");
-    BOOST_TEST(found != std::string::npos);
+    // Create runtime in which test will run
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input = net->AddInputLayer(0);
+
+    NormalizationDescriptor descriptor;
+    IConnectableLayer* norm = net->AddNormalizationLayer(descriptor);
+
+    IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(norm->GetInputSlot(0));
+    norm->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+    norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+
+    // Optimize the network
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Loads it into the runtime.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    // Enable both importing and exporting
+    INetworkProperties networkProperties(true, true);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    std::vector<float> inputData
+    {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
+    };
+
+    // Aligned input
+    float * alignedInputData = inputData.data();
+
+    std::vector<float> outputData(5);
+
+    // Misaligned output
+    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
+
+    InputTensors inputTensors
+    {
+        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
+    };
+    OutputTensors outputTensors
+    {
+        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
+    };
+
+    // The result of the inference is not important: exporting to the misaligned output
+    // pointer is expected to fail rather than fall back to a CopyMemGeneric workload.
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    // Do the inference and expect it to fail with a MemoryExportException
+    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
 }
 
 inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
@@ -279,7 +331,10 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
 
     // Loads it into the runtime.
     NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
+    std::string ignoredErrorMessage;
+    // Enable both importing and exporting
+    INetworkProperties networkProperties(true, true);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
 
     // Creates structures for input & output
     std::vector<float> inputData
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 6024f1584b..52454a2a26 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -973,7 +973,12 @@ BOOST_AUTO_TEST_CASE(RefResizeNearestNeighborEndToEndInt16NhwcTest)
 // Only run these tests on non Android platforms
 BOOST_AUTO_TEST_CASE(RefImportNonAlignedPointerTest)
 {
-    ImportNonAlignedPointerTest(defaultBackends);
+    ImportNonAlignedInputPointerTest(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(RefExportNonAlignedPointerTest)
+{
+    ImportNonAlignedOutputPointerTest(defaultBackends);
 }
 
 BOOST_AUTO_TEST_CASE(RefImportAlignedPointerTest)
-- 
cgit v1.2.1