Merge 'main' onto 'experimental/GpuFsa'.

* I6c71be11e9b73694747b27fe9febab8d9669b4d4 Signed-off-by: Cathal Corbett <cathal.corbett@arm.com> Change-Id: Iccaf50e2484559979d801ee9d0e130e848554733
author: Cathal Corbett <cathal.corbett@arm.com> 2023-01-12 11:17:03 +0000
committer: Cathal Corbett <cathal.corbett@arm.com> 2023-01-12 11:18:21 +0000
commit: d69c1c595375b904a7f19f562ac1d54098184b4e (patch)
tree: b2c4980eb367aa160282aae5c2deda8ef19682de /src/armnn
parent: 267c985a6322fbc1efa22ba44188ac867537f1b1 (diff)
download: armnn-d69c1c595375b904a7f19f562ac1d54098184b4e.tar.gz
9 files changed, 85 insertions, 22 deletions
diff --git a/src/armnn/AsyncExecutionCallback.cpp b/src/armnn/AsyncExecutionCallback.cpp
index 5b87927af2..73ce66b7fb 100644
--- a/src/armnn/AsyncExecutionCallback.cpp
+++ b/src/armnn/AsyncExecutionCallback.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -11,6 +11,8 @@ namespace armnn
 namespace experimental
 {
 
+InferenceId AsyncExecutionCallback::nextID = 0u;
+
 void AsyncExecutionCallback::Notify(armnn::Status status, InferenceTimingPair timeTaken)
 {
     {
diff --git a/src/armnn/AsyncExecutionCallback.hpp b/src/armnn/AsyncExecutionCallback.hpp
index 9eab06b4fa..d48f80737d 100644
--- a/src/armnn/AsyncExecutionCallback.hpp
+++ b/src/armnn/AsyncExecutionCallback.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -68,7 +68,6 @@ private:
     armnn::Status       m_Status = Status::Failure;
     InferenceId m_InferenceId;
 };
-InferenceId AsyncExecutionCallback::nextID = 0u;
 
 // Manager to create and monitor AsyncExecutionCallbacks
 // GetNewCallback will create a callback for use in Threadpool::Schedule
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index b5769f75f3..e5d123830c 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -497,13 +497,19 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr
         IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
         ARMNN_ASSERT(subgraphInputSlot);
 
-        IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
-        ARMNN_ASSERT(connectedOutputSlot);
-        connectedOutputSlot->Disconnect(*subgraphInputSlot);
+        // Only disconnect if the InputSlot has a connection; this might not be the case when
+        // dealing with working copies of SubgraphViews.
+        // Note: we don't need this check for OutputSlot as it iterates over a vector of valid connections
+        if (subgraphInputSlot->GetConnection())
+        {
+            IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
+            ARMNN_ASSERT(connectedOutputSlot);
+            connectedOutputSlot->Disconnect(*subgraphInputSlot);
 
-        IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
-        ARMNN_ASSERT(substituteInputSlot);
-        connectedOutputSlot->Connect(*substituteInputSlot);
+            IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
+            ARMNN_ASSERT(substituteInputSlot);
+            connectedOutputSlot->Connect(*substituteInputSlot);
+        }
     }
 
     // Step 2: process output slots
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7b24fd77b8..b42874f29d 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -1328,6 +1328,7 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle*
     }
     else
     {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput");
         std::unique_ptr<ITensorHandle> tensorHandle =
                 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
 
@@ -1374,6 +1375,7 @@ void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle
 
 void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
 {
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
     auto copyFunc = [](void* dst, const void* src, size_t size)
     {
         memcpy(dst, src, size);
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 158142f48e..42388bfbd7 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -626,7 +626,14 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
     // need to set the compute device on the layer
     // before we can check if it is supported
     layer->SetBackendId(backend);
-    if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
+
+    // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
+    // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
+    // to be FP32 and inserting convert layers around the FP32 operator.
+    bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+    std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
+    if (!isLayerSupported ||
+        reasonIfUnsupported.find(checkStr) != std::string::npos)
     {
         if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
         {
@@ -1568,8 +1575,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                               const OptimizerOptions& options,
                               Optional<std::vector<std::string>&> messages)
 {
-    const auto start_time = armnn::GetTimeNow();
-
     ARMNN_LOG(debug) << options.ToString();
 
     // Enable profiling
@@ -1750,9 +1755,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
     }
 
-    ARMNN_LOG(info) << "!! New time !! : " << std::setprecision(2)
-                    << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms.";
-
     return optNet;
 }
 
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 75b1ee8179..ff64e856f4 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -20,7 +20,10 @@
 #include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnn/utility/Timer.hpp>
 
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
 #include <backendsCommon/DynamicBackendUtils.hpp>
+#endif
+
 #include <backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp>
 
 #include <client/include/backends/IBackendProfiling.hpp>
@@ -334,11 +337,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
         throw RuntimeException(
                 "It is not possible to enable timeline reporting without profiling being enabled");
     }
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
     // Load any available/compatible dynamic backend before the runtime
     // goes through the backend registry
     LoadDynamicBackends(options.m_DynamicBackendsPath);
-
+#endif
     armnn::BackendIdSet supportedBackends;
     for (const auto& id : BackendRegistryInstance().GetBackendIds())
     {
@@ -354,9 +357,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
             if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end() &&
                 customAllocatorMapIterator->second == nullptr)
             {
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
                 // We need to manually clean up  the dynamic backends before throwing an exception.
                 DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
                 m_DeviceSpec.ClearDynamicBackends();
+#endif
                 throw armnn::Exception("Allocator associated with id " + id.Get() + " is null");
             }
 
@@ -393,6 +398,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
                         }
                         // No errors so register the Custom Allocator with the BackendRegistry
                         BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+                        m_AllocatorsAddedByThisRuntime.emplace(id);
                     }
                     else
                     {
@@ -428,6 +434,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
                     }
                     // No errors so register the Custom Allocator with the BackendRegistry
                     BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+                    m_AllocatorsAddedByThisRuntime.emplace(id);
                 }
             }
 
@@ -577,13 +584,20 @@ RuntimeImpl::~RuntimeImpl()
                       << std::endl;
         }
     }
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
     // Clear all dynamic backends.
     DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
     m_DeviceSpec.ClearDynamicBackends();
+#endif
     m_BackendContexts.clear();
 
     BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional());
+    // Remove custom allocators that this runtime has added.
+    // Note: backends can be per process and a process can hold many runtime instances, so an allocator
+    // may have been overwritten by another runtime.
+    for_each(m_AllocatorsAddedByThisRuntime.begin(), m_AllocatorsAddedByThisRuntime.end(),
+             [](BackendId id) {BackendRegistryInstance().DeregisterAllocator(id);});
+
     ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
 }
@@ -755,6 +769,7 @@ void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallback
     loadedNetwork->RegisterDebugCallback(func);
 }
 
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
 void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
 {
     // Get the paths where to load the dynamic backends from
@@ -772,5 +787,5 @@ void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
     // Add the registered dynamic backend ids to the list of supported backends
     m_DeviceSpec.AddSupportedBackends(registeredBackendIds, true);
 }
-
+#endif
 } // namespace armnn
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index f5dfadf948..9d47b7898d 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -157,6 +157,10 @@ private:
 
     /// Profiling Service Instance
     std::unique_ptr<arm::pipe::IProfilingService> m_ProfilingService;
+
+    /// Keep track of backend ids of the custom allocators that this instance of the runtime added. The
+    /// destructor can then clean up for this runtime.
+    std::set<BackendId> m_AllocatorsAddedByThisRuntime;
 };
 
 } // namespace armnn
diff --git a/src/armnn/TypesUtils.cpp b/src/armnn/TypesUtils.cpp
index 4ba9ed19e1..74ac231bc9 100644
--- a/src/armnn/TypesUtils.cpp
+++ b/src/armnn/TypesUtils.cpp
@@ -81,4 +81,8 @@ float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset);
 
 /// Explicit specialization of Dequantize for int32_t
 template
-float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
-\ No newline at end of file
+float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
+
+/// Explicit instantiation of Dequantize for int64_t
+template
+float armnn::Dequantize<int64_t>(int64_t value, float scale, int32_t offset);
diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp
index 4ce67b0fec..9bb5e69bbb 100644
--- a/src/armnn/test/SubgraphViewTests.cpp
+++ b/src/armnn/test/SubgraphViewTests.cpp
@@ -2063,6 +2063,35 @@ TEST_CASE("SubgraphViewWorkingCopySubstituteSubgraph")
     CHECK_THROWS_AS(workingCopy.GetWorkingCopy(), Exception);
 }
 
+TEST_CASE("SubgraphViewPartialWorkingCopySubstituteSubgraph")
+{
+    Graph graph;
+
+    auto input = graph.AddLayer<InputLayer>(0, "Input");
+    auto activation = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation");
+    auto output = graph.AddLayer<OutputLayer>(1, "Output");
+
+    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    // Create a view that contains only the Activation layer (a partial view of the graph)
+    auto view = CreateSubgraphViewFrom({activation},
+                                       {&activation->GetInputSlot(0)},
+                                       {&activation->GetOutputSlot(0)});
+
+    auto workingCopy = view->GetWorkingCopy();
+
+    // First (and only) layer in the subgraph is the Activation
+    CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation");
+
+    // Substitute the "Activation" layer for an equivalent layer
+    auto activation2 = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation2");
+    SubgraphView pattern(*workingCopy.beginIConnectable());
+    workingCopy.SubstituteSubgraph(pattern, activation2);
+
+    CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation2");
+}
+
 TEST_CASE("SubgraphViewWorkingCopyOptimizationViews")
 {
     Graph graph;
author	Cathal Corbett <cathal.corbett@arm.com>	2023-01-12 11:17:03 +0000
committer	Cathal Corbett <cathal.corbett@arm.com>	2023-01-12 11:18:21 +0000
commit	d69c1c595375b904a7f19f562ac1d54098184b4e (patch)
tree	b2c4980eb367aa160282aae5c2deda8ef19682de /src/armnn
parent	267c985a6322fbc1efa22ba44188ac867537f1b1 (diff)
download	armnn-d69c1c595375b904a7f19f562ac1d54098184b4e.tar.gz