aboutsummaryrefslogtreecommitdiff
path: root/src/armnn
diff options
context:
space:
mode:
authorCathal Corbett <cathal.corbett@arm.com>2023-01-12 11:17:03 +0000
committerCathal Corbett <cathal.corbett@arm.com>2023-01-12 11:18:21 +0000
commitd69c1c595375b904a7f19f562ac1d54098184b4e (patch)
treeb2c4980eb367aa160282aae5c2deda8ef19682de /src/armnn
parent267c985a6322fbc1efa22ba44188ac867537f1b1 (diff)
downloadarmnn-d69c1c595375b904a7f19f562ac1d54098184b4e.tar.gz
Merge 'main' onto 'experimental/GpuFsa'.
* I6c71be11e9b73694747b27fe9febab8d9669b4d4 Signed-off-by: Cathal Corbett <cathal.corbett@arm.com> Change-Id: Iccaf50e2484559979d801ee9d0e130e848554733
Diffstat (limited to 'src/armnn')
-rw-r--r--src/armnn/AsyncExecutionCallback.cpp4
-rw-r--r--src/armnn/AsyncExecutionCallback.hpp3
-rw-r--r--src/armnn/Graph.cpp18
-rw-r--r--src/armnn/LoadedNetwork.cpp2
-rw-r--r--src/armnn/Network.cpp14
-rw-r--r--src/armnn/Runtime.cpp25
-rw-r--r--src/armnn/Runtime.hpp6
-rw-r--r--src/armnn/TypesUtils.cpp6
-rw-r--r--src/armnn/test/SubgraphViewTests.cpp29
9 files changed, 85 insertions, 22 deletions
diff --git a/src/armnn/AsyncExecutionCallback.cpp b/src/armnn/AsyncExecutionCallback.cpp
index 5b87927af2..73ce66b7fb 100644
--- a/src/armnn/AsyncExecutionCallback.cpp
+++ b/src/armnn/AsyncExecutionCallback.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -11,6 +11,8 @@ namespace armnn
namespace experimental
{
+InferenceId AsyncExecutionCallback::nextID = 0u;
+
void AsyncExecutionCallback::Notify(armnn::Status status, InferenceTimingPair timeTaken)
{
{
diff --git a/src/armnn/AsyncExecutionCallback.hpp b/src/armnn/AsyncExecutionCallback.hpp
index 9eab06b4fa..d48f80737d 100644
--- a/src/armnn/AsyncExecutionCallback.hpp
+++ b/src/armnn/AsyncExecutionCallback.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -68,7 +68,6 @@ private:
armnn::Status m_Status = Status::Failure;
InferenceId m_InferenceId;
};
-InferenceId AsyncExecutionCallback::nextID = 0u;
// Manager to create and monitor AsyncExecutionCallbacks
// GetNewCallback will create a callback for use in Threadpool::Schedule
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index b5769f75f3..e5d123830c 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -497,13 +497,19 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr
IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
ARMNN_ASSERT(subgraphInputSlot);
- IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
- ARMNN_ASSERT(connectedOutputSlot);
- connectedOutputSlot->Disconnect(*subgraphInputSlot);
+ // Only disconnect if the InputSlot has a connection, this might not be the case when
+ // dealing with working copies of SubgraphViews
+ // Note: we don't need this check for OutputSlot as it iterates over a vector of valid connections
+ if (subgraphInputSlot->GetConnection())
+ {
+ IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
+ ARMNN_ASSERT(connectedOutputSlot);
+ connectedOutputSlot->Disconnect(*subgraphInputSlot);
- IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
- ARMNN_ASSERT(substituteInputSlot);
- connectedOutputSlot->Connect(*substituteInputSlot);
+ IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
+ ARMNN_ASSERT(substituteInputSlot);
+ connectedOutputSlot->Connect(*substituteInputSlot);
+ }
}
// Step 2: process output slots
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7b24fd77b8..b42874f29d 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -1328,6 +1328,7 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle*
}
else
{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput");
std::unique_ptr<ITensorHandle> tensorHandle =
std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
@@ -1374,6 +1375,7 @@ void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle
void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
auto copyFunc = [](void* dst, const void* src, size_t size)
{
memcpy(dst, src, size);
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 158142f48e..42388bfbd7 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -626,7 +626,14 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
// need to set the compute device on the layer
// before we can check if it is supported
layer->SetBackendId(backend);
- if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
+
+ // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
+ // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
+ // to be FP32 and inserting convert layers around the FP32 operator.
+ bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+ std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
+ if (!isLayerSupported ||
+ reasonIfUnsupported.find(checkStr) != std::string::npos)
{
if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
{
@@ -1568,8 +1575,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
const OptimizerOptions& options,
Optional<std::vector<std::string>&> messages)
{
- const auto start_time = armnn::GetTimeNow();
-
ARMNN_LOG(debug) << options.ToString();
// Enable profiling
@@ -1750,9 +1755,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
}
- ARMNN_LOG(info) << "!! New time !! : " << std::setprecision(2)
- << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms.";
-
return optNet;
}
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 75b1ee8179..ff64e856f4 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -20,7 +20,10 @@
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/utility/Timer.hpp>
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
#include <backendsCommon/DynamicBackendUtils.hpp>
+#endif
+
#include <backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp>
#include <client/include/backends/IBackendProfiling.hpp>
@@ -334,11 +337,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
throw RuntimeException(
"It is not possible to enable timeline reporting without profiling being enabled");
}
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
// Load any available/compatible dynamic backend before the runtime
// goes through the backend registry
LoadDynamicBackends(options.m_DynamicBackendsPath);
-
+#endif
armnn::BackendIdSet supportedBackends;
for (const auto& id : BackendRegistryInstance().GetBackendIds())
{
@@ -354,9 +357,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end() &&
customAllocatorMapIterator->second == nullptr)
{
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
// We need to manually clean up the dynamic backends before throwing an exception.
DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
m_DeviceSpec.ClearDynamicBackends();
+#endif
throw armnn::Exception("Allocator associated with id " + id.Get() + " is null");
}
@@ -393,6 +398,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
}
// No errors so register the Custom Allocator with the BackendRegistry
BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+ m_AllocatorsAddedByThisRuntime.emplace(id);
}
else
{
@@ -428,6 +434,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
}
// No errors so register the Custom Allocator with the BackendRegistry
BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+ m_AllocatorsAddedByThisRuntime.emplace(id);
}
}
@@ -577,13 +584,20 @@ RuntimeImpl::~RuntimeImpl()
<< std::endl;
}
}
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
// Clear all dynamic backends.
DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
m_DeviceSpec.ClearDynamicBackends();
+#endif
m_BackendContexts.clear();
BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional());
+ // Remove custom allocators that this runtime has added.
+ // Note: that as backends can be per process and there can be many instances of a runtime in a process an allocator
+ // may have been overwritten by another runtime.
+ for_each(m_AllocatorsAddedByThisRuntime.begin(), m_AllocatorsAddedByThisRuntime.end(),
+ [](BackendId id) {BackendRegistryInstance().DeregisterAllocator(id);});
+
ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2)
<< std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
}
@@ -755,6 +769,7 @@ void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallback
loadedNetwork->RegisterDebugCallback(func);
}
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
{
// Get the paths where to load the dynamic backends from
@@ -772,5 +787,5 @@ void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
// Add the registered dynamic backend ids to the list of supported backends
m_DeviceSpec.AddSupportedBackends(registeredBackendIds, true);
}
-
+#endif
} // namespace armnn
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index f5dfadf948..9d47b7898d 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -157,6 +157,10 @@ private:
/// Profiling Service Instance
std::unique_ptr<arm::pipe::IProfilingService> m_ProfilingService;
+
+ /// Keep track of backend ids of the custom allocators that this instance of the runtime added. The
+ /// destructor can then clean up for this runtime.
+ std::set<BackendId> m_AllocatorsAddedByThisRuntime;
};
} // namespace armnn
diff --git a/src/armnn/TypesUtils.cpp b/src/armnn/TypesUtils.cpp
index 4ba9ed19e1..74ac231bc9 100644
--- a/src/armnn/TypesUtils.cpp
+++ b/src/armnn/TypesUtils.cpp
@@ -81,4 +81,8 @@ float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset);
/// Explicit specialization of Dequantize for int32_t
template
-float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset); \ No newline at end of file
+float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
+
+/// Explicit specialization of Dequantize for int64_t
+template
+float armnn::Dequantize<int64_t>(int64_t value, float scale, int32_t offset);
diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp
index 4ce67b0fec..9bb5e69bbb 100644
--- a/src/armnn/test/SubgraphViewTests.cpp
+++ b/src/armnn/test/SubgraphViewTests.cpp
@@ -2063,6 +2063,35 @@ TEST_CASE("SubgraphViewWorkingCopySubstituteSubgraph")
CHECK_THROWS_AS(workingCopy.GetWorkingCopy(), Exception);
}
+TEST_CASE("SubgraphViewPartialWorkingCopySubstituteSubgraph")
+{
+ Graph graph;
+
+ auto input = graph.AddLayer<InputLayer>(0, "Input");
+ auto activation = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation");
+ auto output = graph.AddLayer<OutputLayer>(1, "Output");
+
+ input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+ activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ //Add in out of order
+ auto view = CreateSubgraphViewFrom({activation},
+ {&activation->GetInputSlot(0)},
+ {&activation->GetOutputSlot(0)});
+
+ auto workingCopy = view->GetWorkingCopy();
+
+ // First (and only) layer in the subgraph is the Activation
+ CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation");
+
+ // Substitute the "Activation" layer for an equivalent layer
+ auto activation2 = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation2");
+ SubgraphView pattern(*workingCopy.beginIConnectable());
+ workingCopy.SubstituteSubgraph(pattern, activation2);
+
+ CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation2");
+}
+
TEST_CASE("SubgraphViewWorkingCopyOptimizationViews")
{
Graph graph;