diff options
author | Cathal Corbett <cathal.corbett@arm.com> | 2023-01-12 11:17:03 +0000 |
---|---|---|
committer | Cathal Corbett <cathal.corbett@arm.com> | 2023-01-12 11:18:21 +0000 |
commit | d69c1c595375b904a7f19f562ac1d54098184b4e (patch) | |
tree | b2c4980eb367aa160282aae5c2deda8ef19682de /src/armnn | |
parent | 267c985a6322fbc1efa22ba44188ac867537f1b1 (diff) | |
download | armnn-d69c1c595375b904a7f19f562ac1d54098184b4e.tar.gz |
Merge 'main' onto 'experimental/GpuFsa'.
* I6c71be11e9b73694747b27fe9febab8d9669b4d4
Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Iccaf50e2484559979d801ee9d0e130e848554733
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/AsyncExecutionCallback.cpp | 4 | ||||
-rw-r--r-- | src/armnn/AsyncExecutionCallback.hpp | 3 | ||||
-rw-r--r-- | src/armnn/Graph.cpp | 18 | ||||
-rw-r--r-- | src/armnn/LoadedNetwork.cpp | 2 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 14 | ||||
-rw-r--r-- | src/armnn/Runtime.cpp | 25 | ||||
-rw-r--r-- | src/armnn/Runtime.hpp | 6 | ||||
-rw-r--r-- | src/armnn/TypesUtils.cpp | 6 | ||||
-rw-r--r-- | src/armnn/test/SubgraphViewTests.cpp | 29 |
9 files changed, 85 insertions, 22 deletions
diff --git a/src/armnn/AsyncExecutionCallback.cpp b/src/armnn/AsyncExecutionCallback.cpp index 5b87927af2..73ce66b7fb 100644 --- a/src/armnn/AsyncExecutionCallback.cpp +++ b/src/armnn/AsyncExecutionCallback.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -11,6 +11,8 @@ namespace armnn namespace experimental { +InferenceId AsyncExecutionCallback::nextID = 0u; + void AsyncExecutionCallback::Notify(armnn::Status status, InferenceTimingPair timeTaken) { { diff --git a/src/armnn/AsyncExecutionCallback.hpp b/src/armnn/AsyncExecutionCallback.hpp index 9eab06b4fa..d48f80737d 100644 --- a/src/armnn/AsyncExecutionCallback.hpp +++ b/src/armnn/AsyncExecutionCallback.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -68,7 +68,6 @@ private: armnn::Status m_Status = Status::Failure; InferenceId m_InferenceId; }; -InferenceId AsyncExecutionCallback::nextID = 0u; // Manager to create and monitor AsyncExecutionCallbacks // GetNewCallback will create a callback for use in Threadpool::Schedule diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index b5769f75f3..e5d123830c 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -497,13 +497,19 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx); ARMNN_ASSERT(subgraphInputSlot); - IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection(); - ARMNN_ASSERT(connectedOutputSlot); - connectedOutputSlot->Disconnect(*subgraphInputSlot); + // Only disconnect if the InputSlot has a connection, this might not be the case when + // dealing with working copies of SubgraphViews + // Note: we don't need this check for OutputSlot as it iterates over a vector of valid connections + if (subgraphInputSlot->GetConnection()) + { + IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection(); + ARMNN_ASSERT(connectedOutputSlot); + connectedOutputSlot->Disconnect(*subgraphInputSlot); - IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx); - ARMNN_ASSERT(substituteInputSlot); - connectedOutputSlot->Connect(*substituteInputSlot); + IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx); + ARMNN_ASSERT(substituteInputSlot); + connectedOutputSlot->Connect(*substituteInputSlot); + } } // Step 2: process output slots diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 7b24fd77b8..b42874f29d 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -1328,6 +1328,7 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* } else { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput"); std::unique_ptr<ITensorHandle> tensorHandle = std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea()); @@ -1374,6 +1375,7 @@ void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle) { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput"); auto copyFunc = [](void* dst, const void* src, size_t size) { memcpy(dst, src, size); diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 158142f48e..42388bfbd7 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -626,7 +626,14 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings, // need to set the compute device on the layer // before we can check if it is supported layer->SetBackendId(backend); - if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported)) + + // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture + // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs + // to be FP32 and inserting convert layers around the FP32 operator. + bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported); + std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above"; + if (!isLayerSupported || + reasonIfUnsupported.find(checkStr) != std::string::npos) { if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16) { @@ -1568,8 +1575,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, const OptimizerOptions& options, Optional<std::vector<std::string>&> messages) { - const auto start_time = armnn::GetTimeNow(); - ARMNN_LOG(debug) << options.ToString(); // Enable profiling @@ -1750,9 +1755,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry); } - ARMNN_LOG(info) << "!! New time !! : " << std::setprecision(2) - << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms."; - return optNet; } diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 75b1ee8179..ff64e856f4 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -20,7 +20,10 @@ #include <armnn/utility/PolymorphicDowncast.hpp> #include <armnn/utility/Timer.hpp> +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) #include <backendsCommon/DynamicBackendUtils.hpp> +#endif + #include <backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp> #include <client/include/backends/IBackendProfiling.hpp> @@ -334,11 +337,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) throw RuntimeException( "It is not possible to enable timeline reporting without profiling being enabled"); } - +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Load any available/compatible dynamic backend before the runtime // goes through the backend registry LoadDynamicBackends(options.m_DynamicBackendsPath); - +#endif armnn::BackendIdSet supportedBackends; for (const auto& id : BackendRegistryInstance().GetBackendIds()) { @@ -354,9 +357,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end() && customAllocatorMapIterator->second == nullptr) { +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // We need to manually clean up the dynamic backends before throwing an exception. DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends()); m_DeviceSpec.ClearDynamicBackends(); +#endif throw armnn::Exception("Allocator associated with id " + id.Get() + " is null"); } @@ -393,6 +398,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) } // No errors so register the Custom Allocator with the BackendRegistry BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second); + m_AllocatorsAddedByThisRuntime.emplace(id); } else { @@ -428,6 +434,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) } // No errors so register the Custom Allocator with the BackendRegistry BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second); + m_AllocatorsAddedByThisRuntime.emplace(id); } } @@ -577,13 +584,20 @@ RuntimeImpl::~RuntimeImpl() << std::endl; } } - +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Clear all dynamic backends. DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends()); m_DeviceSpec.ClearDynamicBackends(); +#endif m_BackendContexts.clear(); BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional()); + // Remove custom allocators that this runtime has added. + // Note: that as backends can be per process and there can be many instances of a runtime in a process an allocator + // may have been overwritten by another runtime. + for_each(m_AllocatorsAddedByThisRuntime.begin(), m_AllocatorsAddedByThisRuntime.end(), + [](BackendId id) {BackendRegistryInstance().DeregisterAllocator(id);}); + ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms."; } @@ -755,6 +769,7 @@ void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallback loadedNetwork->RegisterDebugCallback(func); } +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath) { // Get the paths where to load the dynamic backends from @@ -772,5 +787,5 @@ void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath) // Add the registered dynamic backend ids to the list of supported backends m_DeviceSpec.AddSupportedBackends(registeredBackendIds, true); } - +#endif } // namespace armnn diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp index f5dfadf948..9d47b7898d 100644 --- a/src/armnn/Runtime.hpp +++ b/src/armnn/Runtime.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -157,6 +157,10 @@ private: /// Profiling Service Instance std::unique_ptr<arm::pipe::IProfilingService> m_ProfilingService; + + /// Keep track of backend ids of the custom allocators that this instance of the runtime added. The + /// destructor can then clean up for this runtime. + std::set<BackendId> m_AllocatorsAddedByThisRuntime; }; } // namespace armnn diff --git a/src/armnn/TypesUtils.cpp b/src/armnn/TypesUtils.cpp index 4ba9ed19e1..74ac231bc9 100644 --- a/src/armnn/TypesUtils.cpp +++ b/src/armnn/TypesUtils.cpp @@ -81,4 +81,8 @@ float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int32_t template -float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
\ No newline at end of file +float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset); + +/// Explicit specialization of Dequantize for int64_t +template +float armnn::Dequantize<int64_t>(int64_t value, float scale, int32_t offset); diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp index 4ce67b0fec..9bb5e69bbb 100644 --- a/src/armnn/test/SubgraphViewTests.cpp +++ b/src/armnn/test/SubgraphViewTests.cpp @@ -2063,6 +2063,35 @@ TEST_CASE("SubgraphViewWorkingCopySubstituteSubgraph") CHECK_THROWS_AS(workingCopy.GetWorkingCopy(), Exception); } +TEST_CASE("SubgraphViewPartialWorkingCopySubstituteSubgraph") +{ + Graph graph; + + auto input = graph.AddLayer<InputLayer>(0, "Input"); + auto activation = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation"); + auto output = graph.AddLayer<OutputLayer>(1, "Output"); + + input->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); + activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + //Add in out of order + auto view = CreateSubgraphViewFrom({activation}, + {&activation->GetInputSlot(0)}, + {&activation->GetOutputSlot(0)}); + + auto workingCopy = view->GetWorkingCopy(); + + // First (and only) layer in the subgraph is the Activation + CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation"); + + // Substitute the "Activation" layer for an equivalent layer + auto activation2 = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation2"); + SubgraphView pattern(*workingCopy.beginIConnectable()); + workingCopy.SubstituteSubgraph(pattern, activation2); + + CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation2"); +} + TEST_CASE("SubgraphViewWorkingCopyOptimizationViews") { Graph graph; |