From b4c493430567bff25e61e9df0dbab554c29f635d Mon Sep 17 00:00:00 2001
From: Ryan OShea
Date: Tue, 25 Jul 2023 14:28:27 +0100
Subject: IVGCVSW-7880 Add check for FP16 backend support

 * Check if preferred backends have FP16 support before enabling fp16-turbo-mode
 * Unit tests
 * Replaced global gpuAccCapabilities with getter method construction
 * Replaced deprecated function call in SL shim

Signed-off-by: Narumol Prangnawarat
Signed-off-by: Ryan OShea
Change-Id: If29b62b330ca8987de8acf6408db11daf25ca0b5
---
 src/armnn/Network.cpp                      |  77 +++++-
 src/armnn/Network.hpp                      |   2 +
 .../Fp32NetworkToFp16ConverterTests.cpp    | 288 ++++++++++++++++++++-
 3 files changed, 355 insertions(+), 12 deletions(-)

(limited to 'src/armnn')

diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 22d2c78c65..3074c1ffe9 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include

 namespace armnn
 {
@@ -837,14 +838,18 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
     // need to set the compute device on the layer
     // before we can check if it is supported
     layer->SetBackendId(backend);
+    std::string currentReasonIfUnsupported;

     // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
     // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
     // to be FP32 and inserting convert layers around the FP32 operator.
-    bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+    bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
+    reasonIfUnsupported += currentReasonIfUnsupported;
+
+    // This string matches the error message that is produced by acl when attempting to run FP16 kernels on
+    // a cpu or build that does not have fp16 support. We use this to check if we should add
+    // conversion layers or not.
     std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
-    if (!isLayerSupported ||
-        reasonIfUnsupported.find(checkStr) != std::string::npos)
+    if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
     {
         if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
         {
@@ -994,6 +999,51 @@ inline std::vector GetLayerInOutDatatype(const Layer* layer)
     return {dataTypeIn, dataTypeOut};
 }

+bool CheckFp16Support(BackendsMap& backends,
+                      const std::vector& availablePreferredBackends)
+{
+    bool hasFp16 = false;
+    // Check if the first preferred backend has FP16 support
+    auto firstBackend = availablePreferredBackends[0];
+    auto backendObjPtr = backends.find(firstBackend)->second.get();
+    ARMNN_ASSERT(backendObjPtr);
+    auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
+    auto backendCapabilities = backendObjPtr->GetCapabilities();
+
+    if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
+    {
+        // First preferred backend has FP16 support. Enable reduce FP32 to FP16 when fp16-turbo-mode is enabled.
+        hasFp16 = true;
+        ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
+                         << ", has FP16 support.";
+    }
+    else
+    {
+        ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
+                           << ", does not have FP16 support. "
+                           << "The FP16 turbo mode option will be disabled. It will run using FP32.";
+    }
+
+    // Check if the rest of the available preferred backends have FP16 support
+    for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
+    {
+        auto backend = availablePreferredBackends[i];
+        backendObjPtr = backends.find(backend)->second.get();
+        backendCapabilities = backendObjPtr->GetCapabilities();
+        if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
+        {
+            ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
+                               << "It will run using FP32 when falling back to this backend.";
+        }
+        else
+        {
+            ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
+        }
+    }
+
+    return hasFp16;
+}
+
 // Refactor to allow passing the IConnectableLayer* rather than Layer Iterator
 // on Graph and SubgraphView which are different types.
 void AssignBackendsIConnectable(OptimizedNetworkImpl* optNetObjPtr,
@@ -1913,16 +1963,10 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat16()));

-    if (options.GetReduceFp32ToFp16())
-    {
-        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
-        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
-        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
-    }
-
     // Initialize backend settings
     BackendSettings backendSettings(backendPreferences, deviceSpec);
-    if (backendSettings.GetAvailablePreferredBackends().empty())
+    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
+    if (availablePreferredBackends.empty())
     {
         std::stringstream failureMsg;
         failureMsg << "None of the preferred backends " << backendPreferences
@@ -1935,6 +1979,17 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
     TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
     BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

+    if (options.GetReduceFp32ToFp16())
+    {
+        bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
+        if (hasFp16)
+        {
+            ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
+            Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
+            Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
+        }
+    }
+
     // Assign an available backend to each layer
     Graph::Iterator firstLayer = optGraph.begin();
     Graph::Iterator lastLayer = optGraph.end();
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index a84a0e9ba4..34549248bc 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -350,6 +350,8 @@ struct OptimizerOptionsOpaqueImpl
     }

     /// Reduces all Fp32 operators in the model to Fp16 for faster processing.
+    /// If the first preferred backend does not have Fp16 support, this option will be disabled.
+    /// If a converted Fp16 value is infinity, it is rounded to the closest finite Fp16 value.
     /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers
     ///       between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16.
     ///       The overhead of these conversions can lead to a slower overall performance if too many conversions are
diff --git a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
index 0a4a4fafde..90f94bccbf 100644
--- a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
+++ b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //

@@ -7,6 +7,12 @@
 #include

+#if defined(ARMNNREF_ENABLED)
+#include
+#include
+#include
+#endif
+
 #include

 TEST_SUITE("Optimizer")
@@ -50,4 +56,284 @@ TEST_CASE("Fp32NetworkToFp16OptimizationTest")
     CHECK(floor->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
 }

+#if defined(ARMNNREF_ENABLED)
+TEST_CASE("ReduceFp32ToFp16EnabledBackendHasFp16SupportTest")
+{
+    using namespace armnn;
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+    IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+    IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    input2->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+    sub->GetOutputSlot(0).SetTensorInfo(info);
+
+    std::vector backends = { Compute::CpuRef };
+
+    // optimize the network
+    OptimizerOptionsOpaque optOptions;
+    optOptions.SetReduceFp32ToFp16(true);
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Layers are added to convert the inputs to FP16
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType));
+
+    Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+    Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+    CHECK(addLayer->GetDataType() == armnn::DataType::Float16);
+    CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float16);
+    CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+    CHECK(subLayer->GetDataType() == armnn::DataType::Float16);
+    CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float16);
+    CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledBackendNoFp16SupportTest")
+{
+    using namespace armnn;
+
+    // Create a mock backend without FP16 support
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    CHECK((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        FAIL(message);
+    }
+
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+
+    std::vector backends = { "MockRef" };
+
+    // optimize the network
+    OptimizerOptionsOpaque optOptions;
+    optOptions.SetReduceFp32ToFp16(true);
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Do not add layers to convert the inputs to FP16
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType));
+
+    // Checks that data type is FP32
+    Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+
+    CHECK(addLayer->GetDataType() == armnn::DataType::Float32);
+    CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float32);
+    CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendHasFp16SupportTest")
+{
+    using namespace armnn;
+
+    // Create a mock backend without FP16 support
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    CHECK((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        FAIL(message);
+    }
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+    IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+    IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    input2->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+    sub->GetOutputSlot(0).SetTensorInfo(info);
+
+    std::vector backends = { Compute::CpuRef, "MockRef" };
+
+    // optimize the network
+    OptimizerOptionsOpaque optOptions;
+    optOptions.SetReduceFp32ToFp16(true);
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Layers are added to convert the inputs to FP16
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType));
+
+    Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+    Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+    CHECK(addLayer->GetDataType() == armnn::DataType::Float16);
+    CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float16);
+    CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+    CHECK(subLayer->GetDataType() == armnn::DataType::Float16);
+    CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float16);
+    CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendNoFp16SupportTest")
+{
+    using namespace armnn;
+
+    // Create a mock backend without FP16 support
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    CHECK((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        FAIL(message);
+    }
+
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+    IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+    IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    input2->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+    sub->GetOutputSlot(0).SetTensorInfo(info);
+
+    std::vector backends = { "MockRef", Compute::CpuRef };
+
+    // optimize the network
+    OptimizerOptionsOpaque optOptions;
+    optOptions.SetReduceFp32ToFp16(true);
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // Do not add layers to convert the inputs to FP16
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType,
+                        &IsLayerOfType));
+
+    // Checks that data type is FP32
+    Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+    Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+    CHECK(addLayer->GetDataType() == armnn::DataType::Float32);
+    CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float32);
+    CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+    CHECK(subLayer->GetDataType() == armnn::DataType::Float32);
+    CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+          == armnn::DataType::Float32);
+    CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+}
+#endif // ARMNNREF_ENABLED
+
 }
\ No newline at end of file
--
cgit v1.2.1
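For context, the option this patch gates is driven from user code roughly as follows. This is an illustrative sketch, not part of the commit: it assumes the armnn/ArmNN.hpp umbrella header and a build where the GpuAcc and CpuRef backends are registered. Whether the FP32-to-FP16 reduction actually runs now depends on the first available preferred backend reporting the "HasFp16" capability, as implemented in CheckFp16Support() above; otherwise the optimizer logs a warning and keeps the network in FP32.

// Sketch only: requesting fp16-turbo-mode when optimizing a small FP32 network.
// Backend choice (GpuAcc, CpuRef) is an assumption about the target build.
#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;

    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    // Build a trivial FP32 network: two inputs -> add -> output.
    INetworkPtr net = INetwork::Create();
    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add    = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info({ 1, 2, 4, 2 }, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // Request the FP32 -> FP16 reduction (fp16-turbo-mode).
    OptimizerOptionsOpaque optOptions;
    optOptions.SetReduceFp32ToFp16(true);

    // With this patch, the reduction is only applied if the first available preferred backend
    // (here GpuAcc, assumed present) reports the "HasFp16" capability; otherwise the network
    // is optimized and executed in FP32.
    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuRef };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    return optNet ? 0 : 1;
}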