From b4c493430567bff25e61e9df0dbab554c29f635d Mon Sep 17 00:00:00 2001 From: Ryan OShea Date: Tue, 25 Jul 2023 14:28:27 +0100 Subject: IVGCVSW-7880 Add check for FP16 backend support * Check if preferred backends have FP16 support before enable fp16-turbo-mode * Unit tests * Replaced global gpuAccCapabilities with getter method construction * Replaced deprecated function call in SL shim Signed-off-by: Narumol Prangnawarat Signed-off-by: Ryan OShea Change-Id: If29b62b330ca8987de8acf6408db11daf25ca0b5 --- shim/sl/canonical/ArmnnDevice.cpp | 3 +- src/armnn/Network.cpp | 77 +++++- src/armnn/Network.hpp | 2 + .../Fp32NetworkToFp16ConverterTests.cpp | 288 ++++++++++++++++++++- src/backends/backendsCommon/common.mk | 1 + src/backends/cl/ClBackend.cpp | 17 ++ src/backends/cl/ClBackend.hpp | 18 +- src/backends/cl/test/ClOptimizedNetworkTests.cpp | 30 ++- src/backends/neon/NeonBackend.hpp | 5 +- src/backends/reference/RefBackend.hpp | 5 +- .../ExecuteNetworkProgramOptions.cpp | 4 +- 11 files changed, 408 insertions(+), 42 deletions(-) diff --git a/shim/sl/canonical/ArmnnDevice.cpp b/shim/sl/canonical/ArmnnDevice.cpp index 71135cb13e..4b9fa18a00 100644 --- a/shim/sl/canonical/ArmnnDevice.cpp +++ b/shim/sl/canonical/ArmnnDevice.cpp @@ -125,7 +125,8 @@ ArmnnDevice::ArmnnDevice(DriverOptions options) else { if (m_Options.isAsyncModelExecutionEnabled() && - armnn::HasCapability(armnn::BackendOptions::BackendOption{"AsyncExecution", false}, backend)) + armnn::HasMatchingCapability(armnn::BackendOptions::BackendOption{"AsyncExecution", false}, + backend)) { VLOG(DRIVER) << "ArmnnDevice: ArmNN does not support AsyncExecution with the following backend: " << backend.Get().c_str(); diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 22d2c78c65..3074c1ffe9 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -37,6 +37,7 @@ #include #include #include +#include namespace armnn { @@ -837,14 +838,18 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings, // need to set the compute device on the layer // before we can check if it is supported layer->SetBackendId(backend); + std::string currentReasonIfUnsupported; // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs // to be FP32 and inserting convert layers around the FP32 operator. - bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported); + bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported); + reasonIfUnsupported += currentReasonIfUnsupported; + // This string matches the error message that is produced by acl when attempting to run FP16 kernels on + // a cpu or build that does not have fp16 support. We use this to check if we should add + // conversion layers or not. 
std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above"; - if (!isLayerSupported || - reasonIfUnsupported.find(checkStr) != std::string::npos) + if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos) { if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16) { @@ -994,6 +999,51 @@ inline std::vector GetLayerInOutDatatype(const Layer* layer) return {dataTypeIn, dataTypeOut}; } +bool CheckFp16Support(BackendsMap& backends, + const std::vector& availablePreferredBackends) +{ + bool hasFp16 = false; + // Check if the first preferred backend has FP16 support + auto firstBackend = availablePreferredBackends[0]; + auto backendObjPtr = backends.find(firstBackend)->second.get(); + ARMNN_ASSERT(backendObjPtr); + auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true}; + auto backendCapabilities = backendObjPtr->GetCapabilities(); + + if (HasMatchingCapability(hasFp16Capability, backendCapabilities)) + { + // First preferred backend has FP16 support. Enable reduce FP32 to FP16 when fp16-turbo-mode is enabled. + hasFp16 = true; + ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend + << ", has FP16 support."; + } + else + { + ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend + << ", does not have FP16 support. " + << "The FP16 turbo mode option will be disable. It will run using FP32."; + } + + // Check if the rest of the available preferred backends have FP16 support + for (size_t i = 1; i < availablePreferredBackends.size(); ++i) + { + auto backend = availablePreferredBackends[i]; + backendObjPtr = backends.find(backend)->second.get(); + backendCapabilities = backendObjPtr->GetCapabilities(); + if (!HasMatchingCapability(hasFp16Capability, backendCapabilities)) + { + ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. " + << "It will run using FP32 when falling back to this backend."; + } + else + { + ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support."; + } + } + + return hasFp16; +} + // Refactor to allow passing the IConnectableLayer* rather than Layer Iterator // on Graph and SubgraphView which are different types. 
void AssignBackendsIConnectable(OptimizedNetworkImpl* optNetObjPtr, @@ -1913,16 +1963,10 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, FuseBatchNormIntoDepthwiseConvolution2DFloat16())); - if (options.GetReduceFp32ToFp16()) - { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16"); - Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter())); - Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); - } - // Initialize backend settings BackendSettings backendSettings(backendPreferences, deviceSpec); - if (backendSettings.GetAvailablePreferredBackends().empty()) + auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends(); + if (availablePreferredBackends.empty()) { std::stringstream failureMsg; failureMsg << "None of the preferred backends " << backendPreferences @@ -1935,6 +1979,17 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, TensorHandleFactoryRegistry tensorHandleFactoryRegistry; BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings); + if (options.GetReduceFp32ToFp16()) + { + bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends); + if (hasFp16) + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16"); + Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter())); + Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); + } + } + // Assign an available backend to each layer Graph::Iterator firstLayer = optGraph.begin(); Graph::Iterator lastLayer = optGraph.end(); diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp index a84a0e9ba4..34549248bc 100644 --- a/src/armnn/Network.hpp +++ b/src/armnn/Network.hpp @@ -350,6 +350,8 @@ struct OptimizerOptionsOpaqueImpl } /// Reduces all Fp32 operators in the model to Fp16 for faster processing. + /// If the first preferred backend does not have Fp16 support, this option will be disabled. + /// If the value of converted Fp16 is infinity, round to the closest finite Fp16 value. /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16. /// The overhead of these conversions can lead to a slower overall performance if too many conversions are diff --git a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp index 0a4a4fafde..90f94bccbf 100644 --- a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp +++ b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -7,6 +7,12 @@ #include +#if defined(ARMNNREF_ENABLED) +#include +#include +#include +#endif + #include TEST_SUITE("Optimizer") @@ -50,4 +56,284 @@ TEST_CASE("Fp32NetworkToFp16OptimizationTest") CHECK(floor->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); } +#if defined(ARMNNREF_ENABLED) +TEST_CASE("ReduceFp32ToFp16EnabledBackendHasFp16SupportTest") +{ + using namespace armnn; + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); + add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); + sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + input2->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + sub->GetOutputSlot(0).SetTensorInfo(info); + + std::vector backends = { Compute::CpuRef }; + + // optimize the network + OptimizerOptionsOpaque optOptions; + optOptions.SetReduceFp32ToFp16(true); + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); + + Graph& graph = GetGraphForTesting(optNet.get()); + + // Layers are added to convert the inputs to FP16 + CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + + Layer* const addLayer = GetFirstLayerWithName(graph, "add"); + Layer* const subLayer = GetFirstLayerWithName(graph, "sub"); + + CHECK(addLayer->GetDataType() == armnn::DataType::Float16); + CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float16); + CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + CHECK(subLayer->GetDataType() == armnn::DataType::Float16); + CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float16); + CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); +} + +TEST_CASE("ReduceFp32ToFp16EnabledBackendNoFp16SupportTest") +{ + using namespace armnn; + + // Create a mock backend without FP16 support + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + CHECK((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + FAIL(message); + } + + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + + std::vector backends = { "MockRef" }; + + // optimize the network + OptimizerOptionsOpaque optOptions; + optOptions.SetReduceFp32ToFp16(true); + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); + + Graph& graph = GetGraphForTesting(optNet.get()); + + // Do not add layers to convert the inputs to FP16 + CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + + // Checks that data type is FP32 + Layer* const addLayer = GetFirstLayerWithName(graph, "add"); + + CHECK(addLayer->GetDataType() == armnn::DataType::Float32); + CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float32); + CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32); +} + +TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendHasFp16SupportTest") +{ + using namespace armnn; + + // Create a mock backend without FP16 support + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + CHECK((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + FAIL(message); + } + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); + add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); + sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + input2->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + sub->GetOutputSlot(0).SetTensorInfo(info); + + std::vector backends = { Compute::CpuRef, "MockRef" }; + + // optimize the network + OptimizerOptionsOpaque optOptions; + optOptions.SetReduceFp32ToFp16(true); + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); + + Graph& graph = GetGraphForTesting(optNet.get()); + + // Layers are added to convert the inputs to FP16 + CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + + Layer* const addLayer = GetFirstLayerWithName(graph, "add"); + Layer* const subLayer = GetFirstLayerWithName(graph, "sub"); + + CHECK(addLayer->GetDataType() == armnn::DataType::Float16); + CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float16); + CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + CHECK(subLayer->GetDataType() == armnn::DataType::Float16); + CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float16); + CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); +} + +TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendNoFp16SupportTest") +{ + using namespace armnn; + + // Create a mock backend without FP16 support + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + CHECK((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + FAIL(message); + } + + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); + add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); + sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + input2->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + sub->GetOutputSlot(0).SetTensorInfo(info); + + std::vector backends = { "MockRef", Compute::CpuRef }; + + // optimize the network + OptimizerOptionsOpaque optOptions; + optOptions.SetReduceFp32ToFp16(true); + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); + + Graph& graph = GetGraphForTesting(optNet.get()); + + // Do not add layers to convert the inputs to FP16 + CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + + // Checks that data type is FP32 + Layer* const addLayer = GetFirstLayerWithName(graph, "add"); + Layer* const subLayer = GetFirstLayerWithName(graph, "sub"); + + CHECK(addLayer->GetDataType() == armnn::DataType::Float32); + CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float32); + CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + CHECK(subLayer->GetDataType() == armnn::DataType::Float32); + CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType() + == armnn::DataType::Float32); + CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32); +} +#endif // ARMNNREF_ENABLED + } \ No newline at end of file diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index 2c41285615..303f8aca9c 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -115,5 +115,6 @@ COMMON_TEST_SOURCES := \ ifeq ($(ARMNN_REF_ENABLED),1) COMMON_TEST_SOURCES += \ + test/mockBackend/MockImportBackend.cpp \ test/WorkloadDataValidation.cpp endif # ARMNN_REF_ENABLED == 1 diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index b018654288..532892e0d0 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -273,6 +273,23 @@ std::unique_ptr ClBackend::GetDefaultAllocator() const return std::make_unique(); } +BackendCapabilities ClBackend::GetCapabilities() const +{ + // add new capabilities here.. 
+ return BackendCapabilities ("GpuAcc", + { + {"NonConstWeights", true}, + {"AsyncExecution", false}, + {"ProtectedContentAllocation", true}, + {"ConstantTensorsAsInputs", true}, + {"PreImportIOTensors", false}, + {"ExternallyManagedMemory", true}, + {"MultiAxisPacking", false}, + {"SingleAxisPacking", true}, + {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()} + }); +} + OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, const ModelOptions& modelOptions) const { diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index d276eacbe1..1d2a866f23 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -21,19 +21,6 @@ namespace armnn { -// add new capabilities here.. -const BackendCapabilities gpuAccCapabilities("GpuAcc", - { - {"NonConstWeights", true}, - {"AsyncExecution", false}, - {"ProtectedContentAllocation", true}, - {"ConstantTensorsAsInputs", true}, - {"PreImportIOTensors", false}, - {"ExternallyManagedMemory", true}, - {"MultiAxisPacking", false}, - {"SingleAxisPacking", true} - }); - class ClBackend : public IBackendInternal { public: @@ -90,10 +77,7 @@ public: std::unique_ptr GetDefaultAllocator() const override; - BackendCapabilities GetCapabilities() const override - { - return gpuAccCapabilities; - }; + BackendCapabilities GetCapabilities() const override; virtual bool UseCustomMemoryAllocator(std::shared_ptr allocator, armnn::Optional errMsg) override diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp index 3d4341df18..9d721c08ed 100644 --- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp +++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp @@ -11,6 +11,7 @@ #include #include +#include #include @@ -94,15 +95,28 @@ TEST_CASE("FP16TurboModeTestOnGpuAcc") const armnn::Graph& graph = GetGraphForTesting(optimizedNet.get()); - // Tests that all layers are present in the graph. - CHECK(graph.GetNumLayers() == 5); + if(arm_compute::CLKernelLibrary::get().fp16_supported()) + { + // Tests that all layers are present in the graph. + CHECK(graph.GetNumLayers() == 5); + + // Tests that the vertices exist and have correct names. + CHECK(GraphHasNamedLayer(graph, "input layer")); + CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer")); + CHECK(GraphHasNamedLayer(graph, "activation layer")); + CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer")); + CHECK(GraphHasNamedLayer(graph, "output layer")); + } + else + { + // Tests that all layers except for conversion layers are present in the graph. + CHECK(graph.GetNumLayers() == 3); - // Tests that the vertices exist and have correct names. - CHECK(GraphHasNamedLayer(graph, "input layer")); - CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer")); - CHECK(GraphHasNamedLayer(graph, "activation layer")); - CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer")); - CHECK(GraphHasNamedLayer(graph, "output layer")); + // Tests that the vertices exist and have correct names. 
+ CHECK(GraphHasNamedLayer(graph, "input layer")); + CHECK(GraphHasNamedLayer(graph, "activation layer")); + CHECK(GraphHasNamedLayer(graph, "output layer")); + } } TEST_CASE("FastMathEnabledTestOnGpuAcc") diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp index 3364e4be41..ef5258b657 100644 --- a/src/backends/neon/NeonBackend.hpp +++ b/src/backends/neon/NeonBackend.hpp @@ -6,6 +6,8 @@ #include +#include + namespace armnn { @@ -19,7 +21,8 @@ const BackendCapabilities cpuAccCapabilities("CpuAcc", {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, - {"SingleAxisPacking", true} + {"SingleAxisPacking", true}, + {"HasFp16", arm_compute::CPUInfo::get().has_fp16()} }); diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp index ecbe4d5ba9..7d355ea9ca 100644 --- a/src/backends/reference/RefBackend.hpp +++ b/src/backends/reference/RefBackend.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -18,7 +18,8 @@ const BackendCapabilities cpuRefCapabilities("CpuRef", {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, - {"SingleAxisPacking", true} + {"SingleAxisPacking", true}, + {"HasFp16", true} }); const std::set oldCpuRefCapabilities { diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index 7b55b28b8b..4066cdcadb 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -410,7 +410,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("fp16-turbo-mode", "If this option is enabled, FP32 layers, " - "weights and biases will be converted to FP16 where the backend supports it", + "weights and biases will be converted to FP16 where the backend supports it. " + "If the first preferred backend does not have FP16 support, this option will be disabled. " + "If the value of converted FP16 is infinity, round to the closest finite FP16 value.", cxxopts::value(m_ExNetParams.m_EnableFp16TurboMode) ->default_value("false")->implicit_value("true")) -- cgit v1.2.1
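
For context, the sketch below shows how the behaviour introduced by this patch can be exercised from application code. It is an illustrative example rather than part of the change: it mirrors the unit tests in the patch and assumes a standard Arm NN build with the CpuRef backend registered and the public headers available under `<armnn/...>`; the graph shape and layer names are arbitrary. With fp16-turbo-mode requested, Optimize() now only runs the Fp32NetworkToFp16Converter pass when the first available preferred backend reports the "HasFp16" capability; otherwise it logs a warning and keeps the network in FP32.

```cpp
// Minimal sketch (not part of the patch above). Assumes a standard Arm NN build
// with the CpuRef backend registered.
#include <armnn/ArmNN.hpp>
#include <armnn/BackendHelper.hpp>

#include <iostream>
#include <vector>

int main()
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Trivial FP32 graph: output = input0 + input1.
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add    = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info({ 1, 2, 4, 2 }, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // The first preferred backend decides whether the FP32 -> FP16 reduction is applied.
    std::vector<BackendId> backends = { Compute::CpuRef };

    // The same capability query that CheckFp16Support() performs internally.
    bool hasFp16 = HasMatchingCapability(BackendOptions::BackendOption{"HasFp16", true},
                                         backends[0]);
    std::cout << "First preferred backend reports HasFp16 = " << hasFp16 << "\n";

    // Equivalent of ExecuteNetwork's --fp16-turbo-mode flag: request the reduction and let
    // Optimize() skip it (with a warning) if the first backend lacks FP16 support.
    OptimizerOptionsOpaque optOptions;
    optOptions.SetReduceFp32ToFp16(true);
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    std::cout << "Optimisation " << (optNet ? "succeeded" : "failed") << "\n";
    return 0;
}
```

The capability itself comes from each backend's GetCapabilities(): CpuRef always reports HasFp16 = true, while GpuAcc and CpuAcc report it based on arm_compute's runtime checks, as added in this patch.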