author     Ryan OShea <ryan.oshea3@arm.com>          2023-07-25 14:28:27 +0100
committer  ryan.oshea3 <ryan.oshea3@arm.com>         2023-08-02 14:25:26 +0000
commit     b4c493430567bff25e61e9df0dbab554c29f635d (patch)
tree       9d9fa320c25f5aa9f89aafb8b9b8b6c071990161
parent     083802d04b7a4499c4daba860c57e4f152f9c060 (diff)
download   armnn-b4c493430567bff25e61e9df0dbab554c29f635d.tar.gz
IVGCVSW-7880 Add check for FP16 backend support
* Check if preferred backends have FP16 support before enabling fp16-turbo-mode
* Unit tests
* Replaced global gpuAccCapabilities with getter method construction
* Replaced deprecated function call in SL shim

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: If29b62b330ca8987de8acf6408db11daf25ca0b5
-rw-r--r--  shim/sl/canonical/ArmnnDevice.cpp                                   3
-rw-r--r--  src/armnn/Network.cpp                                              77
-rw-r--r--  src/armnn/Network.hpp                                               2
-rw-r--r--  src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp  288
-rw-r--r--  src/backends/backendsCommon/common.mk                               1
-rw-r--r--  src/backends/cl/ClBackend.cpp                                      17
-rw-r--r--  src/backends/cl/ClBackend.hpp                                      18
-rw-r--r--  src/backends/cl/test/ClOptimizedNetworkTests.cpp                   30
-rw-r--r--  src/backends/neon/NeonBackend.hpp                                   5
-rw-r--r--  src/backends/reference/RefBackend.hpp                               5
-rw-r--r--  tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp               4
11 files changed, 408 insertions, 42 deletions
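
For context, fp16-turbo-mode maps onto the ReduceFp32ToFp16 optimizer option that this change now gates on backend capability. A minimal sketch of how a caller requests it, mirroring the unit tests added below; an already-built INetworkPtr named net is assumed:

    // Sketch only: assumes `net` is an armnn::INetworkPtr that has already been built.
    armnn::IRuntime::CreationOptions runtimeOptions;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(runtimeOptions));

    armnn::OptimizerOptionsOpaque optOptions;
    optOptions.SetReduceFp32ToFp16(true); // equivalent of fp16-turbo-mode

    // With this patch, Optimize() only runs the Fp32->Fp16 conversion passes when the
    // first preferred backend reports the "HasFp16" capability; otherwise the option
    // is skipped and the network stays in FP32.
    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc, armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
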
diff --git a/shim/sl/canonical/ArmnnDevice.cpp b/shim/sl/canonical/ArmnnDevice.cpp
index 71135cb13e..4b9fa18a00 100644
--- a/shim/sl/canonical/ArmnnDevice.cpp
+++ b/shim/sl/canonical/ArmnnDevice.cpp
@@ -125,7 +125,8 @@ ArmnnDevice::ArmnnDevice(DriverOptions options)
else
{
if (m_Options.isAsyncModelExecutionEnabled() &&
- armnn::HasCapability(armnn::BackendOptions::BackendOption{"AsyncExecution", false}, backend))
+ armnn::HasMatchingCapability(armnn::BackendOptions::BackendOption{"AsyncExecution", false},
+ backend))
{
VLOG(DRIVER) << "ArmnnDevice: ArmNN does not support AsyncExecution with the following backend: "
<< backend.Get().c_str();
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 22d2c78c65..3074c1ffe9 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -37,6 +37,7 @@
#include <algorithm>
#include <memory>
#include <vector>
+#include <armnn/ArmNN.hpp>
namespace armnn
{
@@ -837,14 +838,18 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
// need to set the compute device on the layer
// before we can check if it is supported
layer->SetBackendId(backend);
+ std::string currentReasonIfUnsupported;
// To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
// is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
// to be FP32 and inserting convert layers around the FP32 operator.
- bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+ bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
+ reasonIfUnsupported += currentReasonIfUnsupported;
+ // This string matches the error message that is produced by acl when attempting to run FP16 kernels on
+ // a cpu or build that does not have fp16 support. We use this to check if we should add
+ // conversion layers or not.
std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
- if (!isLayerSupported ||
- reasonIfUnsupported.find(checkStr) != std::string::npos)
+ if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
{
if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
{
@@ -994,6 +999,51 @@ inline std::vector<DataType> GetLayerInOutDatatype(const Layer* layer)
return {dataTypeIn, dataTypeOut};
}
+bool CheckFp16Support(BackendsMap& backends,
+ const std::vector<BackendId>& availablePreferredBackends)
+{
+ bool hasFp16 = false;
+ // Check if the first preferred backend has FP16 support
+ auto firstBackend = availablePreferredBackends[0];
+ auto backendObjPtr = backends.find(firstBackend)->second.get();
+ ARMNN_ASSERT(backendObjPtr);
+ auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
+ auto backendCapabilities = backendObjPtr->GetCapabilities();
+
+ if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
+ {
+ // First preferred backend has FP16 support. Enable reducing FP32 to FP16 when fp16-turbo-mode is enabled.
+ hasFp16 = true;
+ ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
+ << ", has FP16 support.";
+ }
+ else
+ {
+ ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
+ << ", does not have FP16 support. "
+ << "The FP16 turbo mode option will be disable. It will run using FP32.";
+ }
+
+ // Check if the rest of the available preferred backends have FP16 support
+ for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
+ {
+ auto backend = availablePreferredBackends[i];
+ backendObjPtr = backends.find(backend)->second.get();
+ backendCapabilities = backendObjPtr->GetCapabilities();
+ if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
+ {
+ ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
+ << "It will run using FP32 when falling back to this backend.";
+ }
+ else
+ {
+ ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
+ }
+ }
+
+ return hasFp16;
+}
+
// Refactor to allow passing the IConnectableLayer* rather than Layer Iterator
// on Graph and SubgraphView which are different types.
void AssignBackendsIConnectable(OptimizedNetworkImpl* optNetObjPtr,
@@ -1913,16 +1963,10 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
- if (options.GetReduceFp32ToFp16())
- {
- ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
- Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
- Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
- }
-
// Initialize backend settings
BackendSettings backendSettings(backendPreferences, deviceSpec);
- if (backendSettings.GetAvailablePreferredBackends().empty())
+ auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
+ if (availablePreferredBackends.empty())
{
std::stringstream failureMsg;
failureMsg << "None of the preferred backends " << backendPreferences
@@ -1935,6 +1979,17 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
+ if (options.GetReduceFp32ToFp16())
+ {
+ bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
+ if (hasFp16)
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
+ Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
+ Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
+ }
+ }
+
// Assign an available backend to each layer
Graph::Iterator firstLayer = optGraph.begin();
Graph::Iterator lastLayer = optGraph.end();
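
The heart of the new check is a capability match against the preferred backends, using the same mechanism as the AsyncExecution check in the SL shim above. A standalone sketch of that query, where backendObj is an assumed valid armnn::IBackendInternal* obtained from the backends map or registry:

    // Sketch only: `backendObj` is an assumed IBackendInternal* for a registered backend.
    armnn::BackendOptions::BackendOption hasFp16Capability{"HasFp16", true};
    armnn::BackendCapabilities capabilities = backendObj->GetCapabilities();
    bool backendHasFp16 = armnn::HasMatchingCapability(hasFp16Capability, capabilities);
    // CheckFp16Support() applies this to the first preferred backend to decide whether the
    // Fp32->Fp16 passes run, and only logs warnings for the remaining preferred backends.
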
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index a84a0e9ba4..34549248bc 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -350,6 +350,8 @@ struct OptimizerOptionsOpaqueImpl
}
/// Reduces all Fp32 operators in the model to Fp16 for faster processing.
+ /// If the first preferred backend does not have Fp16 support, this option will be disabled.
+ /// If the value of converted Fp16 is infinity, round to the closest finite Fp16 value.
/// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers
/// between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16.
/// The overhead of these conversions can lead to a slower overall performance if too many conversions are
diff --git a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
index 0a4a4fafde..90f94bccbf 100644
--- a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
+++ b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -7,6 +7,12 @@
#include <Optimizer.hpp>
+#if defined(ARMNNREF_ENABLED)
+#include <CommonTestUtils.hpp>
+#include <GraphUtils.hpp>
+#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>
+#endif
+
#include <doctest/doctest.h>
TEST_SUITE("Optimizer")
@@ -50,4 +56,284 @@ TEST_CASE("Fp32NetworkToFp16OptimizationTest")
CHECK(floor->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
}
+#if defined(ARMNNREF_ENABLED)
+TEST_CASE("ReduceFp32ToFp16EnabledBackendHasFp16SupportTest")
+{
+ using namespace armnn;
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+ add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+ sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ input2->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+ sub->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { Compute::CpuRef };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Layers are added to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+ Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+ CHECK(subLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledBackendNoFp16SupportTest")
+{
+ using namespace armnn;
+
+ // Create a mock backend without FP16 support
+ MockImportBackendInitialiser initialiser; // Register the Mock Backend
+ auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+ CHECK((backendObjPtr != nullptr));
+
+ BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+ if (backendIds.find("MockRef") == backendIds.end())
+ {
+ std::string message = "Cannot load MockRef";
+ FAIL(message);
+ }
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { "MockRef" };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Do not add layers to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Checks that data type is FP32
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float32);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float32);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendHasFp16SupportTest")
+{
+ using namespace armnn;
+
+ // Create a mock backend without FP16 support
+ MockImportBackendInitialiser initialiser; // Register the Mock Backend
+ auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+ CHECK((backendObjPtr != nullptr));
+
+ BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+ if (backendIds.find("MockRef") == backendIds.end())
+ {
+ std::string message = "Cannot load MockRef";
+ FAIL(message);
+ }
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+ add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+ sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ input2->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+ sub->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { Compute::CpuRef, "MockRef" };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Layers are added to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+ Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+ CHECK(subLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendNoFp16SupportTest")
+{
+ using namespace armnn;
+
+ // Create a mock backend without FP16 support
+ MockImportBackendInitialiser initialiser; // Register the Mock Backend
+ auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+ CHECK((backendObjPtr != nullptr));
+
+ BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+ if (backendIds.find("MockRef") == backendIds.end())
+ {
+ std::string message = "Cannot load MockRef";
+ FAIL(message);
+ }
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+ add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+ sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ input2->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+ sub->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { "MockRef", Compute::CpuRef };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Do not add layers to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Checks that data type is FP32
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+ Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float32);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float32);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+ CHECK(subLayer->GetDataType() == armnn::DataType::Float32);
+ CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float32);
+ CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+}
+#endif // ARMNNREF_ENABLED
+
}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 2c41285615..303f8aca9c 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -115,5 +115,6 @@ COMMON_TEST_SOURCES := \
ifeq ($(ARMNN_REF_ENABLED),1)
COMMON_TEST_SOURCES += \
+ test/mockBackend/MockImportBackend.cpp \
test/WorkloadDataValidation.cpp
endif # ARMNN_REF_ENABLED == 1
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index b018654288..532892e0d0 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -273,6 +273,23 @@ std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
return std::make_unique<ClBackendDefaultAllocator>();
}
+BackendCapabilities ClBackend::GetCapabilities() const
+{
+ // add new capabilities here..
+ return BackendCapabilities ("GpuAcc",
+ {
+ {"NonConstWeights", true},
+ {"AsyncExecution", false},
+ {"ProtectedContentAllocation", true},
+ {"ConstantTensorsAsInputs", true},
+ {"PreImportIOTensors", false},
+ {"ExternallyManagedMemory", true},
+ {"MultiAxisPacking", false},
+ {"SingleAxisPacking", true},
+ {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
+ });
+}
+
OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
const ModelOptions& modelOptions) const
{
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index d276eacbe1..1d2a866f23 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -21,19 +21,6 @@
namespace armnn
{
-// add new capabilities here..
-const BackendCapabilities gpuAccCapabilities("GpuAcc",
- {
- {"NonConstWeights", true},
- {"AsyncExecution", false},
- {"ProtectedContentAllocation", true},
- {"ConstantTensorsAsInputs", true},
- {"PreImportIOTensors", false},
- {"ExternallyManagedMemory", true},
- {"MultiAxisPacking", false},
- {"SingleAxisPacking", true}
- });
-
class ClBackend : public IBackendInternal
{
public:
@@ -90,10 +77,7 @@ public:
std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
- BackendCapabilities GetCapabilities() const override
- {
- return gpuAccCapabilities;
- };
+ BackendCapabilities GetCapabilities() const override;
virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
armnn::Optional<std::string&> errMsg) override
diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
index 3d4341df18..9d721c08ed 100644
--- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp
+++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
@@ -11,6 +11,7 @@
#include <cl/ClWorkloadFactory.hpp>
#include <cl/ClBackendContext.hpp>
+#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <armnnUtils/Filesystem.hpp>
@@ -94,15 +95,28 @@ TEST_CASE("FP16TurboModeTestOnGpuAcc")
const armnn::Graph& graph = GetGraphForTesting(optimizedNet.get());
- // Tests that all layers are present in the graph.
- CHECK(graph.GetNumLayers() == 5);
+ if(arm_compute::CLKernelLibrary::get().fp16_supported())
+ {
+ // Tests that all layers are present in the graph.
+ CHECK(graph.GetNumLayers() == 5);
+
+ // Tests that the vertices exist and have correct names.
+ CHECK(GraphHasNamedLayer(graph, "input layer"));
+ CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
+ CHECK(GraphHasNamedLayer(graph, "activation layer"));
+ CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
+ CHECK(GraphHasNamedLayer(graph, "output layer"));
+ }
+ else
+ {
+ // Tests that all layers except for conversion layers are present in the graph.
+ CHECK(graph.GetNumLayers() == 3);
- // Tests that the vertices exist and have correct names.
- CHECK(GraphHasNamedLayer(graph, "input layer"));
- CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
- CHECK(GraphHasNamedLayer(graph, "activation layer"));
- CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
- CHECK(GraphHasNamedLayer(graph, "output layer"));
+ // Tests that the vertices exist and have correct names.
+ CHECK(GraphHasNamedLayer(graph, "input layer"));
+ CHECK(GraphHasNamedLayer(graph, "activation layer"));
+ CHECK(GraphHasNamedLayer(graph, "output layer"));
+ }
}
TEST_CASE("FastMathEnabledTestOnGpuAcc")
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 3364e4be41..ef5258b657 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -6,6 +6,8 @@
#include <armnn/backends/IBackendInternal.hpp>
+#include <arm_compute/core/CPP/CPPTypes.h>
+
namespace armnn
{
@@ -19,7 +21,8 @@ const BackendCapabilities cpuAccCapabilities("CpuAcc",
{"PreImportIOTensors", false},
{"ExternallyManagedMemory", true},
{"MultiAxisPacking", false},
- {"SingleAxisPacking", true}
+ {"SingleAxisPacking", true},
+ {"HasFp16", arm_compute::CPUInfo::get().has_fp16()}
});
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index ecbe4d5ba9..7d355ea9ca 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -18,7 +18,8 @@ const BackendCapabilities cpuRefCapabilities("CpuRef",
{"PreImportIOTensors", true},
{"ExternallyManagedMemory", true},
{"MultiAxisPacking", false},
- {"SingleAxisPacking", true}
+ {"SingleAxisPacking", true},
+ {"HasFp16", true}
});
const std::set<armnn::BackendCapability> oldCpuRefCapabilities {
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index 7b55b28b8b..4066cdcadb 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -410,7 +410,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
("fp16-turbo-mode",
"If this option is enabled, FP32 layers, "
- "weights and biases will be converted to FP16 where the backend supports it",
+ "weights and biases will be converted to FP16 where the backend supports it. "
+ "If the first preferred backend does not have FP16 support, this option will be disabled. "
+ "If the value of converted FP16 is infinity, round to the closest finite FP16 value.",
cxxopts::value<bool>(m_ExNetParams.m_EnableFp16TurboMode)
->default_value("false")->implicit_value("true"))