diff options
author | Ryan OShea <ryan.oshea3@arm.com> | 2023-07-25 14:28:27 +0100 |
---|---|---|
committer | ryan.oshea3 <ryan.oshea3@arm.com> | 2023-08-02 14:25:26 +0000 |
commit | b4c493430567bff25e61e9df0dbab554c29f635d (patch) | |
tree | 9d9fa320c25f5aa9f89aafb8b9b8b6c071990161 /src/backends/cl | |
parent | 083802d04b7a4499c4daba860c57e4f152f9c060 (diff) | |
download | armnn-b4c493430567bff25e61e9df0dbab554c29f635d.tar.gz |
IVGCVSW-7880 Add check for FP16 backend support
* Check if preferred backends have FP16 support before enabling fp16-turbo-mode
* Unit tests
* Replaced global gpuAccCapabilities with capabilities constructed inside the GetCapabilities() getter method
* Replaced deprecated function call in SL shim
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: If29b62b330ca8987de8acf6408db11daf25ca0b5
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- | src/backends/cl/ClBackend.cpp | 17 | ||||
-rw-r--r-- | src/backends/cl/ClBackend.hpp | 18 | ||||
-rw-r--r-- | src/backends/cl/test/ClOptimizedNetworkTests.cpp | 30 |
3 files changed, 40 insertions, 25 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index b018654288..532892e0d0 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -273,6 +273,23 @@ std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const return std::make_unique<ClBackendDefaultAllocator>(); } +BackendCapabilities ClBackend::GetCapabilities() const +{ + // add new capabilities here.. + return BackendCapabilities ("GpuAcc", + { + {"NonConstWeights", true}, + {"AsyncExecution", false}, + {"ProtectedContentAllocation", true}, + {"ConstantTensorsAsInputs", true}, + {"PreImportIOTensors", false}, + {"ExternallyManagedMemory", true}, + {"MultiAxisPacking", false}, + {"SingleAxisPacking", true}, + {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()} + }); +} + OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, const ModelOptions& modelOptions) const { diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index d276eacbe1..1d2a866f23 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -21,19 +21,6 @@ namespace armnn { -// add new capabilities here.. 
-const BackendCapabilities gpuAccCapabilities("GpuAcc", - { - {"NonConstWeights", true}, - {"AsyncExecution", false}, - {"ProtectedContentAllocation", true}, - {"ConstantTensorsAsInputs", true}, - {"PreImportIOTensors", false}, - {"ExternallyManagedMemory", true}, - {"MultiAxisPacking", false}, - {"SingleAxisPacking", true} - }); - class ClBackend : public IBackendInternal { public: @@ -90,10 +77,7 @@ public: std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override; - BackendCapabilities GetCapabilities() const override - { - return gpuAccCapabilities; - }; + BackendCapabilities GetCapabilities() const override; virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, armnn::Optional<std::string&> errMsg) override diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp index 3d4341df18..9d721c08ed 100644 --- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp +++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp @@ -11,6 +11,7 @@ #include <cl/ClWorkloadFactory.hpp> #include <cl/ClBackendContext.hpp> +#include <arm_compute/core/CL/CLKernelLibrary.h> #include <armnnUtils/Filesystem.hpp> @@ -94,15 +95,28 @@ TEST_CASE("FP16TurboModeTestOnGpuAcc") const armnn::Graph& graph = GetGraphForTesting(optimizedNet.get()); - // Tests that all layers are present in the graph. - CHECK(graph.GetNumLayers() == 5); + if(arm_compute::CLKernelLibrary::get().fp16_supported()) + { + // Tests that all layers are present in the graph. + CHECK(graph.GetNumLayers() == 5); + + // Tests that the vertices exist and have correct names. 
+ CHECK(GraphHasNamedLayer(graph, "input layer")); + CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer")); + CHECK(GraphHasNamedLayer(graph, "activation layer")); + CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer")); + CHECK(GraphHasNamedLayer(graph, "output layer")); + } + else + { + // Tests that all layers except for conversion layers are present in the graph. + CHECK(graph.GetNumLayers() == 3); - // Tests that the vertices exist and have correct names. - CHECK(GraphHasNamedLayer(graph, "input layer")); - CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer")); - CHECK(GraphHasNamedLayer(graph, "activation layer")); - CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer")); - CHECK(GraphHasNamedLayer(graph, "output layer")); + // Tests that the vertices exist and have correct names. + CHECK(GraphHasNamedLayer(graph, "input layer")); + CHECK(GraphHasNamedLayer(graph, "activation layer")); + CHECK(GraphHasNamedLayer(graph, "output layer")); + } } TEST_CASE("FastMathEnabledTestOnGpuAcc") |