diff options
author | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-20 15:01:01 +0000 |
---|---|---|
committer | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-20 19:09:07 +0000 |
commit | bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e (patch) | |
tree | 5187f34326414e7dfea80e0f4efaae5cbeb05d1d /src/armnn/Network.cpp | |
parent | cf2ad554502830804e991aca2e5b0741623119b2 (diff) | |
download | armnn-bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e.tar.gz |
IVGCVSW-4520 Implement BFloat16 Optimizer
* Add ReduceFp32ToBf16 to OptimizerOptions
* Add ConvertFp32NetworkToBf16
* Add utility functions to insert conversion layers
* Add constant conversion BF16 <-> FP32
* Unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Iaff77e20c721400b052cb37eb9ef6fe16d7abaff
Diffstat (limited to 'src/armnn/Network.cpp')
-rw-r--r-- | src/armnn/Network.cpp | 92 |
1 file changed, 92 insertions, 0 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 7a6fa8f78c..5f7719730b 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -248,6 +248,86 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings, return result; } } + else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16) + { + if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported) + && layer->GetType() != LayerType::ConvertFp32ToBf16 + && layer->GetType() != LayerType::ConvertBf16ToFp32) + { + // Insert BF16 -> FP32 conversion layer before current layer + std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers; + if (dataTypeIn == DataType::BFloat16) + { + convertBf16ToFp32Layers = + InsertConvertBf16ToFp32LayersBefore(graph, *layer); + } + + // Insert FP32 -> BF16 conversion layer after current layer + std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers; + if (dataTypeOut == DataType::BFloat16) + { + convertFp32ToBf16Layers = + InsertConvertFp32ToBf16LayersAfter(graph, *layer); + } + + // Assign a supported backend to the newly introduced conversion layers + auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend) + { + bool supportedBackendFound = false; + std::string reasonIfUnsupported; + + // Try preferred backend first + layer->SetBackendId(preferredBackend); + if (IWorkloadFactory::IsLayerSupported(*layer, + EmptyOptional(), + reasonIfUnsupported)) + { + supportedBackendFound = true; + } + else + { + for (const auto& backend : availablePreferredBackends) + { + // Skip preferred backend (we already determined that it is not supported) + if (backend == preferredBackend) + { + continue; + } + + layer->SetBackendId(backend); + if (IWorkloadFactory::IsLayerSupported(*layer, + EmptyOptional(), + reasonIfUnsupported)) + { + supportedBackendFound = true; + break; + } + } + } + + return supportedBackendFound; + }; + + for (ConvertBf16ToFp32Layer* convertLayer : 
convertBf16ToFp32Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnError(convertLayer); + } + } + + for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnError(convertLayer); + } + } + + return result; + } + } + std::stringstream warningMsg; warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << " is not supported on requested backend " << layer->GetBackendId().Get() @@ -898,6 +978,11 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); } + if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16) + { + throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time."); + } + const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); @@ -934,6 +1019,13 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); } + // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16 + if (options.m_ReduceFp32ToBf16) + { + Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter())); + Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToBFloat())); + } + // Initialize backend settings BackendSettings backendSettings(backendPreferences, deviceSpec); if (backendSettings.GetAvailablePreferredBackends().empty()) |