From 31441595009182c985dacbedc70c41ee6664d070 Mon Sep 17 00:00:00 2001
From: Ryan OShea
Date: Mon, 7 Nov 2022 16:20:48 +0000
Subject: IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers

- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests

* Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments to enable fast math in order to use bf16
* Update docs to inform users to enable fast math for bf16

Execute Network Changes
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option

Signed-off-by: Ryan OShea
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
---
 src/armnn/Network.cpp | 133 +++-----------------------------------------------
 1 file changed, 6 insertions(+), 127 deletions(-)

(limited to 'src/armnn/Network.cpp')

diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 9d00a69518..6d3058c670 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -604,30 +604,6 @@ bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
 
-template <typename LayerT>
-LayerT* ConvertBf16ToFp32Weight(Layer* l)
-{
-    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
-    if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
-        && layer->m_Weight)
-    {
-        const TensorInfo& info = layer->m_Weight->GetTensorInfo();
-
-        if (info.GetDataType() == DataType::BFloat16)
-        {
-            std::vector<float> newValues(info.GetNumElements());
-
-            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
-                layer->m_Weight->template GetConstTensor<BFloat16>(), info.GetNumElements(), newValues.data());
-
-            TensorInfo newInfo(info.GetShape(), DataType::Float32);
-            ConstTensor newInput(newInfo, newValues);
-            layer->m_Weight.reset(new ScopedTensorHandle(newInput));
-        }
-    }
-    return layer;
-}
-
 OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                             Graph& graph,
                                             Layer* layer,
@@ -772,98 +748,6 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                 return result;
             }
         }
-        else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
-        {
-            const auto layerType = layer->GetType();
-            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
-                && layerType != LayerType::ConvertFp32ToBf16
-                && layerType != LayerType::ConvertBf16ToFp32)
-            {
-                bool revertConstantWeightsConversion = RevertConstantWeightsToFP32(layer);
-
-                // Insert BF16 -> FP32 conversion layer before current layer.
-                // Unless we have reverted Constant Weights Type above.
-                std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
-                if (dataTypeIn == DataType::BFloat16 && dataTypeOut != DataType::BFloat16
-                    && !revertConstantWeightsConversion)
-                {
-                    convertBf16ToFp32Layers =
-                        InsertConvertBf16ToFp32LayersBefore(graph, *layer);
-                    if (layer->GetType() == LayerType::Convolution2d)
-                    {
-                        ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
-                    }
-                    else if (layer->GetType() == LayerType::FullyConnected)
-                    {
-                        ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
-                    }
-                }
-
-                // Insert FP32 -> BF16 conversion layer after current layer
-                std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
-                if (dataTypeOut == DataType::BFloat16)
-                {
-                    convertFp32ToBf16Layers =
-                        InsertConvertFp32ToBf16LayersAfter(graph, *layer);
-                }
-
-                // Assign a supported backend to the newly introduced conversion layers
-                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
-                {
-                    bool supportedBackendFound = false;
-                    std::string reasonIfUnsupported;
-
-                    // Try preferred backend first
-                    layer->SetBackendId(preferredBackend);
-                    if (IWorkloadFactory::IsLayerSupported(*layer,
-                                                           EmptyOptional(),
-                                                           reasonIfUnsupported))
-                    {
-                        supportedBackendFound = true;
-                    }
-                    else
-                    {
-                        for (const auto& backend : availablePreferredBackends)
-                        {
-                            // Skip preferred backend (we already determined that it is not supported)
-                            if (backend == preferredBackend)
-                            {
-                                continue;
-                            }
-
-                            layer->SetBackendId(backend);
-                            if (IWorkloadFactory::IsLayerSupported(*layer,
-                                                                   EmptyOptional(),
-                                                                   reasonIfUnsupported))
-                            {
-                                supportedBackendFound = true;
-                                break;
-                            }
-                        }
-                    }
-
-                    return supportedBackendFound;
-                };
-
-                for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
-                {
-                    if (!AssignFirstSupportedBackend(convertLayer, backend))
-                    {
-                        return ReturnError(convertLayer);
-                    }
-                }
-
-                for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
-                {
-                    if (!AssignFirstSupportedBackend(convertLayer, backend))
-                    {
-                        return ReturnError(convertLayer);
-                    }
-                }
-
-                return result;
-            }
-        }
 
         std::stringstream warningMsg;
         warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
@@ -1669,6 +1553,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         throw InvalidArgumentException("Invoked Optimize with no backends specified");
     }
 
+    if (options.m_ReduceFp32ToBf16)
+    {
+        throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
+                                       "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
+    }
+
     if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
     {
         throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
@@ -1745,17 +1635,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
     }
 
-    // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
-    // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
-    // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
-    // Constant and Fp32ToBf16 layers will also be fused so conversion is no longer needed at inference time
-    if (options.m_ReduceFp32ToBf16)
-    {
-        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
-        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
-        Optimizer::Pass(optGraph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
-    }
-
     // Initialize backend settings
     BackendSettings backendSettings(backendPreferences, deviceSpec);
     if (backendSettings.GetAvailablePreferredBackends().empty())
--
cgit v1.2.1
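
Editor's note: the exception added above steers users from m_ReduceFp32ToBf16 towards the FastMathEnabled backend option. The following is a minimal sketch, not part of the patch, of how that option can be passed through OptimizerOptions::m_ModelOptions when calling armnn::Optimize. BuildNetwork() is a hypothetical placeholder for however the application constructs its INetwork, and CpuAcc is chosen only as an example backend.

    #include <armnn/ArmNN.hpp>

    #include <vector>

    // Hypothetical helper standing in for the application's own network construction.
    armnn::INetworkPtr BuildNetwork();

    int main()
    {
        // Create a runtime so the device spec can be queried during optimization.
        armnn::IRuntime::CreationOptions runtimeOptions;
        armnn::IRuntimePtr runtime = armnn::IRuntime::Create(runtimeOptions);

        armnn::INetworkPtr network = BuildNetwork();

        // After this commit, setting m_ReduceFp32ToBf16 = true makes Optimize() throw.
        // BF16 execution is instead opted into via the backend's FastMathEnabled option.
        armnn::OptimizerOptions optimizerOptions;
        armnn::BackendOptions cpuAccFastMath("CpuAcc", {{"FastMathEnabled", true}});
        optimizerOptions.m_ModelOptions.push_back(cpuAccFastMath);

        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
        armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*network,
                                                             backends,
                                                             runtime->GetDeviceSpec(),
                                                             optimizerOptions);
        return optNet ? 0 : 1;
    }

The same pattern works for GpuAcc by constructing a second BackendOptions object for that backend; whether fast math actually selects BF16 kernels then depends on the hardware and the backend's kernel support.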