author    Ryan OShea <ryan.oshea3@arm.com>    2022-11-07 16:20:48 +0000
committer ryan.oshea3 <ryan.oshea3@arm.com>   2022-11-16 15:22:50 +0000
commit    31441595009182c985dacbedc70c41ee6664d070 (patch)
tree      248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/Network.cpp
parent    bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
download  armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers

 - Remove Bf16ToFp32 Conversion Layer
 - Remove Fp32ToBf16 Conversion Layer
 - Remove Bf16 Conversion tests
 * Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
 * Provide comments to enable fast math in order to use bf16
 * Update docs to inform users to enable fast math for bf16

Execute Network Changes
 * Require bf16_turbo_mode to also have fast_math_enabled set to true
 - Remove setting m_ReduceFp32ToBf16 optimizer option

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
Diffstat (limited to 'src/armnn/Network.cpp')
-rw-r--r--  src/armnn/Network.cpp  133
1 file changed, 6 insertions(+), 127 deletions(-)
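The commit message replaces the removed BF16 option with the backend's fast math path. A minimal sketch of that recommended usage, assuming the 22.11-era armnn::OptimizerOptions public members and pre-existing network/runtime objects:

    // Hedged sketch: BF16 kernels are now opted into via FastMathEnabled on
    // the backend, not via m_ReduceFp32ToBf16. `network` (INetworkPtr) and
    // `runtime` (IRuntimePtr) are assumed to exist already.
    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ModelOptions.push_back(
        armnn::BackendOptions("CpuAcc", {{"FastMathEnabled", true}}));

    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
        *network, {armnn::Compute::CpuAcc}, runtime->GetDeviceSpec(), optimizerOptions);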
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 9d00a69518..6d3058c670 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -604,30 +604,6 @@ bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string
return noErrors;
}
-template <typename LayerT>
-LayerT* ConvertBf16ToFp32Weight(Layer* l)
-{
- LayerT* layer = PolymorphicDowncast<LayerT*>(l);
- if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
- && layer->m_Weight)
- {
- const TensorInfo& info = layer->m_Weight->GetTensorInfo();
-
- if (info.GetDataType() == DataType::BFloat16)
- {
- std::vector<float> newValues(info.GetNumElements());
-
- armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
- layer->m_Weight->template GetConstTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());
-
- TensorInfo newInfo(info.GetShape(), DataType::Float32);
- ConstTensor newInput(newInfo, newValues);
- layer->m_Weight.reset(new ScopedTensorHandle(newInput));
- }
- }
- return layer;
-}
-
OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
Graph& graph,
Layer* layer,
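For context on the helper deleted above: a BFloat16 value is the upper 16 bits of an IEEE-754 float32, so the per-element widening that armnnUtils::FloatingPointConverter performed reduces to a bit shift. A self-contained sketch, not the library implementation:

    #include <cstdint>
    #include <cstring>

    // Widen one bfloat16 (stored as uint16_t) to float32 by placing its bits
    // in the upper half of a 32-bit pattern; the low 16 bits become zero.
    float Bf16ToFp32(uint16_t bf16)
    {
        uint32_t bits = static_cast<uint32_t>(bf16) << 16;
        float fp32;
        std::memcpy(&fp32, &bits, sizeof(fp32));
        return fp32;
    }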
@@ -772,98 +748,6 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
return result;
}
}
- else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
- {
- const auto layerType = layer->GetType();
- if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
- && layerType != LayerType::ConvertFp32ToBf16
- && layerType != LayerType::ConvertBf16ToFp32)
- {
- bool revertConstantWeightsConversion = RevertConstantWeightsToFP32(layer);
-
- // Insert BF16 -> FP32 conversion layer before current layer.
- // Unless we have reverted Constant Weights Type above.
- std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
- if (dataTypeIn == DataType::BFloat16 && dataTypeOut != DataType::BFloat16
- && !revertConstantWeightsConversion)
- {
- convertBf16ToFp32Layers =
- InsertConvertBf16ToFp32LayersBefore(graph, *layer);
- if (layer->GetType() == LayerType::Convolution2d)
- {
- ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
- }
- else if (layer->GetType() == LayerType::FullyConnected)
- {
- ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
- }
- }
-
- // Insert FP32 -> BF16 conversion layer after current layer
- std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
- if (dataTypeOut == DataType::BFloat16)
- {
- convertFp32ToBf16Layers =
- InsertConvertFp32ToBf16LayersAfter(graph, *layer);
- }
-
- // Assign a supported backend to the newly introduced conversion layers
- auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
- {
- bool supportedBackendFound = false;
- std::string reasonIfUnsupported;
-
- // Try preferred backend first
- layer->SetBackendId(preferredBackend);
- if (IWorkloadFactory::IsLayerSupported(*layer,
- EmptyOptional(),
- reasonIfUnsupported))
- {
- supportedBackendFound = true;
- }
- else
- {
- for (const auto& backend : availablePreferredBackends)
- {
- // Skip preferred backend (we already determined that it is not supported)
- if (backend == preferredBackend)
- {
- continue;
- }
-
- layer->SetBackendId(backend);
- if (IWorkloadFactory::IsLayerSupported(*layer,
- EmptyOptional(),
- reasonIfUnsupported))
- {
- supportedBackendFound = true;
- break;
- }
- }
- }
-
- return supportedBackendFound;
- };
-
- for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
- {
- if (!AssignFirstSupportedBackend(convertLayer, backend))
- {
- return ReturnError(convertLayer);
- }
- }
-
- for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
- {
- if (!AssignFirstSupportedBackend(convertLayer, backend))
- {
- return ReturnError(convertLayer);
- }
- }
-
- return result;
- }
- }
std::stringstream warningMsg;
warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
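The AssignFirstSupportedBackend lambda deleted above is a preferred-then-fallback search. Distilled into a hypothetical standalone function with the same logic, using the names from the diff:

    // Pin the layer to the preferred backend; only if IWorkloadFactory rejects
    // it, walk the remaining preference list until some backend accepts.
    bool AssignFirstSupportedBackend(armnn::Layer* layer,
                                     const armnn::BackendId& preferred,
                                     const std::vector<armnn::BackendId>& available)
    {
        std::string reasonIfUnsupported;
        layer->SetBackendId(preferred);
        if (armnn::IWorkloadFactory::IsLayerSupported(*layer, armnn::EmptyOptional(), reasonIfUnsupported))
        {
            return true;
        }
        for (const auto& backend : available)
        {
            if (backend == preferred)
            {
                continue; // already determined to be unsupported
            }
            layer->SetBackendId(backend);
            if (armnn::IWorkloadFactory::IsLayerSupported(*layer, armnn::EmptyOptional(), reasonIfUnsupported))
            {
                return true;
            }
        }
        return false;
    }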
@@ -1669,6 +1553,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
throw InvalidArgumentException("Invoked Optimize with no backends specified");
}
+ if (options.m_ReduceFp32ToBf16)
+ {
+ throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
+ "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
+ }
+
if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
{
throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
@@ -1745,17 +1635,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
}
- // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
- // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
- // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
- // Constant and Fp32ToBf16 layers will also be fused so conversion is no longer needed at inference time
- if (options.m_ReduceFp32ToBf16)
- {
- ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
- Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
- Optimizer::Pass(optGraph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
- }
-
// Initialize backend settings
BackendSettings backendSettings(backendPreferences, deviceSpec);
if (backendSettings.GetAvailablePreferredBackends().empty())
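For contrast with the BF16 block deleted above, the FP32-to-FP16 reduction in the same function is kept. Based on the surrounding context lines, it chains Optimizer passes roughly like so (a sketch of the neighbouring code, not part of this diff):

    if (options.m_ReduceFp32ToFp16)
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    }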