author     Ryan OShea <ryan.oshea3@arm.com>    2022-11-07 16:20:48 +0000
committer  ryan.oshea3 <ryan.oshea3@arm.com>   2022-11-16 15:22:50 +0000
commit     31441595009182c985dacbedc70c41ee6664d070 (patch)
tree       248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/optimizations
parent     bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
download   armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests
* Throw exception if the m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments explaining that fast math must be enabled in order to use bf16
* Update docs to inform users to enable fast math for bf16

Execute Network changes:
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting of the m_ReduceFp32ToBf16 optimizer option

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
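After this change, BF16 execution is opted into through the backend fast-math option rather than the removed m_ReduceFp32ToBf16 flag. A minimal sketch of the resulting usage, with a hypothetical helper name and assuming a network and runtime already exist:

#include <armnn/ArmNN.hpp>

// Hypothetical helper: optimizes `network` for CpuAcc with fast math enabled,
// which is now the only route to BF16 execution.
armnn::IOptimizedNetworkPtr OptimizeWithBf16FastMath(const armnn::INetwork& network,
                                                     armnn::IRuntime& runtime)
{
    armnn::OptimizerOptions options;
    // Setting m_ReduceFp32ToBf16 to true now throws an exception;
    // leave it at its default of false.
    options.m_ReduceFp32ToBf16 = false;

    // The backend selects BF16 kernels internally only when fast math is on.
    armnn::BackendOptions cpuAcc("CpuAcc", {{"FastMathEnabled", true}});
    options.m_ModelOptions.push_back(cpuAcc);

    return armnn::Optimize(network, {armnn::Compute::CpuAcc},
                           runtime.GetDeviceSpec(), options);
}

Correspondingly, ExecuteNetwork now rejects bf16_turbo_mode unless fast_math_enabled is also set.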
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r--  src/armnn/optimizations/All.hpp                                     2
-rw-r--r--  src/armnn/optimizations/ConvertConstants.hpp                       54
-rw-r--r--  src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp               79
-rw-r--r--  src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp   89
4 files changed, 0 insertions, 224 deletions
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 0421f31973..a11dec9446 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -9,8 +9,6 @@
#include "ConvertConstants.hpp"
#include "ConvertConstDequantisationLayersToConstLayers.hpp"
#include "ConvertConstPermuteLayersToConstLayers.hpp"
-#include "FuseConvertFp32ToBf16IntoConstLayers.hpp"
-#include "ConvertFp32NetworkToBf16.hpp"
#include "ConvertFp32NetworkToFp16.hpp"
#include "FoldPadIntoLayer2d.hpp"
#include "FuseBatchNorm.hpp"
diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp
index 54c14e5c89..7b2f1fd291 100644
--- a/src/armnn/optimizations/ConvertConstants.hpp
+++ b/src/armnn/optimizations/ConvertConstants.hpp
@@ -11,7 +11,6 @@
#include <armnn/backends/TensorHandle.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
-#include <BFloat16.hpp>
#include <Half.hpp>
namespace armnn
@@ -19,27 +18,6 @@ namespace armnn
namespace optimizations
{
-struct BFloat16ToFloat32
-{
- static void Func(std::shared_ptr<ConstTensorHandle>& handle)
- {
- const TensorInfo& info = handle->GetTensorInfo();
-
- if (info.GetDataType() == DataType::BFloat16)
- {
- std::vector<float> newValues(info.GetNumElements());
-
- armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetConstTensor<BFloat16>(),
- info.GetNumElements(),
- newValues.data());
-
- TensorInfo newInfo(info.GetShape(), DataType::Float32, 0.0f, 0, true);
- ConstTensor newInput(newInfo, newValues);
- handle.reset(new ScopedTensorHandle(newInput));
- }
- }
-};
-
struct Float16ToFloat32
{
static void Func(std::shared_ptr<ConstTensorHandle>& handle)
@@ -61,27 +39,6 @@ struct Float16ToFloat32
}
};
-struct Float32ToBFloat16
-{
- static void Func(std::shared_ptr<ConstTensorHandle>& handle)
- {
- const TensorInfo& info = handle->GetTensorInfo();
-
- if (info.GetDataType() == DataType::Float32)
- {
- std::vector<BFloat16> newValues(info.GetNumElements());
-
- armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetConstTensor<float>(),
- info.GetNumElements(),
- newValues.data());
-
- TensorInfo newInfo(info.GetShape(), DataType::BFloat16, 0.0f, 0, true);
- ConstTensor newInput(newInfo, newValues);
- handle.reset(new ScopedTensorHandle(newInput));
- }
- }
-};
-
struct Float32ToFloat16
{
static void Func(std::shared_ptr<ConstTensorHandle>& handle)
@@ -138,17 +95,6 @@ struct IsFloat16Layer
}
};
-struct IsBFloat16Layer
-{
- static bool Test(const Layer& layer)
- {
- return layer.GetDataType() == DataType::BFloat16;
- }
-};
-
-using ConvertConstantsBFloatToFloat = ConvertConstants<BFloat16ToFloat32, IsFloat32Layer>;
-using ConvertConstantsFloatToBFloat = ConvertConstants<Float32ToBFloat16, IsBFloat16Layer>;
-
using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>;
using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>;
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
deleted file mode 100644
index 6c80e740be..0000000000
--- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "NetworkUtils.hpp"
-#include "Optimization.hpp"
-
-#include <armnn/utility/PolymorphicDowncast.hpp>
-
-namespace armnn
-{
-namespace optimizations
-{
-
-template <typename LayerT>
-inline LayerT* ConvertWeight(Layer* l)
-{
- LayerT* layer = PolymorphicDowncast<LayerT*>(l);
- if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
- && layer->m_Weight)
- {
- const TensorInfo& info = layer->m_Weight->GetTensorInfo();
-
- if (info.GetDataType() == DataType::Float32)
- {
- std::vector<BFloat16> newValues(info.GetNumElements());
-
- armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(
- layer->m_Weight->template GetConstTensor<float>(),
- info.GetNumElements(),
- newValues.data());
-
- TensorInfo newInfo(info);
- newInfo.SetDataType(DataType::BFloat16);
- ConstTensor newInput(newInfo, newValues);
- layer->m_Weight.reset(new ScopedTensorHandle(newInput));
- }
- }
- return layer;
-}
-
-class ConvertFp32NetworkToBf16Impl
-{
-public:
-
- void Run(Graph& graph, Layer& layer) const
- {
- // Only convert Float32 To BFloat16 for the Input of Convolution2d layer and FullyConnected layer.
- // And also convert weight data type from Float32 to Bfloat16.
- // Do not convert bias data type.
- if (layer.GetType() == LayerType::Convolution2d)
- {
- if (layer.GetDataType() == DataType::Float32)
- {
- InsertConvertFp32ToBf16LayersBefore(graph,layer);
- ConvertWeight<Convolution2dLayer>(&layer);
- }
- }
- else if (layer.GetType() == LayerType::FullyConnected)
- {
- if (layer.GetDataType() == DataType::Float32)
- {
- InsertConvertFp32ToBf16LayersBefore(graph,layer);
- ConvertWeight<FullyConnectedLayer>(&layer);
- }
- }
- }
-
-protected:
- ConvertFp32NetworkToBf16Impl() = default;
- ~ConvertFp32NetworkToBf16Impl() = default;
-};
-
-using Fp32NetworkToBf16Converter = OptimizeForType<Layer, ConvertFp32NetworkToBf16Impl>;
-
-} // namespace optimizations
-} // namespace armnn
diff --git a/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp b/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
deleted file mode 100644
index d112010539..0000000000
--- a/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
+++ /dev/null
@@ -1,89 +0,0 @@
-//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "Optimization.hpp"
-#include <armnnUtils/Permute.hpp>
-#include <ResolveType.hpp>
-
-namespace armnn
-{
-namespace optimizations
-{
-
-class FuseConvertFp32ToBf16IntoConstLayers
-{
-public:
- void Run(Graph& graph, InputSlot& connection) const
- {
- Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
- Layer& child = connection.GetOwningLayer();
-
- ARMNN_ASSERT(base.GetType() == LayerType::Constant);
- ARMNN_ASSERT(child.GetType() == LayerType::ConvertFp32ToBf16);
-
- auto dataType = base.GetDataType();
- switch (dataType)
- {
- case DataType::Float32:
- ReplaceConvertFp32ToBf16Layer<DataType::BFloat16>(
- graph,
- PolymorphicDowncast<ConstantLayer*>(&base),
- PolymorphicDowncast<ConvertFp32ToBf16Layer*>(&child));
- break;
- default:
- throw InvalidArgumentException(GetDataTypeName(dataType) +
- std::string(" Constant Layer cannot be fused into ") +
- GetDataTypeName(child.GetDataType()) +
- std::string(" conversion layer."));
- }
- }
-protected:
- FuseConvertFp32ToBf16IntoConstLayers() = default;
- ~FuseConvertFp32ToBf16IntoConstLayers() = default;
-private:
- template<armnn::DataType ArmnnType,
- typename T = armnn::ResolveType<ArmnnType>>
- static void ReplaceConvertFp32ToBf16Layer(Graph& graph,
- ConstantLayer* constantLayer,
- ConvertFp32ToBf16Layer* convertFp32ToBf16layer)
- {
- IgnoreUnused(graph);
- /**
- * This optimisation is to find situations where a constant set of inputs is being provided to a
- * ConvertFp32ToBf16 layer. In this case we don't want the overhead of Converting the values on
- * every inference, instead we want to Convert them once and store them in a Const layer to be
- * used everytime as they will not change.
- */
- TensorInfo outputConvertFp32ToBf16Info = convertFp32ToBf16layer->GetOutputSlot(0).GetTensorInfo();
- std::vector<T> newValues(outputConvertFp32ToBf16Info.GetNumElements());
-
- armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(
- constantLayer->m_LayerOutput->GetConstTensor<float>(),
- outputConvertFp32ToBf16Info.GetNumElements(),
- newValues.data());
- TensorInfo newInfo = outputConvertFp32ToBf16Info;
- newInfo.SetConstant(true);
- ConstTensor newInput(newInfo, newValues);
-
- constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
-
- // Moves connections in convertFp32ToBf16layer output slot to the constant layer.
- // ConvertFp32ToBf16layer layer will be removed if left unconnected.
- convertFp32ToBf16layer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
-
- // Updating the output tensor
- constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
- ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
- }
-};
-
-using FuseConversionLayersIntoConstLayers = OptimizeForConnection<ConstantLayer,
- ConvertFp32ToBf16Layer,
- FuseConvertFp32ToBf16IntoConstLayers>;
-
-} // namespace optimizations
-} // namespace armnn
\ No newline at end of file