ArmNN 22.11
NetworkUtils.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkUtils.hpp"

#include <armnnUtils/FloatingPointConverter.hpp>
#include <BFloat16.hpp>
#include "SubgraphViewSelector.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

namespace armnn
{

namespace
{

void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
{
    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
    TensorInfo newTensorInfo(origTensorInfo);
    newTensorInfo.SetDataType(DataType::Float32);
    outputSlot.SetTensorInfo(newTensorInfo);
}

void ChangeOutputBf16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

void ChangeOutputFp16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

} // anonymous namespace
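
// Illustrative sketch (hypothetical helper): UpdateOutputSlotToFp32 copies the
// whole TensorInfo (shape, quantization parameters) and rewrites only the data
// type, so callers can rely on everything else surviving the retype.
inline void ExampleForceSlotToFp32(Layer& layer)
{
    OutputSlot& slot = layer.GetOutputSlot(0);
    UpdateOutputSlotToFp32(slot); // file-local helper defined above
    ARMNN_ASSERT(slot.GetTensorInfo().GetDataType() == DataType::Float32);
}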

std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertBf16ToFp32Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertBf16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
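
// Usage sketch (hypothetical helper): a pass that must run a BF16-typed layer
// with an FP32-only kernel can standardise its inputs like this; the returned
// converters can then be assigned to a backend that supports the conversion.
inline void ExampleWrapBf16Inputs(Graph& graph, Layer& layer)
{
    // expectCorrectInputType == true: only inputs whose producers really are
    // BF16 get a converter.
    for (ConvertBf16ToFp32Layer* convertLayer :
         InsertConvertBf16ToFp32LayersBefore(graph, layer, true))
    {
        convertLayer->SetBackendId(Compute::CpuRef); // assumption: CpuRef runs the conversion
    }
}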

std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp32ToBf16Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;

        if ((layer.GetType() == LayerType::Convolution2d ||
             layer.GetType() == LayerType::FullyConnected ||
             layer.GetType() == LayerType::DepthwiseConvolution2d)
            && inputSlot->GetSlotIndex() == 2)
        {
            // Refrain from reducing bias to Bf16
            continue;
        }
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp32ToBf16Layer before FP32 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
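
// Usage sketch (hypothetical helper): the guard above keeps biases in FP32.
// For Convolution2d, FullyConnected and DepthwiseConvolution2d the bias sits
// on input slot 2, so converting such a layer's inputs to BF16 yields
// converters for slot 0 (data) and slot 1 (weights) at most.
inline void ExampleReduceConvInputsToBf16(Graph& graph, Layer& convLayer)
{
    std::vector<ConvertFp32ToBf16Layer*> converts =
        InsertConvertFp32ToBf16LayersBefore(graph, convLayer, true);
    ARMNN_ASSERT(converts.size() <= 2); // the bias slot is never converted
}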

std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp16ToFp32Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
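
// Usage sketch (hypothetical helper): with expectCorrectInputType == false the
// data type check is skipped and a converter lands before every input slot,
// assuming each slot already has a connected producer. The generated names
// follow the pattern "convert_fp16_to_fp32-<slotIndex>-<layerName>".
inline void ExampleConvertAllInputs(Graph& graph, Layer& layer)
{
    std::vector<ConvertFp16ToFp32Layer*> converts =
        InsertConvertFp16ToFp32LayersBefore(graph, layer, false);
    ARMNN_ASSERT(converts.size() == layer.GetNumInputSlots());
}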

std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update Bf16 output slots to FP32 on current layer
    ChangeOutputBf16ToFp32(layer);

    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
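
// Usage sketch (hypothetical helper): the call first retypes the layer's own
// BF16 outputs to FP32 (via ChangeOutputBf16ToFp32), then each inserted
// converter hands BF16 back to downstream consumers.
inline void ExampleRestoreBf16Outputs(Graph& graph, Layer& layer)
{
    for (ConvertFp32ToBf16Layer* convertLayer : InsertConvertFp32ToBf16LayersAfter(graph, layer))
    {
        ARMNN_ASSERT(convertLayer->GetOutputSlot().GetTensorInfo().GetDataType() == DataType::BFloat16);
    }
}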

std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToFp16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update FP16 output slots to FP32 on current layer
    ChangeOutputFp16ToFp32(layer);

    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToFp16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
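
// Usage sketch (hypothetical helper): pairing the Before/After helpers gives
// the standard FP32 fallback for a layer that has no FP16 implementation:
//     FP16 producer -> ConvertFp16ToFp32 -> layer (FP32) -> ConvertFp32ToFp16
inline void ExampleFp32Fallback(Graph& graph, Layer& fp16OnlyLayer)
{
    InsertConvertFp16ToFp32LayersBefore(graph, fp16OnlyLayer, true);
    InsertConvertFp32ToFp16LayersAfter(graph, fp16OnlyLayer);
}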

std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool toFile)
{
    std::vector<DebugLayer*> debugLayers;
    debugLayers.reserve(layer.GetNumOutputSlots());

    // Connect a DebugLayer to each output slot of the layer
    uint32_t outputSlotIdx = 0;
    for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
                                      std::to_string(outputSlotIdx);

        DebugLayer* debugLayer =
            graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);

        // Sets output tensor info for the debug layer.
        ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
        TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();

        debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);

        // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
        debugLayer->SetBackendId(Compute::CpuRef);

        debugLayers.emplace_back(debugLayer);

        ++outputSlotIdx;
    }

    return debugLayers;
}
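
// Usage sketch (hypothetical helper): instrumenting a single layer while
// debugging. The inserted DebugLayers record the tensor data flowing out of
// each output slot; with toFile == true the data is dumped to file rather
// than printed.
inline void ExampleInstrumentLayer(Graph& graph, Layer& layerOfInterest)
{
    std::vector<DebugLayer*> debugLayers = InsertDebugLayerAfter(graph, layerOfInterest, false);
    ARMNN_ASSERT(debugLayers.size() == layerOfInterest.GetNumOutputSlots());
}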

bool RevertConstantWeightsToFP32(Layer* layer)
{
    if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
    {
        // Revert Weights on Constant Layer to FP32 so they can be accessed by Conv2d or FullyConnected
        // This prevents a conversion layer being added in during backend assignment which blocks
        // the RedirectMembersToConstantInputs backward compatibility workaround/optimization.
        auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();

        if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
        {
            std::vector<float> newValues(constantLayerInfo.GetNumElements());

            auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
                &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());

            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
                weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
                constantLayerInfo.GetNumElements(),
                newValues.data());

            TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
            newInfo.SetConstant(true);
            ConstTensor newInput(newInfo, newValues);
            weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
            weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);

            // Connect Conv2d/FullyConnected to InputLayer directly leaving out
            // the ConversionLayer to be cleaned up later
            auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
            auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();

            auto& conversionLayerOutputSlot =
                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0);
            auto& conversionLayerInputSlot =
                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0);
            actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
            conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));

            actualInputOutputSlot->Connect(layer->GetInputSlot(0));

            return true;
        }
    }
    return false;
}
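
// Usage sketch (hypothetical helper): an optimization pass could walk the
// graph and undo BF16 weight reduction wherever it would block the
// RedirectMembersToConstantInputs workaround described above. The guard inside
// RevertConstantWeightsToFP32 limits the work to Convolution2d and
// FullyConnected layers with constant BF16 weights on input slot 1.
inline void ExampleRevertAllBf16Weights(Graph& graph)
{
    for (Layer* layer : graph)
    {
        if (layer->GetType() == LayerType::Convolution2d ||
            layer->GetType() == LayerType::FullyConnected)
        {
            RevertConstantWeightsToFP32(layer);
        }
    }
}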

} // namespace armnn