ArmNN
 22.11
FuseConvertFp32ToBf16IntoConstLayers.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
#include "Optimization.hpp"

#include <armnnUtils/FloatingPointConverter.hpp>
#include <armnnUtils/Permute.hpp>

#include <ResolveType.hpp>
11 
12 namespace armnn
13 {
14 namespace optimizations
15 {
16 
18 {
19 public:
20  void Run(Graph& graph, InputSlot& connection) const
21  {
22  Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
23  Layer& child = connection.GetOwningLayer();
24 
27 
28  auto dataType = base.GetDataType();
29  switch (dataType)
30  {
31  case DataType::Float32:
32  ReplaceConvertFp32ToBf16Layer<DataType::BFloat16>(
33  graph,
34  PolymorphicDowncast<ConstantLayer*>(&base),
35  PolymorphicDowncast<ConvertFp32ToBf16Layer*>(&child));
36  break;
37  default:
39  std::string(" Constant Layer cannot be fused into ") +
40  GetDataTypeName(child.GetDataType()) +
41  std::string(" conversion layer."));
42  }
43  }
44 protected:
47 private:
48  template<armnn::DataType ArmnnType,
49  typename T = armnn::ResolveType<ArmnnType>>
50  static void ReplaceConvertFp32ToBf16Layer(Graph& graph,
51  ConstantLayer* constantLayer,
52  ConvertFp32ToBf16Layer* convertFp32ToBf16layer)
53  {
54  IgnoreUnused(graph);
55  /**
56  * This optimisation is to find situations where a constant set of inputs is being provided to a
57  * ConvertFp32ToBf16 layer. In this case we don't want the overhead of Converting the values on
58  * every inference, instead we want to Convert them once and store them in a Const layer to be
59  * used everytime as they will not change.
60  */
61  TensorInfo outputConvertFp32ToBf16Info = convertFp32ToBf16layer->GetOutputSlot(0).GetTensorInfo();
62  std::vector<T> newValues(outputConvertFp32ToBf16Info.GetNumElements());
63 
65  constantLayer->m_LayerOutput->GetConstTensor<float>(),
66  outputConvertFp32ToBf16Info.GetNumElements(),
67  newValues.data());
68  TensorInfo newInfo = outputConvertFp32ToBf16Info;
69  newInfo.SetConstant(true);
70  ConstTensor newInput(newInfo, newValues);
71 
72  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
73 
74  // Moves connections in convertFp32ToBf16layer output slot to the constant layer.
75  // ConvertFp32ToBf16layer layer will be removed if left unconnected.
76  convertFp32ToBf16layer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
77 
78  // Updating the output tensor
79  constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
80  ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
81  }
82 };
83 
87 
88 } // namespace optimizations
89 } // namespace armnn
A layer that the constant data can be bound to.
bool IsConstant() const
Definition: Tensor.cpp:509
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Layer & GetOwningLayer() const
Definition: Layer.hpp:119
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
constexpr const char * GetDataTypeName(DataType dataType)
Definition: TypesUtils.hpp:202
DataType
Definition: Types.hpp:48
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:273
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
Layer & GetOwningLayer() const
Definition: Layer.hpp:53
static void ConvertFloat32ToBFloat16(const float *srcFloat32Buffer, size_t numElements, void *dstBFloat16Buffer)
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:87
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514
DataType GetDataType() const
Definition: Layer.cpp:313
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:326
This layer converts data type Float32 to BFloat16.
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Definition: Layer.cpp:145
unsigned int GetNumElements() const
Definition: Tensor.hpp:196