ArmNN
 22.05.01
ConvertConstDequantisationLayersToConstLayers.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "Optimization.hpp"
8 #include "NetworkUtils.hpp"
9 
10 namespace armnn
11 {
12 namespace optimizations
13 {
14 
16 {
17 public:
/// Resolves the two layers on either side of `connection` and passes them to
/// ReplaceConstDequantisationLayer.
///
/// @param graph      Graph being optimised; forwarded unchanged to ReplaceConstDequantisationLayer.
/// @param connection Input slot linking the two layers: the layer that owns this slot is treated as
///                   the Dequantize layer, and the layer feeding it as the Constant layer.
18  void Run(Graph& graph, InputSlot& connection) const
19  {
    // `base` owns the output slot feeding `connection`; `child` owns `connection` itself.
20  Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
21  Layer& child = connection.GetOwningLayer();
22 
    // NOTE(review): listing lines 23-24 are absent from this extract, so whatever they contained
    // is not visible here — consult the original header before modifying this method.
25 
    // Assumes base is a ConstantLayer and child is a DequantizeLayer; the downcasts below depend
    // on it — TODO confirm the caller guarantees this pairing.
26  ReplaceConstDequantisationLayer(graph,
27  PolymorphicDowncast<ConstantLayer*>(&base),
28  PolymorphicDowncast<DequantizeLayer*>(&child));
29 
30  }
31 protected:
34 private:
35 
/**
 * Folds a Dequantize layer into the Constant layer that feeds it.
 *
 * The constant's data is converted to float once, stored back on the Constant layer, and every
 * consumer of the Dequantize output is re-attached to the Constant layer's output slot.
 *
 * @param graph           Unused (passed to IgnoreUnused).
 * @param constantLayer   Constant layer whose m_LayerOutput is replaced with the converted data.
 * @param dequantizeLayer Dequantize layer whose output connections are moved to constantLayer.
 */
36  static void ReplaceConstDequantisationLayer(Graph& graph,
37  ConstantLayer* constantLayer,
38  DequantizeLayer* dequantizeLayer)
39  {
40  IgnoreUnused(graph);
41  /**
42  * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
43  * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
44  * want to Dequantize them once and store them in a Const layer to be used every time as they will not change.
45  */
    // Snapshot the tensor descriptions on both sides of the Dequantize layer before rewiring anything.
46  TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
47  TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
48  TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
49 
50  ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
    // Connection count before the move; used below as the start index of the final loop.
51  auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
52 
    // Destination buffer for the converted values; std::vector value-initialises it to 0.0f.
53  std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
54  if (constantInfo.GetDataType() == DataType::Float16 &&
55  inputDequantizeInfo.GetDataType() == DataType::Float16 &&
56  outputDequantizeInfo.GetDataType() == DataType::Float32)
57  {
    // NOTE(review): listing line 58 (the start of this call) is absent from this extract;
    // presumably a Float16 -> Float32 conversion of the constant's data — confirm against the original.
59  outputDequantizeInfo.GetNumElements(),
60  newValues.data());
61  }
62  else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
63  inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
64  outputDequantizeInfo.GetDataType() == DataType::Float32)
65  {
    // NOTE(review): ConvertInt8To32 copies the raw int8 values to float and is given no
    // quantization scale or zero point here — confirm that is intended for QAsymmS8 data.
66  ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
67  outputDequantizeInfo.GetNumElements(),
68  newValues.data());
69  }
    // NOTE(review): when neither branch above matches, newValues is still all zeros and is
    // nevertheless installed as the constant's data below — confirm callers only reach this
    // function for the two handled type combinations.
70 
    // Replace the constant's payload with the float data, described by the Dequantize output info
    // marked as constant.
71  TensorInfo newInfo = outputDequantizeInfo;
72  newInfo.SetConstant(true);
73  ConstTensor newInput(newInfo, newValues);
74  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
75 
76  // Moves connections in dequantize output to the constant layer.
77  // Dequantize layer will be removed if left unconnected.
78  dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
79 
80  // Updating the output tensor
81  constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
82  ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
83 
84  // Set isConstant to true in all input tensor infos where constantLayer is now connected to
    // Starts at the pre-move connection count; assumes moved connections are appended after the
    // existing ones — TODO confirm against MoveAllConnections.
    // NOTE(review): the loop addresses GetInputSlot(0) of each consumer layer rather than
    // connection i itself; verify this is correct when the consumer is attached via another slot.
85  for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
86  {
    // NOTE(review): listing lines 88 and 91 (the tails of the two chained expressions below)
    // are absent from this extract.
87  auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
89  info.SetConstant();
90  constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
92  }
93  }
94 
95 
96 static void ConvertInt8To32(const void* srcInt8Buffer,
97  size_t numElements,
98  float* dstFloat32Buffer)
99 {
100  ARMNN_ASSERT(srcInt8Buffer != nullptr);
101  ARMNN_ASSERT(dstFloat32Buffer != nullptr);
102 
103  const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
104 
105  for (size_t i = 0; i < numElements; ++i)
106  {
107  dstFloat32Buffer[i] = pInt8[i];
108  }
109 }
110 
111 };
112 
117 
118 } // namespace optimizations
119 } // namespace armnn
A layer that the constant data can be bound to.
bool IsConstant() const
Definition: Tensor.cpp:509
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Layer & GetOwningLayer() const
Definition: Layer.hpp:118
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:320
unsigned int GetNumConnections() const override
Definition: Layer.hpp:143
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:322
This layer dequantizes the input tensor.
DataType GetDataType() const
Definition: Tensor.hpp:198
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:271
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Layer & GetOwningLayer() const
Definition: Layer.hpp:53
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:87
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:324
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Definition: Layer.cpp:145
const InputSlot * GetConnection(unsigned int index) const override
Definition: Layer.cpp:75
unsigned int GetNumElements() const
Definition: Tensor.hpp:196